Use header=None when the columns are not labeled in your csv file
df = pd.read_csv("pathToFile.csv", header=None)

Use header=None when the columns are not labeled in your xlsx file
df = pd.read_excel("pathToFile.xlsx", header=None)
df.head()
df.tail()
df.shape
df.columns
df['column_name'].value_counts()
df.describe()
df.info()
df.isnull().sum()

NOTE: import seaborn as sns
sns.heatmap(df.isnull())

axis=1 is for columns
df.drop(['column_1','column_2'],axis=1,inplace=True)
df['column_name']=df['column_name'].fillna(df['column_name'].mean())
df['column_name'] = pd.factorize(df['column_name'])[0]
unique = pd.factorize(df['column_name'])[1]
df['column_name'].unique()
df['columns_name'] = df['column_name'].astype("float")
df = df.set_index(df['column_name'])
df_bangalore = df[df['city']=='bangalore']
df_lucknow = df[df['city']=='lucknow']
df.index

NOTE: Column names are ignored and only float/integers allowed
df.to_numpy()
df.sort_values(by='colName')
df.copy()
df.dropna()
df.fillna(value=10)
pd.isna(df)
df.mean()
df.mean(1)
pd.concat([df[:2],df[3:6]])
pd.merge(df1,df2,on='indexColName')
df.groupby('colName').sum()
df.subtract(df['col'],axis=0)
df.to_csv('filename.csv')
df.to_excel('filename.xlsx',sheet_name='Sheet1')

Will change categorical data into one column of integer data
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(df['column_name'])
df_processed = pd.get_dummies(df, prefix_sep="__",columns=["column_1", "column_2"])

NOTE: Make sure you use fit_transform only on train dataset and use just transform for test and post-deployment dataset
# Feature scaling with scikit-learn's StandardScaler.
# `train_data` and `test_data` are expected to be defined by the caller
# before this snippet runs.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# fit_transform: fit the scaler on the training data and transform it
# in one step.
train_data = scaler.fit_transform(train_data)
# transform only: reuse the already-fitted scaler for the test data;
# the scaler is not re-fitted here.
test_data = scaler.transform(test_data)