Professional Documents
Culture Documents
Pre-Processing Example - 1
Pre-Processing Example - 1
Example – 1:
import pandas as pd
# Structural overview of `data`.
# NOTE(review): `data` is assumed to be a pandas DataFrame loaded before this
# point; the loading step is not visible here - confirm.
column_index = data.columns
print(column_index)
print(len(column_index))
print(len(data))
print(data.dtypes)

# True when at least one cell is missing; the walkthrough reports it twice.
has_missing = data.isnull().values.any()
print(has_missing)
print(has_missing)

# Number of distinct values in the 'Salary' column.
distinct_salaries = data['Salary'].unique()
print(len(distinct_salaries))
# Split the textual 'Experience' column into numeric lower/upper bounds.
# Each entry is parsed as "<min>-<max>" with an optional "yrs" suffix
# (e.g. "2-5 yrs" - format inferred from the parsing below; confirm against
# the real data).
exp = list(data.Experience)
print(exp)

# Drop the "yrs" unit and surrounding whitespace so only "<min>-<max>" remains.
exp = [entry.replace("yrs", "").strip() for entry in exp]

min_ex = []
max_ex = []
for entry in exp:
    bounds = entry.split("-")
    # int() raises ValueError on non-numeric parts; a missing "-" raises
    # IndexError on bounds[1]. Both are left to propagate so malformed rows
    # are noticed rather than silently skipped.
    min_ex.append(int(bounds[0].strip()))
    max_ex.append(int(bounds[1].strip()))

#Attaching the new experiences to the original dataset
data["minimum_exp"] = min_ex
data["maximum_exp"] = max_ex
# Replace the categorical 'Location' and 'Salary' columns with their
# label-encoded values. One encoder object is refitted per column, so after
# this block `le` only remembers the 'Salary' classes.
le = LabelEncoder()
for column in ('Location', 'Salary'):
    data[column] = le.fit_transform(data[column])

# Show the encoded columns, then the whole frame.
print(data['Location'])
print(data['Salary'])
print(data)
# Collect the columns that make up the cleaned dataset.
Index = data['Index']
Company = data['Company']
Location = data['Location']
Salary = data['Salary']
minimum_exp = data['minimum_exp']
maximum_exp = data['maximum_exp']

# dictionary of lists
# The mapping itself was missing, so `pd.DataFrame(dict)` was being handed the
# builtin `dict` type. It is rebuilt here from the series extracted above.
# NOTE(review): keys and column order mirror the extraction order - confirm
# against whatever later reads File4.csv.
cleaned_columns = {
    'Index': Index,
    'Company': Company,
    'Location': Location,
    'Salary': Salary,
    'minimum_exp': minimum_exp,
    'maximum_exp': maximum_exp,
}
df = pd.DataFrame(cleaned_columns)

# Raw string: the Windows path contains backslashes that are not valid escape
# sequences (\M, \C, \P, \F). The resulting path is unchanged.
df.to_csv(r'J:\Machine Learning\Class\Practical\Practical_1\File4.csv')
# Split each set into a feature array (every column except the last) and a
# label array (the last column).
# NOTE(review): `training_set` / `validation_set` are created outside this
# view - confirm they are DataFrames with the label in the final column.
X_train = training_set.iloc[:, :-1].values
Y_train = training_set.iloc[:, -1].values

X_val = validation_set.iloc[:, :-1].values
y_val = validation_set.iloc[:, -1].values

# Bare names: these only echo the arrays in an interactive session and do
# nothing when run as a script.
X_train
Y_train
X_val
y_val
def accuracy(confusion_matrix):
    """Return the share of correctly classified samples.

    Args:
        confusion_matrix: Square array-like exposing ``trace()`` and ``sum()``
            (for example a NumPy array), where entry ``[i, j]`` counts the
            samples of true class ``i`` predicted as class ``j``.

    Returns:
        The sum of the diagonal (correct predictions) divided by the sum of
        all entries.
    """
    diagonal_sum = confusion_matrix.trace()
    sum_of_all_elements = confusion_matrix.sum()
    # The ratio was computed from these two values but never returned, so
    # callers received None.
    return diagonal_sum / sum_of_all_elements
# Fit a Gaussian Naive Bayes model on the training arrays.
classifier = GaussianNB()
classifier.fit(X_train, Y_train)

# Predict the validation labels and tabulate them against the true labels.
y_pred = classifier.predict(X_val)
cm = confusion_matrix(y_val, y_pred)
print(cm)

# Report the accuracy derived from the confusion matrix.
validation_accuracy = accuracy(cm)
print("__ACCURACY = ", validation_accuracy)
Example – 2:
import pandas as pd
# Inspect the dataset.
# NOTE(review): `dataset` is assumed to be a DataFrame loaded before this
# point; the loading step is not visible here - confirm.
print(dataset.columns)
dataset
dataset.info()

# Feature array: all rows, every column except the last one.
X = dataset.iloc[:, :-1].values

# Fill missing entries in columns 1 and 2 with the mean of each column.
# NOTE(review): `Imputer` was removed from scikit-learn in 0.22; on current
# versions the equivalent is
# `sklearn.impute.SimpleImputer(missing_values=np.nan, strategy='mean')`
# - confirm the target scikit-learn version before changing.
impute_cols = slice(1, 3)
imputer = Imputer(missing_values='NaN', strategy='mean', axis=0)
imputer.fit(X[:, impute_cols])
X[:, impute_cols] = imputer.transform(X[:, impute_cols])