Professional Documents
Culture Documents
Name: Fedrick Samuel W Reg No: 19MIS1112 Course: Machine Learning (SWE4012) Slot: L11 + L12 Faculty: Dr. M. Premalatha
Name: Fedrick Samuel W Reg No: 19MIS1112 Course: Machine Learning (SWE4012) Slot: L11 + L12 Faculty: Dr. M. Premalatha
file
# One-hot encode the 'Purchased' column into indicator columns.
# NOTE(review): `col` is not referenced in the visible cells; the 'Yes'/'No'
# columns plotted below are presumably joined onto `file` elsewhere - confirm.
col = pd.get_dummies(file['Purchased'])

# lmplot is a figure-level function: it opens its own figure. Labels and
# titles therefore have to be applied AFTER the call, otherwise they end up
# on a separate, empty figure.
sb.lmplot(x='Yes', y="Salary", data=file, aspect=2, height=6)
# Label text matches the plotted axes: x is the 'Yes' indicator, y is Salary.
plot.xlabel("yes")
plot.ylabel("salary")
plot.title("Purchased (yes) vs Salary")

sb.lmplot(x='No', y="Salary", data=file, aspect=2, height=6)
plot.title("Purchased (No) vs Salary")
<seaborn.axisgrid.FacetGrid at 0x275dd2ea790>
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
file.describe()
Salary No Yes
count 9.000000 10.000000 10.000000
mean 38.555556 0.500000 0.500000
std 7.779960 0.527046 0.527046
min 27.000000 0.000000 0.000000
25% 35.000000 0.000000 0.000000
50% 38.000000 0.500000 0.500000
75% 44.000000 1.000000 1.000000
max 50.000000 1.000000 1.000000
# Heatmap of the null mask of `df`: each cell is coloured by whether the
# value is missing. Row tick labels and the colour bar are switched off.
# NOTE(review): this cell reads `df` while neighbouring cells use `file` -
# confirm `df` is the intended frame.
plot.figure(figsize=(12, 4))
sb.heatmap(
    df.isna(),
    cmap='viridis',
    cbar=False,
    yticklabels=False,
)
plot.title('Missing value in the dataset');
# Drop every row that contains at least one missing value.
file=file.dropna(how="any")
# Per-column count of remaining nulls.
file.isnull().sum()
# Display the resulting frame.
file
# Heatmap of the null mask of `file`: each cell is coloured by whether the
# value is missing. Row tick labels and the colour bar are switched off.
plot.figure(figsize=(12, 4))
sb.heatmap(
    file.isna(),
    cmap='viridis',
    cbar=False,
    yticklabels=False,
)
plot.title('Missing value in the dataset');
# correlation plot
# Correlation is only defined for numeric columns. `file` also carries the
# string column 'Country'; pandas >= 2.0 raises on it instead of silently
# dropping it, so restrict to numeric columns explicitly (this form works on
# older pandas versions as well).
corr = file.select_dtypes(include='number').corr()
# Annotated heatmap of the pairwise correlation matrix.
sb.heatmap(corr, cmap = 'Wistia', annot= True);
import warnings
# Suppress every warning from this point on (no category or module filter is given).
warnings.filterwarnings('ignore')
# Dimensions of `file` as a (rows, columns) tuple.
file.shape
(9, 4)
# Print the index range, column names, non-null counts, dtypes and memory usage.
file.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 9 entries, 0 to 9
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Country 9 non-null object
1 Salary 9 non-null float64
2 No 9 non-null uint8
3 Yes 9 non-null uint8
dtypes: float64(1), object(1), uint8(2)
memory usage: 234.0+ bytes
['Country']
# Count missing values in each column listed in `categorical`.
df[categorical].isnull().sum()
Country 0
dtype: int64
# Print how often each distinct value occurs in column `var`.
# NOTE(review): the recorded output lists "Germany" twice - presumably one
# entry carries stray whitespace; consider stripping the column - confirm.
print(df[var].value_counts())
France 4
Spain 3
Germany 2
Germany 1
Name: Country, dtype: int64
# Print the share of rows taken by each distinct value in column `var`
# (count divided by the total number of rows in `df`).
# `np.float` was only an alias for the builtin `float`; it was deprecated in
# NumPy 1.20 and removed in 1.24, so use the builtin directly.
print(df[var].value_counts() / float(len(df)))
France 0.4
Spain 0.3
Germany 0.2
Germany 0.1
Name: Country, dtype: float64
Salary No Yes
0 44.0 1 0
1 27.0 0 1
2 30.0 1 0
3 38.0 1 0
4 40.0 0 1
# Count missing values in each column listed in `numerical`.
df[numerical].isnull().sum()
Salary 1
No 0
Yes 0
dtype: int64