Assignments - Day 2 - Jupyter Notebook

In [55]: import numpy as np
import pandas as pd
from pandas import Series,DataFrame

from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler, RobustScaler
In [60]: data = pd.read_csv(r"C:\Users\shirin\OneDrive\Desktop\MiniProjects\Datasets\

#1)read the iris data set and print head,tail,info,describe
data
Out[60]: 150 4 setosa versicolor disorder
0 5.1 3.5 1.4 0.2 Normal
1 4.9 3.0 1.4 0.2 Normal
2 4.7 3.2 1.3 0.2 Normal
3 4.6 3.1 1.5 0.2 Normal
4 5.0 3.6 1.4 0.2 Normal
... ... ... ... ... ...
145 6.7 3.0 5.2 2.3 right eye disorder
150 rows × 5 columns
In [61]: data.head()
0 5.1 3.5 1.4 0.2 Normal
1 4.9 3.0 1.4 0.2 Normal
2 4.7 3.2 1.3 0.2 Normal
3 4.6 3.1 1.5 0.2 Normal
4 5.0 3.6 1.4 0.2 Normal
In [62]: data.tail()

In [63]: data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 150 150 non-null float64
1 4 150 non-null float64
2 setosa 150 non-null float64
3 versicolor 150 non-null float64
4 disorder 150 non-null object
dtypes: float64(4), object(1)
memory usage: 6.0+ KB
In [64]: data.describe()
Out[64]: 150 4 setosa versicolor
count 150.000000 150.000000 150.000000 150.000000
mean 5.843333 3.057333 3.758000 1.199333
std 0.828066 0.435866 1.765298 0.762238
min 4.300000 2.000000 1.000000 0.100000
25% 5.100000 2.800000 1.600000 0.300000
50% 5.800000 3.000000 4.350000 1.300000
75% 6.400000 3.300000 5.100000 1.800000
max 7.900000 4.400000 6.900000 2.500000
In [65]: #2)perform the label encoding operation on last column

data['disorder'].unique()
Out[65]: array(['Normal', 'left eye disorder', 'right eye disorder'], dtype=object)

In [79]: # Assuming the last column is the target column you want to encode
# Name of the right-most column, taken here to be the label column
target_column = data.columns[-1]

# Encoder that assigns an integer code to each distinct label value
label_encoder = LabelEncoder()

# Learn the label -> code mapping and overwrite the column with the integer codes
encoded_labels = label_encoder.fit_transform(data[target_column])
data[target_column] = encoded_labels

# Show the frame with the encoded column
data
0 5.1 3.5 1.4 0.2 0
1 4.9 3.0 1.4 0.2 0
2 4.7 3.2 1.3 0.2 0
3 4.6 3.1 1.5 0.2 0
4 5.0 3.6 1.4 0.2 0
... ... ... ... ... ...
145 6.7 3.0 5.2 2.3 2
146 6.3 2.5 5.0 1.9 2
147 6.5 3.0 5.2 2.0 2
148 6.2 3.4 5.4 2.3 2
149 5.9 3.0 5.1 1.8 2
In [76]: #3) perform standard scaler, min/max scaler and uniform scaler operations on the same dataset
In [81]: # Extract the numerical features (assuming they are all numeric in this case)
# NOTE(review): this drops the 'versicolor' feature column and keeps the label-encoded
# 'disorder' column, so the encoded labels are scaled as well — confirm this is intended.
numeric_features = data.drop('versicolor', axis=1)
# StandardScaler
standard_scaler = StandardScaler()
# This call has to run: the DataFrame built on the next line reads standard_scaled_data,
# so leaving it commented out raises NameError on a fresh kernel.
standard_scaled_data = standard_scaler.fit_transform(numeric_features)
# Wrap the scaled array back into a DataFrame that carries the input column labels
iris_df_standard_scaled = pd.DataFrame(standard_scaled_data, columns=numeric_features.columns)
iris_df_standard_scaled
Out[81]: 150 4 setosa disorder
0 -0.900681 1.019004 -1.315444 -1.224745
1 -1.143017 -0.131979 -1.315444 -1.224745
2 -1.385353 0.328414 -1.315444 -1.224745
3 -1.506521 0.098217 -1.315444 -1.224745
4 -1.021849 1.249201 -1.315444 -1.224745
... ... ... ... ...
145 1.038005 -0.131979 1.448832 1.224745
146 0.553333 -1.282963 0.922303 1.224745
147 0.795669 -0.131979 1.053935 1.224745
148 0.432165 0.788808 1.448832 1.224745
149 0.068662 -0.131979 0.790671 1.224745
In [82]: # MinMaxScaler
# Rescale every column of numeric_features with MinMaxScaler
min_max_scaler = MinMaxScaler()
min_max_scaled_data = min_max_scaler.fit_transform(numeric_features)
# Wrap the scaled array back into a DataFrame that carries the input column labels
iris_df_min_max_scaled = pd.DataFrame(min_max_scaled_data, columns=numeric_features.columns)
iris_df_min_max_scaled
0 0.222222 0.625000 0.067797 0.0
1 0.166667 0.416667 0.067797 0.0
2 0.111111 0.500000 0.050847 0.0
3 0.083333 0.458333 0.084746 0.0
4 0.194444 0.666667 0.067797 0.0
... ... ... ... ...
145 0.666667 0.416667 0.711864 1.0
146 0.555556 0.208333 0.677966 1.0
147 0.611111 0.416667 0.711864 1.0
148 0.527778 0.583333 0.745763 1.0
149 0.444444 0.416667 0.694915 1.0

In [83]: # RobustScaler
# Rescale every column of numeric_features with RobustScaler
robust_scaler = RobustScaler()
robust_scaled_data = robust_scaler.fit_transform(numeric_features)
# Wrap the scaled array back into a DataFrame that carries the input column labels
iris_df_robust_scaled = pd.DataFrame(robust_scaled_data, columns=numeric_features.columns)
iris_df_robust_scaled
0 -0.538462 1.0 -0.842857 -0.5
1 -0.692308 0.0 -0.842857 -0.5
2 -0.846154 0.4 -0.871429 -0.5
3 -0.923077 0.2 -0.814286 -0.5
4 -0.615385 1.2 -0.842857 -0.5
... ... ... ... ...
145 0.692308 0.0 0.242857 0.5
146 0.384615 -1.0 0.185714 0.5
147 0.538462 0.0 0.242857 0.5
148 0.307692 0.8 0.300000 0.5
149 0.076923 0.0 0.214286 0.5
In [84]: #4) check for any null values in the dataset; if present, replace them with zeros
In [46]: iris_df.isnull()
0 False False False False False
... ... ... ... ... ...
In [50]: iris_df = iris_df.fillna(0)
# fillna(0) returns a new frame with nulls replaced by 0. With inplace=True the call
# returns None, so assigning its result would overwrite iris_df with None.
# NOTE(review): iris_df is not defined in the visible cells — presumably it is the frame
# loaded as `data`; confirm before relying on a fresh-kernel run.
In [ ]: #5) apply principal component analysis and SMOTE operations on the same dataset
In [ ]:
In [ ]:
In [51]: #6) separate the columns of the dataset into independent variables and the dependent variable
In [89]: indep = data.columns[:-1]  # every column label except the last

dep = data.columns[-1:]  # one-element Index holding only the last column label

# Print each group of labels prefixed by its role
for role, labels in (("independent", indep), ("dependent", dep)):
    print(role, labels)
independent Index(['150', '4', 'setosa', 'versicolor'], dtype='object')

dependent Index(['disorder'], dtype='object')

Assignments - Day 2 - Jupyter Notebook

Uploaded by

Document Information

Original Description:

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Assignments - Day 2 - Jupyter Notebook

Uploaded by

Copyright:

Available Formats

In [55]: import pandas as pd

from pandas import Series,DataFrame

In [60]: data = pd.read_csv(r"C:\Users\shirin\OneDrive\Desktop\MiniProjects\Datasets\

Out[60]: 150 4 setosa versicolor disorder

0 5.1 3.5 1.4 0.2 Normal

1 4.9 3.0 1.4 0.2 Normal

2 4.7 3.2 1.3 0.2 Normal

3 4.6 3.1 1.5 0.2 Normal

4 5.0 3.6 1.4 0.2 Normal

... ... ... ... ... ...

145 6.7 3.0 5.2 2.3 right eye disorder

146 6.3 2.5 5.0 1.9 right eye disorder

147 6.5 3.0 5.2 2.0 right eye disorder

148 6.2 3.4 5.4 2.3 right eye disorder

149 5.9 3.0 5.1 1.8 right eye disorder

150 rows × 5 columns

Out[61]: 150 4 setosa versicolor disorder

0 5.1 3.5 1.4 0.2 Normal

1 4.9 3.0 1.4 0.2 Normal

2 4.7 3.2 1.3 0.2 Normal

3 4.6 3.1 1.5 0.2 Normal

4 5.0 3.6 1.4 0.2 Normal

Out[62]: 150 4 setosa versicolor disorder

145 6.7 3.0 5.2 2.3 right eye disorder

146 6.3 2.5 5.0 1.9 right eye disorder

147 6.5 3.0 5.2 2.0 right eye disorder

148 6.2 3.4 5.4 2.3 right eye disorder

149 5.9 3.0 5.1 1.8 right eye disorder

Out[64]: 150 4 setosa versicolor

count 150.000000 150.000000 150.000000 150.000000

mean 5.843333 3.057333 3.758000 1.199333

std 0.828066 0.435866 1.765298 0.762238

min 4.300000 2.000000 1.000000 0.100000

25% 5.100000 2.800000 1.600000 0.300000

50% 5.800000 3.000000 4.350000 1.300000

75% 6.400000 3.300000 5.100000 1.800000

max 7.900000 4.400000 6.900000 2.500000

In [65]: #2)perform the label encoding operation on last column

Out[65]: array(['Normal', 'left eye disorder', 'right eye disorder'], dtype=object)

Out[79]: 150 4 setosa versicolor disorder

0 5.1 3.5 1.4 0.2 0

1 4.9 3.0 1.4 0.2 0

2 4.7 3.2 1.3 0.2 0

3 4.6 3.1 1.5 0.2 0

4 5.0 3.6 1.4 0.2 0

... ... ... ... ... ...

145 6.7 3.0 5.2 2.3 2

146 6.3 2.5 5.0 1.9 2

147 6.5 3.0 5.2 2.0 2

148 6.2 3.4 5.4 2.3 2

149 5.9 3.0 5.1 1.8 2

150 rows × 5 columns

Out[81]: 150 4 setosa disorder

0 -0.900681 1.019004 -1.315444 -1.224745

1 -1.143017 -0.131979 -1.315444 -1.224745

2 -1.385353 0.328414 -1.315444 -1.224745

3 -1.506521 0.098217 -1.315444 -1.224745

4 -1.021849 1.249201 -1.315444 -1.224745

... ... ... ... ...

145 1.038005 -0.131979 1.448832 1.224745

146 0.553333 -1.282963 0.922303 1.224745

147 0.795669 -0.131979 1.053935 1.224745

148 0.432165 0.788808 1.448832 1.224745