Download as pdf or txt
Download as pdf or txt
You are on page 1of 6

In [55]: import numpy as np
import pandas as pd
from pandas import Series, DataFrame
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, RobustScaler, StandardScaler

In [60]: data = pd.read_csv(r"C:\Users\shirin\OneDrive\Desktop\MiniProjects\Datasets\


# 1) Read the iris data set, then print head, tail, info and describe in the cells below.
# NOTE(review): the read_csv call above is cut off in this export (file name, closing
# quote and parenthesis are missing) and must be restored before the cell can run.
# NOTE(review): the path is an absolute, user-specific Windows path; a path relative
# to a configurable data directory would make the notebook portable.
data

Out[60]: 150 4 setosa versicolor disorder

0 5.1 3.5 1.4 0.2 Normal

1 4.9 3.0 1.4 0.2 Normal

2 4.7 3.2 1.3 0.2 Normal

3 4.6 3.1 1.5 0.2 Normal

4 5.0 3.6 1.4 0.2 Normal

... ... ... ... ... ...

145 6.7 3.0 5.2 2.3 right eye disorder

146 6.3 2.5 5.0 1.9 right eye disorder

147 6.5 3.0 5.2 2.0 right eye disorder

148 6.2 3.4 5.4 2.3 right eye disorder

149 5.9 3.0 5.1 1.8 right eye disorder

150 rows × 5 columns

In [61]: data.head()  # preview the first five rows

Out[61]: 150 4 setosa versicolor disorder

0 5.1 3.5 1.4 0.2 Normal

1 4.9 3.0 1.4 0.2 Normal

2 4.7 3.2 1.3 0.2 Normal

3 4.6 3.1 1.5 0.2 Normal

4 5.0 3.6 1.4 0.2 Normal

In [62]: data.tail()  # preview the last five rows

Out[62]: 150 4 setosa versicolor disorder

145 6.7 3.0 5.2 2.3 right eye disorder

146 6.3 2.5 5.0 1.9 right eye disorder

147 6.5 3.0 5.2 2.0 right eye disorder

148 6.2 3.4 5.4 2.3 right eye disorder

149 5.9 3.0 5.1 1.8 right eye disorder


In [63]: data.info()  # column names, non-null counts, dtypes and memory usage

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 150 150 non-null float64
1 4 150 non-null float64
2 setosa 150 non-null float64
3 versicolor 150 non-null float64
4 disorder 150 non-null object
dtypes: float64(4), object(1)
memory usage: 6.0+ KB

In [64]: data.describe()  # count, mean, std, min, quartiles and max of the float columns

Out[64]: 150 4 setosa versicolor

count 150.000000 150.000000 150.000000 150.000000

mean 5.843333 3.057333 3.758000 1.199333

std 0.828066 0.435866 1.765298 0.762238

min 4.300000 2.000000 1.000000 0.100000

25% 5.100000 2.800000 1.600000 0.300000

50% 5.800000 3.000000 4.350000 1.300000

75% 6.400000 3.300000 5.100000 1.800000

max 7.900000 4.400000 6.900000 2.500000

In [65]: # 2) Perform the label encoding operation on the last column.


# Inspect the distinct class labels in the 'disorder' column before encoding them.
data['disorder'].unique()

Out[65]: array(['Normal', 'left eye disorder', 'right eye disorder'], dtype=object)


In [79]: # Assuming the last column is the target column you want to encode
# The label column is taken to be whichever column comes last in the frame.
target_column = data.columns[-1]

# Learn the class-to-integer mapping first ...
label_encoder = LabelEncoder()
label_encoder.fit(data[target_column])

# ... then overwrite the column with the integer codes.
data[target_column] = label_encoder.transform(data[target_column])

# Show the frame with the encoded label column.
data

Out[79]: 150 4 setosa versicolor disorder

0 5.1 3.5 1.4 0.2 0

1 4.9 3.0 1.4 0.2 0

2 4.7 3.2 1.3 0.2 0

3 4.6 3.1 1.5 0.2 0

4 5.0 3.6 1.4 0.2 0

... ... ... ... ... ...

145 6.7 3.0 5.2 2.3 2

146 6.3 2.5 5.0 1.9 2

147 6.5 3.0 5.2 2.0 2

148 6.2 3.4 5.4 2.3 2

149 5.9 3.0 5.1 1.8 2

150 rows × 5 columns

In [76]: #3) Perform standard scaler, min/max scaler, and uniform scaler operations on the same dataset
In [81]: # Extract the numerical features (assuming they are all numeric in this case)
# Build the frame that every scaler cell below is fitted on.
# NOTE(review): this removes the 'versicolor' measurement column and keeps the
# label-encoded 'disorder' column, so the target is scaled as if it were a
# feature. Confirm whether data.columns[-1] should be dropped instead.
numeric_features = data.drop('versicolor', axis=1)

# StandardScaler: fit on the selected columns and transform them in one step.
standard_scaler = StandardScaler()
# This assignment used to be commented out, which left `standard_scaled_data`
# undefined on a fresh kernel (NameError on the next line).
standard_scaled_data = standard_scaler.fit_transform(numeric_features)

# fit_transform returns a plain array; wrap it so the column labels are kept.
iris_df_standard_scaled = pd.DataFrame(standard_scaled_data, columns=numeric_features.columns)
iris_df_standard_scaled

Out[81]: 150 4 setosa disorder

0 -0.900681 1.019004 -1.315444 -1.224745

1 -1.143017 -0.131979 -1.315444 -1.224745

2 -1.385353 0.328414 -1.315444 -1.224745

3 -1.506521 0.098217 -1.315444 -1.224745

4 -1.021849 1.249201 -1.315444 -1.224745

... ... ... ... ...

145 1.038005 -0.131979 1.448832 1.224745

146 0.553333 -1.282963 0.922303 1.224745

147 0.795669 -0.131979 1.053935 1.224745

148 0.432165 0.788808 1.448832 1.224745

149 0.068662 -0.131979 0.790671 1.224745

150 rows × 4 columns

In [82]: # MinMaxScaler
# Fit the min/max scaler on the same column selection used by the other scaler cells.
min_max_scaler = MinMaxScaler()
min_max_scaled_data = min_max_scaler.fit_transform(numeric_features)

# The export cut this line off mid-identifier; the recorded output header shows
# the scaled frame carries the same column labels as `numeric_features`.
iris_df_min_max_scaled = pd.DataFrame(min_max_scaled_data, columns=numeric_features.columns)
iris_df_min_max_scaled

Out[82]: 150 4 setosa disorder

0 0.222222 0.625000 0.067797 0.0

1 0.166667 0.416667 0.067797 0.0

2 0.111111 0.500000 0.050847 0.0

3 0.083333 0.458333 0.084746 0.0

4 0.194444 0.666667 0.067797 0.0

... ... ... ... ...

145 0.666667 0.416667 0.711864 1.0

146 0.555556 0.208333 0.677966 1.0

147 0.611111 0.416667 0.711864 1.0

148 0.527778 0.583333 0.745763 1.0

149 0.444444 0.416667 0.694915 1.0

150 rows × 4 columns


In [83]: # RobustScaler
# Fit the robust scaler on the same column selection used by the other scaler cells.
# RobustScaler must be imported from sklearn.preprocessing for this cell to run.
robust_scaler = RobustScaler()
robust_scaled_data = robust_scaler.fit_transform(numeric_features)

# The export cut this line off mid-identifier; the recorded output header shows
# the scaled frame carries the same column labels as `numeric_features`.
iris_df_robust_scaled = pd.DataFrame(robust_scaled_data, columns=numeric_features.columns)
iris_df_robust_scaled

Out[83]: 150 4 setosa disorder

0 -0.538462 1.0 -0.842857 -0.5

1 -0.692308 0.0 -0.842857 -0.5

2 -0.846154 0.4 -0.871429 -0.5

3 -0.923077 0.2 -0.814286 -0.5

4 -0.615385 1.2 -0.842857 -0.5

... ... ... ... ...

145 0.692308 0.0 0.242857 0.5

146 0.384615 -1.0 0.185714 0.5

147 0.538462 0.0 0.242857 0.5

148 0.307692 0.8 0.300000 0.5

149 0.076923 0.0 0.214286 0.5

150 rows × 4 columns

In [84]: #4) Check for null values in the dataset; if any are present, replace them with zeros

In [46]: iris_df.isnull()

Out[46]: 150 4 setosa versicolor disorder

0 False False False False False

1 False False False False False

2 False False False False False

3 False False False False False

4 False False False False False

... ... ... ... ... ...

145 False False False False False

146 False False False False False

147 False False False False False

148 False False False False False

149 False False False False False

150 rows × 5 columns

In [50]: iris_df=iris_df.fillna(0,inplace=True)
In [ ]: #5) Apply principal component analysis and SMOTE operations on the same dataset

In [ ]: ​

In [ ]: ​

In [51]: #6) Separate the columns of the dataset into independent variables and the dependent variable

In [89]: indep = data.columns[:len(data.columns)-1]


dep = data.columns[len(data.columns)-1:]

print("independent",indep)
print("dependent",dep)

independent Index(['150', '4', 'setosa', 'versicolor'], dtype='object')


dependent Index(['disorder'], dtype='object')

You might also like