5 Missing Values - Jupyter Notebook

You might also like

Download as pdf or txt
Download as pdf or txt
You are on page 1of 3

In 

[1]: #missing values

In [1]: import numpy as np


import matplotlib.pyplot as plt
import pandas as pd

In [3]: dataset = pd.read_csv("D:\\Course\\Python\\Datasets\\Data.csv")  # NOTE(review): absolute Windows path - point this at your own copy of Data.csv before running

In [4]: dataset

...

In [4]: X = dataset.iloc[:, :-1].to_numpy()  # every row, all columns except the last, as a NumPy array


In [5]: X
...

In [10]: #y = dataset.iloc[:, 3].values

In [11]: #y

In [9]: #Step 1 - Import the method from Libraries



from sklearn.impute import SimpleImputer

# Create an imputer object from the imported SimpleImputer class

# Configure the imputer to treat NaN as the missing marker and to fill it using the 'mean' strategy.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')

In [10]: imputer = imputer.fit(X[:, 1:3])  # fit on columns 1 and 2 only (the slice 1:3 excludes index 3)


#imputer

In [11]: X[:, 1:3] = imputer.transform(X[:, 1:3])  # write the imputed values back into the same two columns of X

In [12]: X

...

In [10]: dataset = pd.DataFrame(X)  # NOTE(review): rebinds `dataset`, so the originally loaded frame is no longer reachable; no `columns=` is passed, so the new frame gets default integer column labels

In [11]: dataset

...
Another method for handling missing values
In [46]: data = pd.read_csv("D:\\Course\\Python\\Datasets\\Missing Data.csv")  # NOTE(review): absolute Windows path - point this at your own copy of Missing Data.csv before running

In [47]: data
...

In [28]: # To check the count of missing values in Each Column



# isnull() builds a boolean mask of the frame; sum() then adds it up column-wise,
# giving the number of missing entries in each column without a Python-level loop.
data.isnull().sum()
...

In [29]: # Filling the missing values - Age and Salary are continuous, so we will replace their missing values with the mean

# syntax - Datasetname['Missing value column name'].fillna(mean )
data

...

In [30]: # replace missing value with mean in age and Salary



# Assign the filled Series back to the column instead of calling
# fillna(..., inplace=True) on data['Age'] / data['Salary']: the in-place form
# operates on an intermediate selection, which triggers a chained-assignment
# FutureWarning on pandas 2.x and leaves `data` untouched under Copy-on-Write.
data['Age'] = data['Age'].fillna(data['Age'].mean())
data['Salary'] = data['Salary'].fillna(data['Salary'].mean())

In [16]: data

...

In [31]: data['Gender'].mode()

Out[31]: 0 Male

dtype: object

In [54]: # Replace the Discrete datatype with mode



# mode() returns a Series (there can be several modes); [0] takes its first entry.
# The result is assigned back to the column rather than using inplace=True on
# the selection, which would not reliably update `data` (chained assignment).
data['Gender'] = data['Gender'].fillna(data['Gender'].mode()[0])

In [55]: data
...

In [58]: # we can also replace the value manually



# Keep the mean of the 'Age' column in a variable so it can be passed to fillna explicitly.
test = data['Age'].mean()
test

...
In [ ]: data['Salary'].fillna(test,inplace=True)
#data['Salary'].fillna(data['Age'].mean(),inplace=True)
#data['salary'].fillna(38.7777778,inplace=True)

In [ ]: data['salary'].fillna(38.7777778,inplace=True)

You might also like