Professional Documents
Culture Documents
Grafik
Grafik
ipynb - Colaboratory
import numpy as np # Array handler
import pandas as pd # Data analysis
import seaborn as sns #Visualization
import matplotlib.pyplot as plt #Visualization
from sklearn.preprocessing import LabelEncoder # Handling categorial features
from sklearn.preprocessing import MinMaxScaler # Features scaler
import scipy
from sklearn.cluster import KMeans # Clustering machine learning
from sklearn.metrics import silhouette_score
# Suppress every warning for the rest of the session. Note that this also
# hides deprecation notices emitted by the libraries imported above.
import warnings
warnings.filterwarnings("ignore")
# Colab-specific: mount Google Drive at /content/drive so the dataset file
# stored there can be read by path.
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
# Location of the dataset CSV on the mounted Drive.
file = '/content/drive/MyDrive/data/CARS_1.csv'
# Read the CSV into the DataFrame used by the rest of the notebook.
df = pd.read_csv(file)
# Preview the first rows of the loaded data.
df.head()
Maruti
0 51 Petrol 998 3 5.0 Auto
Alto K10
Maruti
1 86 Petrol 1462 4 5.0 Auto
Brezza
Mahindra
2 242 Diesel 2184 4 4.0 Auto
Thar
Mahindra
3 313 Diesel 2198 4 7.0 Auto
XUV700
Mahindra
4 Scorpio- 107 Diesel 2198 4 7.0 Auto
N
# Summary statistics of the DataFrame columns.
df.describe()
# Count of missing values per column.
df.isnull().sum()
car_name 0
reviews_count 0
fuel_type 0
engine_displacement 0
no_cylinder 0
seating_capacity 1
transmission_type 0
https://colab.research.google.com/drive/122_9pX1ERN-K1kfqOBmbqyj8YkoZObuS#scrollTo=uYA0M4nYAvFQ&printMode=true 1/4
6/15/23, 10:12 AM 825210140_Grafik.ipynb - Colaboratory
fuel_tank_capacity 0
body_type 0
rating 0
starting_price 0
ending_price 0
max_torque_nm 0
max_torque_rpm 0
max_power_bhp 0
max_power_rp 0
dtype: int64
# Remove every row containing at least one missing value, modifying df
# in place.
df.dropna(inplace = True)
# Re-count missing values per column to confirm none remain.
df.isnull().sum()
car_name 0
reviews_count 0
fuel_type 0
engine_displacement 0
no_cylinder 0
seating_capacity 0
transmission_type 0
fuel_tank_capacity 0
body_type 0
rating 0
starting_price 0
ending_price 0
max_torque_nm 0
max_torque_rpm 0
max_power_bhp 0
max_power_rp 0
dtype: int64
# New feature: mid-point of starting_price and ending_price, rounded to
# two decimals.
df['average_price'] = round((df['starting_price']+df['ending_price'])/2,2)
# Keep only a selected subset of columns.
# NOTE(review): the line below is cut off in this copy of the notebook (the
# string and the column list are unterminated) - restore the full column
# list from the original notebook before running.
df= df[['car_name','reviews_count','fuel_type','engine_displacement','no_cylinder','seating_capacity','transmission_type','fuel_tank_capa
# Preview the first rows after the column selection.
df.head()
Maruti
0 51 Petrol 998 3 5.0 Auto
Alto K10
Maruti
1 86 Petrol 1462 4 5.0 Auto
Brezza
Mahindra
2 242 Diesel 2184 4 4.0 Auto
Thar
Mahindra
3 313 Diesel 2198 4 7.0 Auto
XUV700
Mahindra
4 Scorpio- 107 Diesel 2198 4 7.0 Auto
N
# Split the columns of df by dtype and print each group as a comma-separated
# list terminated by a full stop.
numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
listCat = df.select_dtypes(include='object').columns.tolist()
listNum = df.select_dtypes(include=numerics).columns.tolist()
# str.join replaces the index-based concatenation loops. An empty column
# list still produces an empty string, exactly as the loops did.
tempstring1 = ', '.join(listCat) + '.' if listCat else ''
tempstring2 = ', '.join(listNum) + '.' if listNum else ''
print('Categorial features: '+tempstring1)
print('Numerical features: '+tempstring2)
# Draw one box plot per numeric column of df on a single large figure.
# The 3x5 grid has room for at most 15 columns; plt.subplot raises
# ValueError for an index beyond that.
plt.figure(figsize = (40,18))
# enumerate(start=1) supplies the 1-based subplot position directly, so no
# manual counter is needed.
for subplot, i in enumerate(listNum, start=1):
    plt.subplot(3,5,subplot)
    sns.boxplot(data=df, x=i)
plt.show()
# Draw one histogram per numeric column of df, laid out on a 3x5 grid of
# subplots within a single large figure.
plt.figure(figsize=(40, 18))
for position, column in enumerate(listNum, start=1):
    plt.subplot(3, 5, position)
    sns.histplot(data=df, x=column)
plt.show()
https://colab.research.google.com/drive/122_9pX1ERN-K1kfqOBmbqyj8YkoZObuS#scrollTo=uYA0M4nYAvFQ&printMode=true 3/4
6/15/23, 10:12 AM 825210140_Grafik.ipynb - Colaboratory
# Draw one count plot per categorical column of df, side by side in a
# single row.
plt.figure(figsize = (24,6))
# The grid used to be fixed at 3 columns, which makes plt.subplot raise
# ValueError as soon as there are more than three categorical columns.
# Keep 3 as the minimum (same layout as before) and widen only when needed.
n_cols = max(3, len(listCat))
for subplot, i in enumerate(listCat, start=1):
    plt.subplot(1, n_cols, subplot)
    sns.countplot(data=df, x=i)
    if i == 'body_type':
        # Rotate the tick labels of the body_type plot by 90 degrees.
        plt.xticks(rotation = 90)
plt.show()
https://colab.research.google.com/drive/122_9pX1ERN-K1kfqOBmbqyj8YkoZObuS#scrollTo=uYA0M4nYAvFQ&printMode=true 4/4