Download as pdf or txt
Download as pdf or txt
You are on page 1 of 4

6/15/23, 10:12 AM 825210140_Grafik.ipynb - Colaboratory

import numpy as np # Array handler
import pandas as pd # Data analysis
import seaborn as sns  #Visualization
import matplotlib.pyplot as plt #Visualization

from sklearn.preprocessing import LabelEncoder # Handling categorial features
from sklearn.preprocessing import MinMaxScaler # Features scaler
import scipy

from sklearn.cluster import KMeans # Clustering machine learning
from sklearn.metrics import silhouette_score

import warnings
warnings.filterwarnings("ignore")

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive

# Path of the cars dataset inside the mounted Google Drive.
file = '/content/drive/MyDrive/data/CARS_1.csv'
# Read the CSV into the DataFrame that the following cells work on.
df = pd.read_csv(filepath_or_buffer=file)

# Preview the first rows of the loaded data.
df.head()

car_name reviews_count fuel_type engine_displacement no_cylinder seating_capacity transmission

Maruti
0 51 Petrol 998 3 5.0 Auto
Alto K10

Maruti
1 86 Petrol 1462 4 5.0 Auto
Brezza

Mahindra
2 242 Diesel 2184 4 4.0 Auto
Thar

Mahindra
3 313 Diesel 2198 4 7.0 Auto
XUV700

Mahindra
4 Scorpio- 107 Diesel 2198 4 7.0 Auto
N

# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

reviews_count engine_displacement no_cylinder seating_capacity fuel_tank_capacity ratin

count 203.000000 203.000000 203.000000 202.000000 203.000000 203.00000

mean 118.684729 2305.921182 4.709360 5.014851 46.143842 4.43349

std 314.331318 1493.837786 2.538664 1.161050 28.904207 0.28013

min 1.000000 0.000000 0.000000 2.000000 0.000000 3.00000

25% 5.000000 1339.500000 4.000000 5.000000 33.500000 4.50000

50% 14.000000 1991.000000 4.000000 5.000000 50.000000 4.50000

75% 83.000000 2996.000000 6.000000 5.000000 66.000000 4.50000

max 2392.000000 6750.000000 12.000000 8.000000 100.000000 5.00000

# Count the missing values in each column.
df.isnull().sum()

car_name 0
reviews_count 0
fuel_type 0
engine_displacement 0
no_cylinder 0
seating_capacity 1
transmission_type 0

https://colab.research.google.com/drive/122_9pX1ERN-K1kfqOBmbqyj8YkoZObuS#scrollTo=uYA0M4nYAvFQ&printMode=true 1/4
6/15/23, 10:12 AM 825210140_Grafik.ipynb - Colaboratory
fuel_tank_capacity 0
body_type 0
rating 0
starting_price 0
ending_price 0
max_torque_nm 0
max_torque_rpm 0
max_power_bhp 0
max_power_rp 0
dtype: int64

# Remove every row that contains at least one missing value, modifying df in place.
df.dropna(axis=0, how='any', inplace=True)
# Recount the missing values per column after the rows were dropped.
df.isnull().sum()

car_name 0
reviews_count 0
fuel_type 0
engine_displacement 0
no_cylinder 0
seating_capacity 0
transmission_type 0
fuel_tank_capacity 0
body_type 0
rating 0
starting_price 0
ending_price 0
max_torque_nm 0
max_torque_rpm 0
max_power_bhp 0
max_power_rp 0
dtype: int64

# Mean of the starting and ending price of each car, rounded to two decimals.
df['average_price'] = ((df['starting_price'] + df['ending_price']) / 2).round(2)
df= df[['car_name','reviews_count','fuel_type','engine_displacement','no_cylinder','seating_capacity','transmission_type','fuel_tank_capa
df.head()

car_name reviews_count fuel_type engine_displacement no_cylinder seating_capacity transmission

Maruti
0 51 Petrol 998 3 5.0 Auto
Alto K10

Maruti
1 86 Petrol 1462 4 5.0 Auto
Brezza

Mahindra
2 242 Diesel 2184 4 4.0 Auto
Thar

Mahindra
3 313 Diesel 2198 4 7.0 Auto
XUV700

Mahindra
4 Scorpio- 107 Diesel 2198 4 7.0 Auto
N

# Split the columns of df by dtype: object columns are treated as categorical,
# integer/float columns as numerical.
numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
listCat = df.select_dtypes(include='object').columns.tolist()
listNum = df.select_dtypes(include=numerics).columns.tolist()

# Render each list as "a, b, c." using str.join instead of index loops with
# repeated concatenation. An empty list yields an empty string, so nothing
# (not even the trailing period) is printed after the label in that case.
tempstring1 = (', '.join(listCat) + '.') if listCat else ''
tempstring2 = (', '.join(listNum) + '.') if listNum else ''

print('Categorial features: '+tempstring1)
print('Numerical features: '+tempstring2)

Categorial features: car_name, fuel_type, transmission_type, body_type.


Numerical features: reviews_count, engine_displacement, no_cylinder, seating_capacity, fuel_tank_capacity, rating, starting_price,

# Draw one boxplot per numerical feature on a 3x5 grid of subplots.
plt.figure(figsize=(40, 18))
for position, column in enumerate(listNum, start=1):
    # Subplot positions are 1-based, hence enumerate(..., start=1).
    axis = plt.subplot(3, 5, position)
    sns.boxplot(data=df, x=column, ax=axis)
plt.show()

# Draw one histogram per numerical feature on a 3x5 grid of subplots.
plt.figure(figsize=(40, 18))
for position, column in enumerate(listNum, start=1):
    # Subplot positions are 1-based, hence enumerate(..., start=1).
    axis = plt.subplot(3, 5, position)
    sns.histplot(data=df, x=column, ax=axis)
plt.show()

https://colab.research.google.com/drive/122_9pX1ERN-K1kfqOBmbqyj8YkoZObuS#scrollTo=uYA0M4nYAvFQ&printMode=true 3/4
6/15/23, 10:12 AM 825210140_Grafik.ipynb - Colaboratory

# Draw one count plot per categorical feature, side by side in a single row.
plt.figure(figsize=(24, 6))
# The grid keeps its original three columns but grows when listCat holds more
# entries; a fixed 1x3 grid makes plt.subplot fail on the fourth feature
# because the subplot index would exceed the number of grid cells.
n_cols = max(3, len(listCat))
for position, column in enumerate(listCat, start=1):
    plt.subplot(1, n_cols, position)
    sns.countplot(data=df, x=column)
    if column == 'body_type':
        # Rotate the tick labels of this panel (presumably to keep the
        # body-type names readable).
        plt.xticks(rotation=90)
plt.show()

Produk berbayar Colab Batalkan kontrak di sini


check 2 d selesai pada 10.10

https://colab.research.google.com/drive/122_9pX1ERN-K1kfqOBmbqyj8YkoZObuS#scrollTo=uYA0M4nYAvFQ&printMode=true 4/4

You might also like