
Day1.ipynb

print('hello world')

hello world

t=1
bal=100000
mpin=1234
print('Welcome to Mevi-RRIT Bank')
while(t<=3):
    upin=int(input('Enter Pin'))
    if(upin==mpin):
        print('Successfully logged in')
        print('1-Withdraw\n2-Deposit')
        ch=input('Enter your choice')
        if(ch=='1'):
            amt=int(input('Enter the Amount to withdraw'))
            bal=bal-amt
            print('your new balance is',bal)
        elif(ch=='2'):
            amt=int(input('Enter the amount to deposit'))
            bal=bal+amt
            print('your new balance is',bal)
        break
    elif(t==3):
        print('sorry account blocked')
    else:
        print('Invalid Pin Try again')
    t=t+1

Welcome to Mevi-RRIT Bank


Enter Pin1234
Successfully logged in
1-Withdraw
2-Deposit
Enter your choice2
Enter the amount to deposit20000
your new balance is 120000

v=2
while(v):
    ans=input('Please give me your phone')
    if(ans=='yes'):
        print('thank you, you are so sweet')
        v=0
    else:
        print('please please please')


Please give me your phoneno


please please please
Please give me your phoneni
please please please
Please give me your phoneyes
thank you, you are so sweet

bill=0
print('Welcome to MEVI-RRIT Super Market')
while(True):
    print('1-Rice Flour-30Rs/kg\n2-CornFlakes-40Rs/kg\n3-exit')
    g=int(input('Enter your choice'))
    if(g==1):
        q=int(input('enter the quantity'))
        bill+=30*q   # same as bill = bill + 30*q
    elif(g==2):
        q=int(input('enter the quantity'))
        bill+=40*q
    print('Detergents')
    print('1-Surfexcel-30rs\n2-Tide-35Rs\n3-exit')
    d=int(input('Enter your choice'))
    if(d==1):
        q=int(input('enter the quantity'))
        bill+=30*q
    elif(d==2):
        q=int(input('enter the quantity'))
        bill+=35*q
    print('your total bill is',bill)
    ch=input('do you want to exit')
    if(ch=='yes'):
        print('thank you')
        break

Welcome to MEVI-RRIT Super Market


1-Rice Flour-30Rs/kg
2-CornFlakes-40Rs/kg
3-exit
Enter your choice1
enter the quantity2
Detergents
1-Surfexcel-30rs
2-Tide-35Rs
3-exit
Enter your choice3
your total bill is 60
do you want to exitno
1-Rice Flour-30Rs/kg
2-CornFlakes-40Rs/kg
3-exit
Enter your choice2
enter the quantity2
Detergents
1-Surfexcel-30rs
2-Tide-35Rs
3-exit
Enter your choice3
your total bill is 140
do you want to exityes
thank you

a=1
print(type(a))

<class 'int'>

a=10


Untitled64.ipynb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#read the data


df=pd.read_csv('/content/zomato_df_bangalore.csv')

#explore the data


df.head(3)

   Unnamed: 0  Restaurant ID Restaurant Name  Number of Orders   Price         City  Rating             Food Name
0           0              1   Spicy Delight               450  150.50  Koramangala     4.5           Masala Dosa
1           1              2     Urban Diner               320  200.00  Indiranagar     4.0        Butter Chicken
2           2              3    Green Garden               280  250.75   Whitefield     3.8  Paneer Butter Masala

df.tail(1)

    Unnamed: 0  Restaurant ID Restaurant Name  Number of Orders    Price         City  Rating       Food Name
49          49             50    Fusion Feast               390  2650.75  Thanisandra     4.8  Mutton Frankie

df.shape

(50, 8)

df.size

400

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Unnamed: 0 50 non-null int64

1 Restaurant ID 50 non-null int64
2 Restaurant Name 50 non-null object
3 Number of Orders 50 non-null int64
4 Price 50 non-null float64
5 City 50 non-null object
6 Rating 50 non-null float64
7 Food Name 50 non-null object
dtypes: float64(2), int64(3), object(3)
memory usage: 3.2+ KB

#Analyse the data

df.describe()

Unnamed: 0 Restaurant ID Number of Orders Price Rating

count 50.00000 50.00000 50.000000 50.000000 50.000000

mean 24.50000 25.50000 376.400000 1403.389000 4.220000

std 14.57738 14.57738 57.738326 750.542149 0.333197

min 0.00000 1.00000 260.000000 150.500000 3.700000

25% 12.25000 13.25000 340.000000 763.300000 3.925000

50% 24.50000 25.50000 375.000000 1425.625000 4.200000

75% 36.75000 37.75000 410.000000 2038.187500 4.500000

max 49.00000 50.00000 500.000000 2650.750000 4.800000

#visualizing Data

import matplotlib.pyplot as plt

df.groupby('Price')['Number of Orders'].mean()

Price
150.50 450.0
200.00 320.0
250.75 280.0
300.20 500.0
350.00 350.0
400.25 410.0
450.10 290.0
500.75 470.0
550.20 340.0
600.40 330.0
650.60 480.0
700.80 260.0
750.90 390.0
800.50 410.0
850.75 320.0
900.00 460.0
950.25 300.0
1000.50 350.0
1050.75 400.0
1100.00 370.0
1150.25 420.0
1200.50 380.0
1300.00 310.0
1350.25 360.0
1400.50 490.0
1450.75 300.0
1500.00 410.0
1550.25 290.0
1600.50 460.0
1650.75 370.0
1700.00 430.0
1750.25 380.0
1800.50 340.0
1850.75 320.0
1900.00 400.0
1950.25 350.0
2000.50 370.0
2050.75 360.0
2100.00 440.0
2150.25 350.0
2200.50 410.0
2250.75 390.0
2300.00 310.0
2350.25 400.0
2400.50 420.0
2450.75 360.0
2500.00 340.0
2550.25 410.0
2600.50 380.0
2650.75 390.0
Name: Number of Orders, dtype: float64

df.groupby('Food Name')['Price'].mean().plot(kind='pie')


<Axes: ylabel='Price'>

import numpy as np
x=np.array([1,2,3,4,5,6,7,8,9,10])
print(x)
y=x**2
print(y)

[ 1 2 3 4 5 6 7 8 9 10]
[ 1 4 9 16 25 36 49 64 81 100]

print(np.max(x))
print(np.min(x))
print(np.mean(x))
print(np.std(x))
print(np.sum(x))
print(np.sqrt(y))

10
1
5.5
2.8722813232690143
55
[ 1. 2. 3. 4. 5. 6. 7. 8. 9. 10.]

ar=x[np.where(x%2==0)]
ar

array([ 2, 4, 6, 8, 10])
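The same selection can be written with a boolean mask directly, which is the more idiomatic NumPy form and gives the same result without the explicit np.where call:

# boolean masking: keep the elements where the condition holds
even = x[x % 2 == 0]
even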


a=[[1,2,3],[4,5,6],[7,8,9]]
print(a)
ar=np.array(a)
print(ar)
print(ar[1][2])

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]


[[1 2 3]
[4 5 6]
[7 8 9]]
6

ar=np.array([12,13,14,156,12.6,98.7])
m=ar[np.argmax(ar)]
m

156.0

a=[1,2,3,4,5,6,7,8]
ar=np.array(a)
print(ar)
m=ar.reshape(4,2)
print(m)

[1 2 3 4 5 6 7 8]
[[1 2]
[3 4]
[5 6]
[7 8]]

print(x)
print(y)

[ 1 2 3 4 5 6 7 8 9 10]
[ 1 4 9 16 25 36 49 64 81 100]

plt.scatter(x,y,marker='p',color='r')


<matplotlib.collections.PathCollection at 0x7ab0b3e7c1f0>

plt.plot(x,y,marker='o',linestyle='--',color='r',markerfacecolor='b')
plt.xlabel('Experience')
plt.ylabel('Salary')
plt.title('EX vs Sal')


Text(0.5, 1.0, 'EX vs Sal')

x=['Apples','Oranges','Papayas','Kiwis']
y=[1257,754,678,986]
plt.bar(x,y,color='red',edgecolor='k')

<BarContainer object of 4 artists>


x=[8,2,6,1,1,4,2]
e=[0.1,0.1,0.1,0.1,0.1,0.2,0.2]
c=['yellow','green','blue','orange','black','red','pink']
l=['College','Eating','Sleeping','Household work','Friends','Mobile','Other']  # last label lost to page truncation; 'Other' is a placeholder so len(l) matches len(x)

len(x)==len(l)   # sanity check: one label per slice (the original compared len(x) to itself)

True

plt.pie(x,labels=l,autopct='%.2f%%',explode=e,colors=c)

([<matplotlib.patches.Wedge at 0x7ab0b1273ac0>,
<matplotlib.patches.Wedge at 0x7ab0b1273a00>,
<matplotlib.patches.Wedge at 0x7ab0b12587f0>,
<matplotlib.patches.Wedge at 0x7ab0b1258e80>,
<matplotlib.patches.Wedge at 0x7ab0b1259510>,
<matplotlib.patches.Wedge at 0x7ab0b1259ba0>,

Linear Regression.ipynb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

df=pd.read_csv('/content/Salary_Data (2).csv')

df.head()

YearsExperience Salary

0 1.1 39343.0

1 1.3 46205.0

2 1.5 37731.0

3 2.0 43525.0

4 2.2 39891.0

df.shape

(30, 2)

df.isnull().sum()

YearsExperience 0
Salary 0
dtype: int64

plt.scatter(df['YearsExperience'],df['Salary'],marker='*')


<matplotlib.collections.PathCollection at 0x7e5cb2cba3e0>

x=df.drop('Salary',axis=1)

y=df.Salary

0 39343.0
1 46205.0
2 37731.0
3 43525.0
4 39891.0
5 56642.0
6 60150.0
7 54445.0
8 64445.0
9 57189.0
10 63218.0
11 55794.0
12 56957.0
13 57081.0
14 61111.0
15 67938.0
16 66029.0
17 83088.0
18 81363.0
19 93940.0
20 91738.0
21 98273.0
22 101302.0
23 113812.0

24 109431.0
25 105582.0
26 116969.0
27 112635.0
28 122391.0
29 121872.0
Name: Salary, dtype: float64

from sklearn.model_selection import train_test_split

xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.2)

xtrain.shape

(24, 1)

xtest.shape

(6, 1)

from sklearn.linear_model import LinearRegression

LR=LinearRegression()

LR.fit(xtrain,ytrain)

▾ LinearRegression
LinearRegression()

pred=LR.predict(xtest)

pred

array([111941.18950598,  63174.58719623,  52445.93468808, 124620.50610651,
        53421.26673428,  34889.95785657])

ytest

25 105582.0
12 56957.0
5 56642.0
28 122391.0
6 60150.0
0 39343.0
Name: Salary, dtype: float64


a=int(input('Enter exp'))
ar=np.array([a]).reshape(1,-1)
ar

Enter exp15
array([[15]])

salary = LR.predict(ar)

/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names
warnings.warn(

salary

array([170461.11227768])
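The UserWarning above appears because LR was fitted on a DataFrame (so it remembers the column name YearsExperience) but predict was called with a bare NumPy array. A minimal sketch of the fix is to wrap the user input in a DataFrame with the matching column name:

# predict from a DataFrame whose column matches the training data; no warning is raised
u = pd.DataFrame({'YearsExperience': [a]})
salary = LR.predict(u)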


Logistic Regression FDP RRIT -MEVI.ipynb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

df=pd.read_csv('/content/heart 2.csv')

df.head()

   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  ca  thal  target
0   52    1   0       125   212    0        1      168      0      1.0      2   2     3       0
1   53    1   0       140   203    1        0      155      1      3.1      0   0     3       0
2   70    1   0       145   174    0        1      125      1      2.6      0   0     3       0
3   61    1   0       148   203    0        1      161      0      0.0      2   1     3       0
4   62    0   0       138   294    1        1      106      0      1.9      1   3     2       0


x=df.drop('target',axis=1)

y=df.target

from sklearn.model_selection import train_test_split

xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.3,random_state=0)  # truncated in the source; random_state=0 assumed, matching the companion notebook on the same data

from sklearn.linear_model import LogisticRegression

lr=LogisticRegression()

lr.fit(xtrain,ytrain)


/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning:
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
▾ LogisticRegression
LogisticRegression()
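The ConvergenceWarning above is exactly what its message says: the lbfgs solver hit its iteration limit on unscaled features. A minimal sketch of the two fixes the warning itself suggests, either raising max_iter or scaling the data first:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# option 1: allow more iterations
lr = LogisticRegression(max_iter=1000)

# option 2: standardize features first, which usually lets the solver converge quickly
lr = make_pipeline(StandardScaler(), LogisticRegression())
lr.fit(xtrain, ytrain)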

xtest

age sex cp trestbps chol fbs restecg thalach exang oldpeak slope

952 54 0 2 135 304 1 1 170 0 0.0 2

411 56 1 0 125 249 1 0 144 1 1.2 1

363 53 1 2 130 246 1 0 173 0 0.0 2

234 49 0 0 130 269 0 1 163 0 0.0 2

431 65 0 0 150 225 0 0 114 0 1.0 1

... ... ... ... ... ... ... ... ... ... ... ...

947 54 0 2 160 201 0 1 163 0 0.0 2

157 54 1 2 120 258 0 0 147 0 0.4 1

278 55 1 0 160 289 0 0 145 1 0.8 1

404 61 1 0 140 207 0 0 138 1 1.9 2

487 65 1 0 135 254 0 0 127 0 2.8 1

308 rows × 13 columns


pred=lr.predict(xtest)

pred

array([1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1,
1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1,
0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,
0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0,
1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0,
1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1,

0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1,
0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0,
1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1,
0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1,
1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1,
0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0])

ytest.values

array([1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1,
1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0,
0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,
0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0,
1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1,
1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1,
0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1,
0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1,
0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1,
0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0,
1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1,
1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0])

from sklearn.metrics import confusion_matrix,accuracy_score

cm=confusion_matrix(ytest,pred)

cm

array([[105, 31],
[ 25, 147]])

ytest.size

308

ac=accuracy_score(ytest,pred)

ac

0.8181818181818182
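Reading this against the confusion matrix: 105 + 147 = 252 predictions fall on the diagonal (correct) out of 308 test rows, and 252/308 ≈ 0.818, matching accuracy_score.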


Data preprocessing fdp 5.ipynb

#importing the libraries


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#Reading the dataset


df=pd.read_csv('/content/CAR DETAILS FROM CAR DEKHO (2).csv')

#exploring the dataset


df.head()

                       name  year  selling_price  km_driven    fuel seller_type transmission         owner
0             Maruti 800 AC  2007          60000      70000  Petrol  Individual       Manual   First Owner
1  Maruti Wagon R LXI Minor  2007         135000      50000  Petrol  Individual       Manual   First Owner
2      Hyundai Verna 1.6 SX  2012         600000     100000  Diesel  Individual       Manual   First Owner
3    Datsun RediGO T Option  2017         250000      46000  Petrol  Individual       Manual   First Owner
4     Honda Amaze VX i-DTEC  2014         450000     141000  Diesel  Individual       Manual  Second Owner

df.shape

(4340, 8)

df.size

34720

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4340 entries, 0 to 4339
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 name 4340 non-null object

1 year 4340 non-null int64
2 selling_price 4340 non-null int64
3 km_driven 4340 non-null int64
4 fuel 4340 non-null object
5 seller_type 4340 non-null object
6 transmission 4340 non-null object
7 owner 4340 non-null object
dtypes: int64(3), object(5)
memory usage: 271.4+ KB

#data analytics
df.describe()

year selling_price km_driven

count 4340.000000 4.340000e+03 4340.000000

mean 2013.090783 5.041273e+05 66215.777419

std 4.215344 5.785487e+05 46644.102194

min 1992.000000 2.000000e+04 1.000000

25% 2011.000000 2.087498e+05 35000.000000

50% 2014.000000 3.500000e+05 60000.000000

75% 2016.000000 6.000000e+05 90000.000000

max 2020.000000 8.900000e+06 806599.000000

df.columns

Index(['name', 'year', 'selling_price', 'km_driven', 'fuel', 'seller_type',
       'transmission', 'owner'],
      dtype='object')

df.name.value_counts().sum()

4340

len(df.name.unique())

1491

df.fuel.value_counts().plot(kind='bar')


<Axes: xlabel='fuel'>

df.isnull().sum()

name 0
year 0
selling_price 0
km_driven 0
fuel 0
seller_type 0
transmission 0
owner 0
dtype: int64

df['fuel'].unique()

array(['Petrol', 'Diesel', 'CNG', 'LPG', 'Electric'], dtype=object)


def FuelN(string):
    v=0
    if(string=='Petrol'):
        v=0
    elif(string=='Diesel'):
        v=1
    elif(string=='CNG'):
        v=2
    elif(string=='LPG'):
        v=3
    elif(string=='Electric'):
        v=4
    return v

FuelN('Diesel')

df['FuelN']=df['fuel'].apply(FuelN)

df=df.drop('fuel',axis=1)
df.head()

                       name  year  selling_price  km_driven seller_type transmission         owner  FuelN
0             Maruti 800 AC  2007          60000      70000  Individual       Manual   First Owner      0
1  Maruti Wagon R LXI Minor  2007         135000      50000  Individual       Manual   First Owner      0
2      Hyundai Verna 1.6 SX  2012         600000     100000  Individual       Manual   First Owner      1
3    Datsun RediGO T Option  2017         250000      46000  Individual       Manual   First Owner      0
4     Honda Amaze VX i-DTEC  2014         450000     141000  Individual       Manual  Second Owner      1

from sklearn.preprocessing import LabelEncoder

l=LabelEncoder()


df['seller_typeN']=l.fit_transform(df['seller_type'])

df.seller_type.unique()

array(['Individual', 'Dealer', 'Trustmark Dealer'], dtype=object)

df.seller_typeN.unique()

array([1, 0, 2])
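LabelEncoder assigns integer codes in sorted label order, so Dealer→0, Individual→1, Trustmark Dealer→2; the array above lists the codes in the order the values first appear in the data, not in code order.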

df=df.drop('seller_type',axis=1)

#repeat the conversion for transmission, owner, and the car names (a sketch of these steps follows below)

#split the data into features and target

#split the data into train and test

#import model like linear regression

#build the model

#train the model

#test the model ytest values

#take the user input and convert into numpy array as a row

#then predict for user input
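A minimal sketch of the steps listed above, reusing the LabelEncoder pattern already shown for seller_type. The input row at the end is illustrative only, and its values must follow the column order of x:

# encode the remaining text columns, as done for seller_type
for col in ['transmission', 'owner', 'name']:
    df[col + 'N'] = LabelEncoder().fit_transform(df[col])
    df = df.drop(col, axis=1)

# split into features and target
x = df.drop('selling_price', axis=1)
y = df.selling_price

# train/test split and a linear regression model
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2)
LR = LinearRegression()
LR.fit(xtrain, ytrain)
pred = LR.predict(xtest)          # compare pred against ytest.values

# predict for one user input (illustrative values, ordered as x.columns)
u = np.array([[2014, 70000, 1, 1, 1, 0, 700]])
print(LR.predict(u))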


Comparison of algorithms FDP RRIT -MEVI.ipynb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

df=pd.read_csv('/content/heart 2.csv')

df.head()

   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  ca  thal  target
0   52    1   0       125   212    0        1      168      0      1.0      2   2     3       0
1   53    1   0       140   203    1        0      155      1      3.1      0   0     3       0
2   70    1   0       145   174    0        1      125      1      2.6      0   0     3       0
3   61    1   0       148   203    0        1      161      0      0.0      2   1     3       0
4   62    0   0       138   294    1        1      106      0      1.9      1   3     2       0


x=df.drop('target',axis=1)

y=df.target

from sklearn.model_selection import train_test_split

xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.3,random_state=0)

from sklearn.linear_model import LogisticRegression

lr=LogisticRegression()

lr.fit(xtrain,ytrain)

/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning:
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
▾ LogisticRegression
LogisticRegression()

xtest


age sex cp trestbps chol fbs restecg thalach exang oldpeak slope

807 44 1 2 130 233 0 1 179 1 0.4 2

27 58 0 1 136 319 1 0 152 0 0.0 2

77 63 1 0 140 187 0 0 144 1 4.0 2

406 58 1 2 140 211 1 0 165 0 0.0 2

886 61 1 0 120 260 0 1 140 1 3.6 1

... ... ... ... ... ... ... ... ... ... ... ...

808 51 1 2 94 227 0 1 154 1 0.0 2

984 59 1 0 135 234 0 1 161 0 0.5 1

717 56 1 2 130 256 1 0 142 1 0.6 1

167 57 0 0 120 354 0 1 163 1 0.6 2

878 54 1 0 120 188 0 1 113 0 1.4 1

308 rows × 13 columns


pred=lr.predict(xtest)

pred

array([1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0,
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0,
1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1,
1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0,
1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0,
0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1,
1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0,
0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1,
1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0])

ytest.values

array([1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1,
1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0,
1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1,
1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0,
1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0,
0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1,
0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0,
0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1,
1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0])

from sklearn.metrics import confusion_matrix,accuracy_score


cm=confusion_matrix(ytest,pred)

cm

array([[118, 27],
[ 13, 150]])

ytest.size

308

LGac=accuracy_score(ytest,pred)*100
print('Logistic Regression Accuracy->',LGac)

Logistic Regression Accuracy-> 87.01298701298701

from sklearn.tree import DecisionTreeClassifier

DT=DecisionTreeClassifier()

DT.fit(xtrain,ytrain)

▾ DecisionTreeClassifier
DecisionTreeClassifier()

predDT=DT.predict(xtest)

DTac=accuracy_score(ytest,predDT)*100
print(' Decision Tree Accuracy->',DTac)

Decision Tree Accuracy-> 99.02597402597402

from sklearn.ensemble import RandomForestClassifier

RT=RandomForestClassifier(n_estimators=101)

RT.fit(xtrain,ytrain)

▾ RandomForestClassifier
RandomForestClassifier(n_estimators=101)

RTpred=RT.predict(xtest)

RTac=accuracy_score(ytest,RTpred)*100
print(' RandomForestClassifier Accuracy->',RTac)

RandomForestClassifier Accuracy-> 99.02597402597402


l=['LogisticRegression','DecisionTreeClassifier','RandomForestClassifier']
ac=[LGac,DTac,RTac]   # accuracies kept in the same order as the labels (the original list order mislabelled the bars)

plt.barh(l,ac,color='red',edgecolor='blue')

<BarContainer object of 3 artists>

from sklearn.svm import SVC

u=np.array([61,0,1,148,203,0,1,161,2,0.0,2,1,3]).reshape(1,-1)
ans=RT.predict(u)
if(int(ans)==0):
    print('Patient will not have any heart disease')
else:
    print('Patient will have heart disease')

Patient will have heart disease


/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but RandomForestClassifier was fitted with feature names
warnings.warn(
<ipython-input-126-cb483a7c6719>:3: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated
if(int(ans)==0):
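The DeprecationWarning comes from calling int() on the one-element array that predict returns; indexing out the single element first avoids it:

ans = RT.predict(u)
if(int(ans[0])==0):   # extract the single prediction before converting to int
    print('Patient will not have any heart disease')
else:
    print('Patient will have heart disease')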


Comparison of algorithms Diabetes Dataset FDP RRIT -MEVI.ipynb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

df=pd.read_csv('/content/diabetes.csv')

df.head()

   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  ...
0            6      148             72             35        0  33.6  ...
1            1       85             66             29        0  26.6  ...
2            8      183             64              0        0  23.3  ...
3            1       89             66             23       94  28.1  ...
4            0      137             40             35      168  43.1  ...

x=df.drop('Outcome',axis=1)

y=df.Outcome

from sklearn.model_selection import train_test_split

xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.2,random_state=28)

from sklearn.linear_model import LogisticRegression

lr=LogisticRegression()

lr.fit(xtrain,ytrain)

/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning:
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
▾ LogisticRegression
LogisticRegression()

xtest


Pregnancies Glucose BloodPressure SkinThickness Insulin BMI ...

728 2 175 88 0 0 22.9

392 1 131 64 14 415 23.7

68 1 95 66 13 38 19.6

48 7 103 66 32 0 39.1

74 1 79 75 30 0 32.0

... ... ... ... ... ... ...

412 1 143 84 23 310 42.4

233 4 122 68 0 0 35.0

619 0 119 0 0 0 32.4

557 8 110 76 0 0 27.8

301 2 144 58 33 135 31.6

154 rows × 8 columns

pred=lr.predict(xtest)

pred

array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0,
1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0])

ytest.values

array([0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0,
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1])

from sklearn.metrics import confusion_matrix,accuracy_score

cm=confusion_matrix(ytest,pred)

cm

array([[93, 10],
[19, 32]])

ytest.size

154


LGac=accuracy_score(ytest,pred)*100
print('Logistic Regression Accuracy->',LGac)

Logistic Regression Accuracy-> 81.16883116883116

from sklearn.tree import DecisionTreeClassifier

DT=DecisionTreeClassifier()

DT.fit(xtrain,ytrain)

▾ DecisionTreeClassifier
DecisionTreeClassifier()

predDT=DT.predict(xtest)

DTac=accuracy_score(ytest,predDT)*100
print(' Decision Tree Accuracy->',DTac)

Decision Tree Accuracy-> 70.77922077922078

from sklearn.ensemble import RandomForestClassifier,VotingClassifier

RT=RandomForestClassifier(n_estimators=101)

RT.fit(xtrain,ytrain)

▾ RandomForestClassifier
RandomForestClassifier(n_estimators=101)

RTpred=RT.predict(xtest)

RTac=accuracy_score(ytest,RTpred)*100
print(' RandomForestClassifier Accuracy->',RTac)

RandomForestClassifier Accuracy-> 79.22077922077922

vcLgDt=VotingClassifier(
    estimators=[
        ('Lg',lr),
        ('DT',DT),
    ], voting='soft'
)
vcLgDt.fit(xtrain,ytrain)
vcLgDtpred=vcLgDt.predict(xtest)
vcLgDtac=accuracy_score(ytest,vcLgDtpred)*100
print('Accuracy of combined Algorithm LR and DT is',vcLgDtac)

Accuracy of combined Algorithm LR and DT is 70.12987012987013


/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning:
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(

vcLgRt=VotingClassifier(
    estimators=[
        ('Lg',lr),
        ('RT',RT),
    ], voting='soft'
)
vcLgRt.fit(xtrain,ytrain)
vcLgRtpred=vcLgRt.predict(xtest)
vcLgRtac=accuracy_score(ytest,vcLgRtpred)*100
print('Accuracy of combined Algorithm LR and RT is',vcLgRtac)

/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning:
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
Accuracy of combined Algorithm LR and RT is 81.16883116883116

vcLgRtDT=VotingClassifier(
    estimators=[
        ('RT',RT),
        ('Lg',lr),
    ], voting='soft'
)
vcLgRtDT.fit(xtrain,ytrain)
vcLgRtDTpred=vcLgRtDT.predict(xtest)
vcLgRtDTac=accuracy_score(ytest,vcLgRtDTpred)*100
print('Accuracy of combined Algorithm LR and RT is',vcLgRtDTac)

Accuracy of combined Algorithm LR and RT is 79.87012987012987


/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning:
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(

l=['LogisticRegression','DecisionTreeClassifier','RandomForestClassifier']
ac=[LGac,DTac,RTac]   # accuracies kept in the same order as the labels (the original list order mislabelled the bars)

plt.barh(l,ac,color='red',edgecolor='blue')


<BarContainer object of 3 artists>

from sklearn.svm import SVC

u=np.array([2,144,58,33,235,31.6,0.422,35]).reshape(1,-1)
ans=RT.predict(u)
if(int(ans)==0):
    print('Patient will not have any Diabetes')
else:
    print('Patient will have Diabetes')

Patient will have Diabetes


/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but RandomForestClassifier was fitted with feature names
warnings.warn(
<ipython-input-212-a2d5fed3c7b4>:3: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated
if(int(ans)==0):


KMeans.ipynb

import matplotlib.pyplot as plt


import pandas as pd
from sklearn.preprocessing import LabelEncoder
import numpy as np
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score

data=pd.read_csv('/content/Mall_Customers.csv')
data.head()

CustomerID Gender Age Annual Income (k$) Spending Score (1-100)

0 1 Male 19 15 39

1 2 Male 21 15 81

2 3 Female 20 16 6

3 4 Female 23 16 77

4 5 Female 31 17 40

data['Spending Score (1-100)'].value_counts()

Spending Score (1-100)


42 8
55 7
46 6
73 6
35 5
..
31 1
44 1
53 1
65 1
18 1
Name: count, Length: 84, dtype: int64

features=data.columns
lb=LabelEncoder()
data['GenderN']=lb.fit_transform(data['Gender'])

data=data.drop('Gender',axis=1)
data=data.drop('CustomerID',axis=1)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):

# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Age 200 non-null int64
1 Annual Income (k$) 200 non-null int64
2 Spending Score (1-100) 200 non-null int64
3 GenderN 200 non-null int64
dtypes: int64(4)
memory usage: 6.4 KB

from sklearn.preprocessing import StandardScaler


scaler = StandardScaler()
scaled_features = scaler.fit_transform(data)
scaled_features

array([[-1.42456879, -1.73899919, -0.43480148, 1.12815215],
[-1.28103541, -1.73899919, 1.19570407, 1.12815215],
[-1.3528021 , -1.70082976, -1.71591298, -0.88640526],
[-1.13750203, -1.70082976, 1.04041783, -0.88640526],
[-0.56336851, -1.66266033, -0.39597992, -0.88640526],
[-1.20926872, -1.66266033, 1.00159627, -0.88640526],
[-0.27630176, -1.62449091, -1.71591298, -0.88640526],
[-1.13750203, -1.62449091, 1.70038436, -0.88640526],
[ 1.80493225, -1.58632148, -1.83237767, 1.12815215],
[-0.6351352 , -1.58632148, 0.84631002, -0.88640526],
[ 2.02023231, -1.58632148, -1.4053405 , 1.12815215],
[-0.27630176, -1.58632148, 1.89449216, -0.88640526],
[ 1.37433211, -1.54815205, -1.36651894, -0.88640526],
[-1.06573534, -1.54815205, 1.04041783, -0.88640526],
[-0.13276838, -1.54815205, -1.44416206, 1.12815215],
[-1.20926872, -1.54815205, 1.11806095, 1.12815215],
[-0.27630176, -1.50998262, -0.59008772, -0.88640526],
[-1.3528021 , -1.50998262, 0.61338066, 1.12815215],
[ 0.94373197, -1.43364376, -0.82301709, 1.12815215],
[-0.27630176, -1.43364376, 1.8556706 , -0.88640526],
[-0.27630176, -1.39547433, -0.59008772, 1.12815215],
[-0.99396865, -1.39547433, 0.88513158, 1.12815215],
[ 0.51313183, -1.3573049 , -1.75473454, -0.88640526],
[-0.56336851, -1.3573049 , 0.88513158, 1.12815215],
[ 1.08726535, -1.24279661, -1.4053405 , -0.88640526],
[-0.70690189, -1.24279661, 1.23452563, 1.12815215],
[ 0.44136514, -1.24279661, -0.7065524 , -0.88640526],
[-0.27630176, -1.24279661, 0.41927286, 1.12815215],
[ 0.08253169, -1.20462718, -0.74537397, -0.88640526],
[-1.13750203, -1.20462718, 1.42863343, -0.88640526],
[ 1.51786549, -1.16645776, -1.7935561 , 1.12815215],
[-1.28103541, -1.16645776, 0.88513158, -0.88640526],
[ 1.01549866, -1.05194947, -1.7935561 , 1.12815215],
[-1.49633548, -1.05194947, 1.62274124, 1.12815215],
[ 0.7284319 , -1.05194947, -1.4053405 , -0.88640526],
[-1.28103541, -1.05194947, 1.19570407, -0.88640526],
[ 0.22606507, -1.01378004, -1.28887582, -0.88640526],
[-0.6351352 , -1.01378004, 0.88513158, -0.88640526],
[-0.20453507, -0.89927175, -0.93948177, -0.88640526],
[-1.3528021 , -0.89927175, 0.96277471, -0.88640526],
[ 1.87669894, -0.86110232, -0.59008772, -0.88640526],
[-1.06573534, -0.86110232, 1.62274124, 1.12815215],
[ 0.65666521, -0.82293289, -0.55126616, 1.12815215],
[-0.56336851, -0.82293289, 0.41927286, -0.88640526],
[ 0.7284319 , -0.82293289, -0.86183865, -0.88640526],
[-1.06573534, -0.82293289, 0.5745591 , -0.88640526],
[ 0.80019859, -0.78476346, 0.18634349, -0.88640526],
[-0.85043527, -0.78476346, -0.12422899, -0.88640526],
[-0.70690189, -0.78476346, -0.3183368 , -0.88640526],
[-0.56336851, -0.78476346, -0.3183368 , -0.88640526],
[ 0.7284319 , -0.70842461, 0.06987881, -0.88640526],
[-0.41983513, -0.70842461, 0.38045129, 1.12815215],
[-0.56336851, -0.67025518, 0.14752193, -0.88640526],
[ 1.4460988 , -0.67025518, 0.38045129, 1.12815215],
[ 0.80019859, -0.67025518, -0.20187212, -0.88640526],
[ 0.58489852, -0.67025518, -0.35715836, 1.12815215],
[ 0.87196528, -0.63208575, -0.00776431, -0.88640526],
[ 2.16376569, -0.63208575, -0.16305055, 1.12815215],

from sklearn.cluster import KMeans


wcss = []
for i in range(1, 11):
    kmeans = KMeans(random_state=42, n_clusters=i)
    kmeans.fit(scaled_features)
    wcss.append(kmeans.inertia_)
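kmeans.inertia_ is the within-cluster sum of squares (WCSS): the sum of squared distances from each sample to the centroid of its assigned cluster. The elbow plot below tracks how it falls as the number of clusters grows; the "elbow" where the decrease flattens out suggests a reasonable cluster count.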

# Plot the elbow graph


plt.figure(figsize=(10, 5))
plt.plot(range(1, 11), wcss, marker='o', linestyle='--')
plt.title('Elbow Method')
plt.xlabel('Number of Clusters')
plt.ylabel('WCSS')
plt.show()


/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning.
warnings.warn(
(the same FutureWarning is repeated once per KMeans fit in the loop)

optimal_clusters = 7
kmeans = KMeans(random_state=42, n_clusters=optimal_clusters)
clusters = kmeans.fit_predict(scaled_features)

/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning.
warnings.warn(


data['Cluster'] = clusters
data

Age Annual Income (k$) Spending Score (1-100) GenderN Cluster

0 19 15 39 1 3

1 21 15 81 1 3

2 20 16 6 0 2

3 23 16 77 0 2

4 31 17 40 0 2

... ... ... ... ... ...

195 35 120 79 0 1

196 45 126 28 0 4

197 32 126 74 1 5

198 32 137 18 1 4

199 30 137 83 1 5

200 rows × 5 columns

plt.figure(figsize=(10, 5))
plt.scatter(data['Annual Income (k$)'], data['Spending Score (1-100)'], c=data['Cluster'])  # truncated in the source; colouring by the new Cluster column assumed
plt.title('Clusters of customers')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.show()


# Calculate and print evaluation metrics


silhouette_avg = silhouette_score(scaled_features, clusters)
calinski_harabasz = calinski_harabasz_score(scaled_features, clusters)
davies_bouldin = davies_bouldin_score(scaled_features, clusters)

print(f'Silhouette Score: {silhouette_avg:.3f}')
print(f'Calinski-Harabasz Index: {calinski_harabasz:.3f}')
print(f'Davies-Bouldin Index: {davies_bouldin:.3f}')

Silhouette Score: 0.357
Calinski-Harabasz Index: 76.778
Davies-Bouldin Index: 0.980

1. Silhouette Score. Range: -1 to 1. Close to 1 indicates the data points are well-clustered, very close to the centroid of their own cluster and far from other clusters. Close to 0 indicates points on or very near the boundary between clusters, implying overlapping clusters. Negative values indicate points that may have been assigned to the wrong clusters. Ideally you want a score close to 1; above 0.5 is generally considered good, and values around 0.25-0.5 may be acceptable depending on the complexity and nature of the data.
2. Calinski-Harabasz Index. Range: no fixed range (higher is better). Higher values indicate clusters that are dense and well-separated from each other. There is no absolute threshold; compare the index across different models or configurations and prefer the configuration with the highest value.
3. Davies-Bouldin Index. Range: 0 to ∞ (lower is better). Lower values indicate compact, well-separated clusters; values closer to 0 are better. As with the Calinski-Harabasz Index, compare across different models or configurations and prefer the configuration with the lowest value.
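Following the guidance above, a small sketch that computes all three metrics for a range of candidate cluster counts on this notebook's scaled_features (the range of k is an arbitrary choice; n_init is set explicitly to silence the FutureWarning seen earlier):

for k in range(2, 11):
    km = KMeans(n_clusters=k, random_state=42, n_init=10)
    labels = km.fit_predict(scaled_features)
    # one row per k: silhouette (higher better), CH (higher better), DB (lower better)
    print(k,
          round(silhouette_score(scaled_features, labels), 3),
          round(calinski_harabasz_score(scaled_features, labels), 3),
          round(davies_bouldin_score(scaled_features, labels), 3))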

car sales prediction.ipynb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

df=pd.read_csv('/content/CAR DETAILS FROM CAR DEKHO.csv')

df1=pd.read_csv('/content/CAR DETAILS FROM CAR DEKHO.csv')

df.head()

                       name  year  selling_price  km_driven    fuel seller_type transmission        owner
0             Maruti 800 AC  2007          60000      70000  Petrol  Individual       Manual  First Owner
1  Maruti Wagon R LXI Minor  2007         135000      50000  Petrol  Individual       Manual  First Owner

df.size

34720

df.shape

(4340, 8)

df.describe()

year selling_price km_driven

count 4340.000000 4.340000e+03 4340.000000

mean 2013.090783 5.041273e+05 66215.777419

std 4.215344 5.785487e+05 46644.102194

min 1992.000000 2.000000e+04 1.000000

25% 2011.000000 2.087498e+05 35000.000000

50% 2014.000000 3.500000e+05 60000.000000

75% 2016.000000 6.000000e+05 90000.000000

max 2020.000000 8.900000e+06 806599.000000

df.info   # note: without parentheses this displays the method itself, not the summary

pandas.core.frame.DataFrame.info
def info(verbose: bool | None=None, buf: WriteBuffer[str] | None=None,
max_cols: int | None=None, memory_usage: bool | str | None=None,
show_counts: bool | None=None) -> None

Print a concise summary of a DataFrame.

This method prints information about a DataFrame including


the index dtype and columns, non-null values and memory usage.

df.columns

Index(['name', 'year', 'selling_price', 'km_driven', 'fuel', 'seller_type',
       'transmission', 'owner'],
      dtype='object')

df['fuel'].value_counts().plot(kind='bar')


<Axes: xlabel='fuel'>

df['transmission'].value_counts().plot(kind='bar')

<Axes: xlabel='transmission'>

df.groupby('transmission')['selling_price'].min()

transmission
Automatic 79000
Manual 20000
Name: selling_price, dtype: int64

Data Preprocessing

df['seller_type'].value_counts()

seller_type
Individual 3244
Dealer 994
Trustmark Dealer 102
Name: count, dtype: int64

def stc(string):
    v=0
    if(string=='Individual'):
        v=0
    elif(string=='Dealer'):
        v=1
    elif(string=='Trustmark Dealer'):
        v=2
    return v

df['seller_type_N']=df['seller_type'].apply(stc)

df[df['seller_type']=='Dealer']

                                    name  year  selling_price  km_driven    fuel seller_type transmission  ...
12       Toyota Corolla Altis 1.8 VL CVT  2018        1650000      25000  Petrol      Dealer    Automatic  ...
25       Toyota Corolla Altis 1.8 VL CVT  2018        1650000      25000  Petrol      Dealer    Automatic  ...
26                  Maruti Ciaz VXi Plus  2015         585000      24000  Petrol      Dealer       Manual  ...
27           Hyundai Venue SX Opt Diesel  2019        1195000       5000  Diesel      Dealer       Manual  ...
29            Jaguar XF 2.2 Litre Luxury  2014        1964999      28000  Diesel      Dealer    Automatic  ...
...                                  ...   ...            ...        ...     ...         ...          ...  ...
4304  Audi Q5 3.0 TDI Quattro Technology  2018        3899000      22000  Diesel      Dealer    Automatic  ...
4306              Hyundai i10 Sportz 1.2  2011         235000      43100  Petrol      Dealer       Manual  ...

df['transmission'].unique()

array(['Manual', 'Automatic'], dtype=object)

def ttc(string):
    v=0
    if(string=='Manual'):
        v=0
    elif(string=='Automatic'):
        v=1
    return v

df['transmissionN']=df['transmission'].apply(ttc)

from sklearn.preprocessing import LabelEncoder

lef=LabelEncoder()

df['fuelN']=lef.fit_transform(df['fuel'])

df['fuelN']

0 4
1 4
2 1
3 4
4 1
..
4335 1
4336 1
4337 4
4338 1
4339 4
Name: fuelN, Length: 4340, dtype: int64

df['fuel'].unique()

array(['Petrol', 'Diesel', 'CNG', 'LPG', 'Electric'], dtype=object)

df=df.drop('fuel',axis=1)


df=df.drop('seller_type',axis=1)

df=df.drop("transmission",axis=1)

leo=LabelEncoder()
df['ownerN']=leo.fit_transform(df['owner'])
df=df.drop('owner',axis=1)

lec=LabelEncoder()
df['nameN']=lec.fit_transform(df['name'])
df=df.drop('name',axis=1)

df.head()

year selling_price km_driven seller_type_N transmissionN fuelN ownerN

0 2007 60000 70000 0 0 4 0

1 2007 135000 50000 0 0 4 0

2 2012 600000 100000 0 0 1 0

3 2017 250000 46000 0 0 4 0

4 2014 450000 141000 0 0 1 2

df.columns

Index(['year', 'selling_price', 'km_driven', 'seller_type_N', 'transmissionN',
       'fuelN', 'ownerN', 'nameN'],
      dtype='object')

X=df.drop('selling_price',axis=1).values.reshape(4340,7)
X.shape

(4340, 7)

y=df.selling_price.values.reshape(-1,1)
y

array([[ 60000],
[135000],
[600000],
...,
[110000],
[865000],
[225000]])

from sklearn.model_selection import train_test_split

xtrain,xtest,ytrain,ytest=train_test_split(X,y,test_size=0.3)

from sklearn.linear_model import LinearRegression

LR=LinearRegression()

LR.fit(xtrain,ytrain)

▾ LinearRegression
LinearRegression()

pred=LR.predict(xtest)
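The notebook stops before scoring the regression; a minimal check using sklearn's r2_score on the held-out split would be:

from sklearn.metrics import r2_score
print('R^2 on test data:', r2_score(ytest, pred))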

df1.name.unique()

array(['Maruti 800 AC', 'Maruti Wagon R LXI Minor',
       'Hyundai Verna 1.6 SX', ..., 'Mahindra Verito 1.5 D6 BSIII',
       'Toyota Innova 2.5 VX (Diesel) 8 Seater BS IV',
       'Hyundai i20 Magna 1.4 CRDi'], dtype=object)

df1.transmission.unique()

array(['Manual', 'Automatic'], dtype=object)


