
Day1.ipynb

print('hello world')

hello world

t=1
bal=100000
mpin=1234
print('Welcome to Mevi-RRIT Bank')
while(t<=3):
    upin=int(input('Enter Pin'))
    if(upin==mpin):
        print('Successfully logged in')
        print('1-Withdraw\n2-Deposit')
        ch=input('Enter your choice')
        if(ch=='1'):
            amt=int(input('Enter the Amount to withdraw'))
            bal=bal-amt
            print('your new balance is',bal)
        elif(ch=='2'):
            amt=int(input('Enter the amount to deposit'))
            bal=bal+amt
            print('your new balance is',bal)
        break
    elif(t==3):
        print('sorry account blocked')
    else:
        print('Invalid Pin Try again')
    t=t+1

Welcome to Mevi-RRIT Bank


Enter Pin1234
Successfully logged in
1-Withdraw
2-Deposit
Enter your choice2
Enter the amount to deposit20000
your new balance is 120000

v=2
while(v):
    ans=input('Please give me your phone')
    if(ans=='yes'):
        print('thank you, you are so sweet')
        v=0
    else:
        print('please please please')


Please give me your phoneno


please please please
Please give me your phoneni
please please please
Please give me your phoneyes
thank you, you are so sweet

bill=0
print('Welcome to MEVI-RRIT Super Market')
while(True):
    print('1-Rice Flour-30Rs/kg\n2-CornFlakes-40Rs/kg\n3-exit')
    g=int(input('Enter your choice'))
    if(g==1):
        q=int(input('enter the quantity'))
        bill+=30*q   # same as bill = bill + 30*q
    elif(g==2):
        q=int(input('enter the quantity'))
        bill+=40*q
    print('Detergents')
    print('1-Surfexcel-30rs\n2-Tide-35Rs\n3-exit')
    d=int(input('Enter your choice'))
    if(d==1):
        q=int(input('enter the quantity'))
        bill+=30*q
    elif(d==2):
        q=int(input('enter the quantity'))
        bill+=35*q
    print('your total bill is',bill)
    ch=input('do you want to exit')
    if(ch=='yes'):
        print('thank you')
        break

Welcome to MEVI-RRIT Super Market


1-Rice Flour-30Rs/kg
2-CornFlakes-40Rs/kg
3-exit
Enter your choice1
enter the quantity2
Detergents
1-Surfexcel-30rs
2-Tide-35Rs
3-exit
Enter your choice3
your total bill is 60
do you want to exitno
1-Rice Flour-30Rs/kg
2-CornFlakes-40Rs/kg
3-exit
Enter your choice2
enter the quantity2
Detergents
1-Surfexcel-30rs
2-Tide-35Rs
3-exit
Enter your choice3
your total bill is 140
do you want to exityes
thank you

a=1
print(type(a))

<class 'int'>

a=10


Untitled64.ipynb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#read the data


df=pd.read_csv('/content/zomato_df_bangalore.csv')

#explore the data


df.head(3)

   Unnamed: 0  Restaurant ID Restaurant Name  Number of Orders   Price         City  Rating             Food Name
0           0              1   Spicy Delight               450  150.50  Koramangala     4.5           Masala Dosa
1           1              2     Urban Diner               320  200.00  Indiranagar     4.0        Butter Chicken
2           2              3    Green Garden               280  250.75   Whitefield     3.8  Paneer Butter Masala

df.tail(1)

    Unnamed: 0  Restaurant ID Restaurant Name  Number of Orders    Price         City  Rating       Food Name
49          49             50    Fusion Feast               390  2650.75  Thanisandra     4.8  Mutton Frankie

df.shape

(50, 8)

df.size

400

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Unnamed: 0 50 non-null int64

1 Restaurant ID 50 non-null int64
2 Restaurant Name 50 non-null object
3 Number of Orders 50 non-null int64
4 Price 50 non-null float64
5 City 50 non-null object
6 Rating 50 non-null float64
7 Food Name 50 non-null object
dtypes: float64(2), int64(3), object(3)
memory usage: 3.2+ KB

#Analyse the data

df.describe()

Unnamed: 0 Restaurant ID Number of Orders Price Rating

count 50.00000 50.00000 50.000000 50.000000 50.000000

mean 24.50000 25.50000 376.400000 1403.389000 4.220000

std 14.57738 14.57738 57.738326 750.542149 0.333197

min 0.00000 1.00000 260.000000 150.500000 3.700000

25% 12.25000 13.25000 340.000000 763.300000 3.925000

50% 24.50000 25.50000 375.000000 1425.625000 4.200000

75% 36.75000 37.75000 410.000000 2038.187500 4.500000

max 49.00000 50.00000 500.000000 2650.750000 4.800000

#visualizing Data

import matplotlib.pyplot as plt

df.groupby('Price')['Number of Orders'].mean()

Price
150.50 450.0
200.00 320.0
250.75 280.0
300.20 500.0
350.00 350.0
400.25 410.0
450.10 290.0
500.75 470.0
550.20 340.0
600.40 330.0
650.60 480.0
700.80 260.0
750.90 390.0
800.50 410.0
850.75 320.0
900.00 460.0
950.25 300.0
1000.50 350.0
1050.75 400.0
1100.00 370.0
1150.25 420.0
1200.50 380.0
1300.00 310.0
1350.25 360.0
1400.50 490.0
1450.75 300.0
1500.00 410.0
1550.25 290.0
1600.50 460.0
1650.75 370.0
1700.00 430.0
1750.25 380.0
1800.50 340.0
1850.75 320.0
1900.00 400.0
1950.25 350.0
2000.50 370.0
2050.75 360.0
2100.00 440.0
2150.25 350.0
2200.50 410.0
2250.75 390.0
2300.00 310.0
2350.25 400.0
2400.50 420.0
2450.75 360.0
2500.00 340.0
2550.25 410.0
2600.50 380.0
2650.75 390.0
Name: Number of Orders, dtype: float64

df.groupby('Food Name')['Price'].mean().plot(kind='pie')


<Axes: ylabel='Price'>

import numpy as np
x=np.array([1,2,3,4,5,6,7,8,9,10])
print(x)
y=x**2
print(y)

[ 1 2 3 4 5 6 7 8 9 10]
[ 1 4 9 16 25 36 49 64 81 100]

print(np.max(x))
print(np.min(x))
print(np.mean(x))
print(np.std(x))
print(np.sum(x))
print(np.sqrt(y))

10
1
5.5
2.8722813232690143
55
[ 1. 2. 3. 4. 5. 6. 7. 8. 9. 10.]

ar=x[np.where(x%2==0)]
ar

array([ 2, 4, 6, 8, 10])
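The same selection can be written with a boolean mask directly, which is the more idiomatic NumPy form and gives the same result without the explicit np.where call:

# boolean masking: keep the elements where the condition holds
even = x[x % 2 == 0]
even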


a=[[1,2,3],[4,5,6],[7,8,9]]
print(a)
ar=np.array(a)
print(ar)
print(ar[1][2])

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]


[[1 2 3]
[4 5 6]
[7 8 9]]
6

ar=np.array([12,13,14,156,12.6,98.7])
m=ar[np.argmax(ar)]
m

156.0

a=[1,2,3,4,5,6,7,8]
ar=np.array(a)
print(ar)
m=ar.reshape(4,2)
print(m)

[1 2 3 4 5 6 7 8]
[[1 2]
[3 4]
[5 6]
[7 8]]

print(x)
print(y)

[ 1 2 3 4 5 6 7 8 9 10]
[ 1 4 9 16 25 36 49 64 81 100]

plt.scatter(x,y,marker='p',color='r')


<matplotlib.collections.PathCollection at 0x7ab0b3e7c1f0>

plt.plot(x,y,marker='o',linestyle='--',color='r',markerfacecolor='b')
plt.xlabel('Experience')
plt.ylabel('Salary')
plt.title('EX vs Sal')


Text(0.5, 1.0, 'EX vs Sal')

x=['Apples','Oranges','Papayas','Kiwis']
y=[1257,754,678,986]
plt.bar(x,y,color='red',edgecolor='k')

<BarContainer object of 4 artists>


x=[8,2,6,1,1,4,2]
e=[0.1,0.1,0.1,0.1,0.1,0.2,0.2]
c=['yellow','green','blue','orange','black','red','pink']
l=['College','Eating','Sleeping','Household work','Friends','Mobile','Other']  # last label lost to page truncation; 'Other' is a placeholder so len(l) matches len(x)

len(x)==len(l)   # sanity check: one label per slice (the original compared len(x) to itself)

True

plt.pie(x,labels=l,autopct='%.2f%%',explode=e,colors=c)

([<matplotlib.patches.Wedge at 0x7ab0b1273ac0>,
<matplotlib.patches.Wedge at 0x7ab0b1273a00>,
<matplotlib.patches.Wedge at 0x7ab0b12587f0>,
<matplotlib.patches.Wedge at 0x7ab0b1258e80>,
<matplotlib.patches.Wedge at 0x7ab0b1259510>,
<matplotlib.patches.Wedge at 0x7ab0b1259ba0>,

Linear Regression.ipynb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

df=pd.read_csv('/content/Salary_Data (2).csv')

df.head()

YearsExperience Salary

0 1.1 39343.0

1 1.3 46205.0

2 1.5 37731.0

3 2.0 43525.0

4 2.2 39891.0

df.shape

(30, 2)

df.isnull().sum()

YearsExperience 0
Salary 0
dtype: int64

plt.scatter(df['YearsExperience'],df['Salary'],marker='*')


<matplotlib.collections.PathCollection at 0x7e5cb2cba3e0>

x=df.drop('Salary',axis=1)

y=df.Salary

0 39343.0
1 46205.0
2 37731.0
3 43525.0
4 39891.0
5 56642.0
6 60150.0
7 54445.0
8 64445.0
9 57189.0
10 63218.0
11 55794.0
12 56957.0
13 57081.0
14 61111.0
15 67938.0
16 66029.0
17 83088.0
18 81363.0
19 93940.0
20 91738.0
21 98273.0
22 101302.0
23 113812.0

24 109431.0
25 105582.0
26 116969.0
27 112635.0
28 122391.0
29 121872.0
Name: Salary, dtype: float64

from sklearn.model_selection import train_test_split

xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.2)

xtrain.shape

(24, 1)

xtest.shape

(6, 1)

from sklearn.linear_model import LinearRegression

LR=LinearRegression()

LR.fit(xtrain,ytrain)

▾ LinearRegression
LinearRegression()

pred=LR.predict(xtest)

pred

array([111941.18950598,  63174.58719623,  52445.93468808, 124620.50610651,
        53421.26673428,  34889.95785657])

ytest

25 105582.0
12 56957.0
5 56642.0
28 122391.0
6 60150.0
0 39343.0
Name: Salary, dtype: float64


a=int(input('Enter exp'))
ar=np.array([a]).reshape(1,-1)
ar

Enter exp15
array([[15]])

salary = LR.predict(ar)

/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names
warnings.warn(

salary

array([170461.11227768])
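The UserWarning above appears because LR was fitted on a DataFrame (so it remembers the column name YearsExperience) but predict was called with a bare NumPy array. A minimal sketch of the fix is to wrap the user input in a DataFrame with the matching column name:

# predict from a DataFrame whose column matches the training data; no warning is raised
u = pd.DataFrame({'YearsExperience': [a]})
salary = LR.predict(u)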


Logistic Regression FDP RRIT -MEVI.ipynb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

df=pd.read_csv('/content/heart 2.csv')

df.head()

   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  ca  thal  target
0   52    1   0       125   212    0        1      168      0      1.0      2   2     3       0
1   53    1   0       140   203    1        0      155      1      3.1      0   0     3       0
2   70    1   0       145   174    0        1      125      1      2.6      0   0     3       0
3   61    1   0       148   203    0        1      161      0      0.0      2   1     3       0
4   62    0   0       138   294    1        1      106      0      1.9      1   3     2       0


x=df.drop('target',axis=1)

y=df.target

from sklearn.model_selection import train_test_split

xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.3,random_state=0)  # truncated in the source; random_state=0 assumed, matching the companion notebook on the same data

from sklearn.linear_model import LogisticRegression

lr=LogisticRegression()

lr.fit(xtrain,ytrain)


/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning:
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
▾ LogisticRegression
LogisticRegression()
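The ConvergenceWarning above is exactly what its message says: the lbfgs solver hit its iteration limit on unscaled features. A minimal sketch of the two fixes the warning itself suggests, either raising max_iter or scaling the data first:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# option 1: allow more iterations
lr = LogisticRegression(max_iter=1000)

# option 2: standardize features first, which usually lets the solver converge quickly
lr = make_pipeline(StandardScaler(), LogisticRegression())
lr.fit(xtrain, ytrain)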

xtest

age sex cp trestbps chol fbs restecg thalach exang oldpeak slope

952 54 0 2 135 304 1 1 170 0 0.0 2

411 56 1 0 125 249 1 0 144 1 1.2 1

363 53 1 2 130 246 1 0 173 0 0.0 2

234 49 0 0 130 269 0 1 163 0 0.0 2

431 65 0 0 150 225 0 0 114 0 1.0 1

... ... ... ... ... ... ... ... ... ... ... ...

947 54 0 2 160 201 0 1 163 0 0.0 2

157 54 1 2 120 258 0 0 147 0 0.4 1

278 55 1 0 160 289 0 0 145 1 0.8 1

404 61 1 0 140 207 0 0 138 1 1.9 2

487 65 1 0 135 254 0 0 127 0 2.8 1

308 rows × 13 columns


pred=lr.predict(xtest)

pred

array([1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1,
1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1,
0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,
0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0,
1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0,
1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1,

0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1,
0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0,
1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1,
0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1,
1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1,
0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0])

ytest.values

array([1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1,
1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0,
0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,
0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0,
1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1,
1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1,
0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1,
0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1,
0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1,
0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0,
1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1,
1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0])

from sklearn.metrics import confusion_matrix,accuracy_score

cm=confusion_matrix(ytest,pred)

cm

array([[105, 31],
[ 25, 147]])

ytest.size

308

ac=accuracy_score(ytest,pred)

ac

0.8181818181818182
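Reading this against the confusion matrix: 105 + 147 = 252 predictions fall on the diagonal (correct) out of 308 test rows, and 252/308 ≈ 0.818, matching accuracy_score.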


Data preprocessing fdp 5.ipynb

#importing the libraries


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#Reading the dataset


df=pd.read_csv('/content/CAR DETAILS FROM CAR DEKHO (2).csv')

#exploring the dataset


df.head()

                       name  year  selling_price  km_driven    fuel seller_type transmission         owner
0             Maruti 800 AC  2007          60000      70000  Petrol  Individual       Manual   First Owner
1  Maruti Wagon R LXI Minor  2007         135000      50000  Petrol  Individual       Manual   First Owner
2      Hyundai Verna 1.6 SX  2012         600000     100000  Diesel  Individual       Manual   First Owner
3    Datsun RediGO T Option  2017         250000      46000  Petrol  Individual       Manual   First Owner
4     Honda Amaze VX i-DTEC  2014         450000     141000  Diesel  Individual       Manual  Second Owner

df.shape

(4340, 8)

df.size

34720

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4340 entries, 0 to 4339
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 name 4340 non-null object

1 year 4340 non-null int64
2 selling_price 4340 non-null int64
3 km_driven 4340 non-null int64
4 fuel 4340 non-null object
5 seller_type 4340 non-null object
6 transmission 4340 non-null object
7 owner 4340 non-null object
dtypes: int64(3), object(5)
memory usage: 271.4+ KB

#data analytics
df.describe()

year selling_price km_driven

count 4340.000000 4.340000e+03 4340.000000

mean 2013.090783 5.041273e+05 66215.777419

std 4.215344 5.785487e+05 46644.102194

min 1992.000000 2.000000e+04 1.000000

25% 2011.000000 2.087498e+05 35000.000000

50% 2014.000000 3.500000e+05 60000.000000

75% 2016.000000 6.000000e+05 90000.000000

max 2020.000000 8.900000e+06 806599.000000

df.columns

Index(['name', 'year', 'selling_price', 'km_driven', 'fuel', 'seller_type',
       'transmission', 'owner'],
      dtype='object')

df.name.value_counts().sum()

4340

len(df.name.unique())

1491

df.fuel.value_counts().plot(kind='bar')


<Axes: xlabel='fuel'>

df.isnull().sum()

name 0
year 0
selling_price 0
km_driven 0
fuel 0
seller_type 0
transmission 0
owner 0
dtype: int64

df['fuel'].unique()

array(['Petrol', 'Diesel', 'CNG', 'LPG', 'Electric'], dtype=object)


def FuelN(string):
    v=0
    if(string=='Petrol'):
        v=0
    elif(string=='Diesel'):
        v=1
    elif(string=='CNG'):
        v=2
    elif(string=='LPG'):
        v=3
    elif(string=='Electric'):
        v=4
    return v

FuelN('Diesel')

df['FuelN']=df['fuel'].apply(FuelN)

df=df.drop('fuel',axis=1)
df.head()

                       name  year  selling_price  km_driven seller_type transmission         owner  FuelN
0             Maruti 800 AC  2007          60000      70000  Individual       Manual   First Owner      0
1  Maruti Wagon R LXI Minor  2007         135000      50000  Individual       Manual   First Owner      0
2      Hyundai Verna 1.6 SX  2012         600000     100000  Individual       Manual   First Owner      1
3    Datsun RediGO T Option  2017         250000      46000  Individual       Manual   First Owner      0
4     Honda Amaze VX i-DTEC  2014         450000     141000  Individual       Manual  Second Owner      1

from sklearn.preprocessing import LabelEncoder

l=LabelEncoder()


df['seller_typeN']=l.fit_transform(df['seller_type'])

df.seller_type.unique()

array(['Individual', 'Dealer', 'Trustmark Dealer'], dtype=object)

df.seller_typeN.unique()

array([1, 0, 2])
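LabelEncoder assigns integer codes in sorted label order, so Dealer→0, Individual→1, Trustmark Dealer→2; the array above lists the codes in the order the values first appear in the data, not in code order.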

df=df.drop('seller_type',axis=1)

#repeat the conversion for transmission, owner, and the car names (a sketch of these steps follows below)

#split the data into features and target

#split the data into train and test

#import model like linear regression

#build the model

#train the model

#test the model ytest values

#take the user input and convert into numpy array as a row

#then predict for user input
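A minimal sketch of the steps listed above, reusing the LabelEncoder pattern already shown for seller_type. The input row at the end is illustrative only, and its values must follow the column order of x:

# encode the remaining text columns, as done for seller_type
for col in ['transmission', 'owner', 'name']:
    df[col + 'N'] = LabelEncoder().fit_transform(df[col])
    df = df.drop(col, axis=1)

# split into features and target
x = df.drop('selling_price', axis=1)
y = df.selling_price

# train/test split and a linear regression model
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2)
LR = LinearRegression()
LR.fit(xtrain, ytrain)
pred = LR.predict(xtest)          # compare pred against ytest.values

# predict for one user input (illustrative values, ordered as x.columns)
u = np.array([[2014, 70000, 1, 1, 1, 0, 700]])
print(LR.predict(u))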


Comparison of algorithms FDP RRIT -MEVI.ipynb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

df=pd.read_csv('/content/heart 2.csv')

df.head()

   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  ca  thal  target
0   52    1   0       125   212    0        1      168      0      1.0      2   2     3       0
1   53    1   0       140   203    1        0      155      1      3.1      0   0     3       0
2   70    1   0       145   174    0        1      125      1      2.6      0   0     3       0
3   61    1   0       148   203    0        1      161      0      0.0      2   1     3       0
4   62    0   0       138   294    1        1      106      0      1.9      1   3     2       0


x=df.drop('target',axis=1)

y=df.target

from sklearn.model_selection import train_test_split

xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.3,random_state=0)

from sklearn.linear_model import LogisticRegression

lr=LogisticRegression()

lr.fit(xtrain,ytrain)

/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning:
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
▾ LogisticRegression
LogisticRegression()

xtest


age sex cp trestbps chol fbs restecg thalach exang oldpeak slope

807 44 1 2 130 233 0 1 179 1 0.4 2

27 58 0 1 136 319 1 0 152 0 0.0 2

77 63 1 0 140 187 0 0 144 1 4.0 2

406 58 1 2 140 211 1 0 165 0 0.0 2

886 61 1 0 120 260 0 1 140 1 3.6 1

... ... ... ... ... ... ... ... ... ... ... ...

808 51 1 2 94 227 0 1 154 1 0.0 2

984 59 1 0 135 234 0 1 161 0 0.5 1

717 56 1 2 130 256 1 0 142 1 0.6 1

167 57 0 0 120 354 0 1 163 1 0.6 2

878 54 1 0 120 188 0 1 113 0 1.4 1

308 rows × 13 columns


pred=lr.predict(xtest)

pred

array([1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0,
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0,
1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1,
1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0,
1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0,
0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1,
1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0,
0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1,
1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0])

ytest.values

array([1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1,
1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0,
1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1,
1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0,
1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0,
0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1,
0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0,
0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1,
1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0])

from sklearn.metrics import confusion_matrix,accuracy_score


cm=confusion_matrix(ytest,pred)

cm

array([[118, 27],
[ 13, 150]])

ytest.size

308

LGac=accuracy_score(ytest,pred)*100
print('Logistic Regression Accuracy->',LGac)

Logistic Regression Accuracy-> 87.01298701298701

from sklearn.tree import DecisionTreeClassifier

DT=DecisionTreeClassifier()

DT.fit(xtrain,ytrain)

▾ DecisionTreeClassifier
DecisionTreeClassifier()

predDT=DT.predict(xtest)

DTac=accuracy_score(ytest,predDT)*100
print(' Decision Tree Accuracy->',DTac)

Decision Tree Accuracy-> 99.02597402597402

from sklearn.ensemble import RandomForestClassifier

RT=RandomForestClassifier(n_estimators=101)

RT.fit(xtrain,ytrain)

▾ RandomForestClassifier
RandomForestClassifier(n_estimators=101)

RTpred=RT.predict(xtest)

RTac=accuracy_score(ytest,RTpred)*100
print(' RandomForestClassifier Accuracy->',RTac)

RandomForestClassifier Accuracy-> 99.02597402597402


l=['LogisticRegression','DecisionTreeClassifier','RandomForestClassifier']
ac=[LGac,DTac,RTac]   # accuracies kept in the same order as the labels (the original list order mislabelled the bars)

plt.barh(l,ac,color='red',edgecolor='blue')

<BarContainer object of 3 artists>

from sklearn.svm import SVC

u=np.array([61,0,1,148,203,0,1,161,2,0.0,2,1,3]).reshape(1,-1)
ans=RT.predict(u)
if(int(ans)==0):
    print('Patient will not have any heart disease')
else:
    print('Patient will have heart disease')

Patient will have heart disease


/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but RandomForestClassifier was fitted with feature names
warnings.warn(
<ipython-input-126-cb483a7c6719>:3: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated
if(int(ans)==0):
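The DeprecationWarning comes from calling int() on the one-element array that predict returns; indexing out the single element first avoids it:

ans = RT.predict(u)
if(int(ans[0])==0):   # extract the single prediction before converting to int
    print('Patient will not have any heart disease')
else:
    print('Patient will have heart disease')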


Comparison of algorithms Diabetes Dataset FDP RRIT -MEVI.ipynb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

df=pd.read_csv('/content/diabetes.csv')

df.head()

   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  ...
0            6      148             72             35        0  33.6  ...
1            1       85             66             29        0  26.6  ...
2            8      183             64              0        0  23.3  ...
3            1       89             66             23       94  28.1  ...
4            0      137             40             35      168  43.1  ...

x=df.drop('Outcome',axis=1)

y=df.Outcome

from sklearn.model_selection import train_test_split

xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.2,random_state=28)

from sklearn.linear_model import LogisticRegression

lr=LogisticRegression()

lr.fit(xtrain,ytrain)

/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning:
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
▾ LogisticRegression
LogisticRegression()

xtest


Pregnancies Glucose BloodPressure SkinThickness Insulin BMI ...

728 2 175 88 0 0 22.9

392 1 131 64 14 415 23.7

68 1 95 66 13 38 19.6

48 7 103 66 32 0 39.1

74 1 79 75 30 0 32.0

... ... ... ... ... ... ...

412 1 143 84 23 310 42.4

233 4 122 68 0 0 35.0

619 0 119 0 0 0 32.4

557 8 110 76 0 0 27.8

301 2 144 58 33 135 31.6

154 rows × 8 columns

pred=lr.predict(xtest)

pred

array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0,
1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0])

ytest.values

array([0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0,
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1])

from sklearn.metrics import confusion_matrix,accuracy_score

cm=confusion_matrix(ytest,pred)

cm

array([[93, 10],
[19, 32]])

ytest.size

154


LGac=accuracy_score(ytest,pred)*100
print('Logistic Regression Accuracy->',LGac)

Logistic Regression Accuracy-> 81.16883116883116

from sklearn.tree import DecisionTreeClassifier

DT=DecisionTreeClassifier()

DT.fit(xtrain,ytrain)

▾ DecisionTreeClassifier
DecisionTreeClassifier()

predDT=DT.predict(xtest)

DTac=accuracy_score(ytest,predDT)*100
print(' Decision Tree Accuracy->',DTac)

Decision Tree Accuracy-> 70.77922077922078

from sklearn.ensemble import RandomForestClassifier,VotingClassifier

RT=RandomForestClassifier(n_estimators=101)

RT.fit(xtrain,ytrain)

▾ RandomForestClassifier
RandomForestClassifier(n_estimators=101)

RTpred=RT.predict(xtest)

RTac=accuracy_score(ytest,RTpred)*100
print(' RandomForestClassifier Accuracy->',RTac)

RandomForestClassifier Accuracy-> 79.22077922077922

vcLgDt=VotingClassifier(
    estimators=[
        ('Lg',lr),
        ('DT',DT),
    ], voting='soft'
)
vcLgDt.fit(xtrain,ytrain)
vcLgDtpred=vcLgDt.predict(xtest)
vcLgDtac=accuracy_score(ytest,vcLgDtpred)*100
print('Accuracy of combined Algorithm LR and DT is',vcLgDtac)

Accuracy of combined Algorithm LR and DT is 70.12987012987013


/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning:
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(

vcLgRt=VotingClassifier(
    estimators=[
        ('Lg',lr),
        ('RT',RT),
    ], voting='soft'
)
vcLgRt.fit(xtrain,ytrain)
vcLgRtpred=vcLgRt.predict(xtest)
vcLgRtac=accuracy_score(ytest,vcLgRtpred)*100
print('Accuracy of combined Algorithm LR and RT is',vcLgRtac)

/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning:
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
Accuracy of combined Algorithm LR and RT is 81.16883116883116

vcLgRtDT=VotingClassifier(
    estimators=[
        ('RT',RT),
        ('Lg',lr),
    ], voting='soft'
)
vcLgRtDT.fit(xtrain,ytrain)
vcLgRtDTpred=vcLgRtDT.predict(xtest)
vcLgRtDTac=accuracy_score(ytest,vcLgRtDTpred)*100
print('Accuracy of combined Algorithm LR and RT is',vcLgRtDTac)

Accuracy of combined Algorithm LR and RT is 79.87012987012987


/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning:
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(

l=['LogisticRegression','DecisionTreeClassifier','RandomForestClassifier']
ac=[LGac,DTac,RTac]   # accuracies kept in the same order as the labels (the original list order mislabelled the bars)

plt.barh(l,ac,color='red',edgecolor='blue')


<BarContainer object of 3 artists>

from sklearn.svm import SVC

u=np.array([2,144,58,33,235,31.6,0.422,35]).reshape(1,-1)
ans=RT.predict(u)
if(int(ans)==0):
    print('Patient will not have any Diabetes')
else:
    print('Patient will have Diabetes')

Patient will have Diabetes


/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but RandomForestClassifier was fitted with feature names
warnings.warn(
<ipython-input-212-a2d5fed3c7b4>:3: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated
if(int(ans)==0):


KMeans.ipynb

import matplotlib.pyplot as plt


import pandas as pd
from sklearn.preprocessing import LabelEncoder
import numpy as np
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score

data=pd.read_csv('/content/Mall_Customers.csv')
data.head()

CustomerID Gender Age Annual Income (k$) Spending Score (1-100)

0 1 Male 19 15 39

1 2 Male 21 15 81

2 3 Female 20 16 6

3 4 Female 23 16 77

4 5 Female 31 17 40

data['Spending Score (1-100)'].value_counts()

Spending Score (1-100)


42 8
55 7
46 6
73 6
35 5
..
31 1
44 1
53 1
65 1
18 1
Name: count, Length: 84, dtype: int64

features=data.columns
lb=LabelEncoder()
data['GenderN']=lb.fit_transform(data['Gender'])

data=data.drop('Gender',axis=1)
data=data.drop('CustomerID',axis=1)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):

# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Age 200 non-null int64
1 Annual Income (k$) 200 non-null int64
2 Spending Score (1-100) 200 non-null int64
3 GenderN 200 non-null int64
dtypes: int64(4)
memory usage: 6.4 KB

from sklearn.preprocessing import StandardScaler


scaler = StandardScaler()
scaled_features = scaler.fit_transform(data)
scaled_features

array([[-1.42456879, -1.73899919, -0.43480148, 1.12815215],
[-1.28103541, -1.73899919, 1.19570407, 1.12815215],
[-1.3528021 , -1.70082976, -1.71591298, -0.88640526],
[-1.13750203, -1.70082976, 1.04041783, -0.88640526],
[-0.56336851, -1.66266033, -0.39597992, -0.88640526],
[-1.20926872, -1.66266033, 1.00159627, -0.88640526],
[-0.27630176, -1.62449091, -1.71591298, -0.88640526],
[-1.13750203, -1.62449091, 1.70038436, -0.88640526],
[ 1.80493225, -1.58632148, -1.83237767, 1.12815215],
[-0.6351352 , -1.58632148, 0.84631002, -0.88640526],
[ 2.02023231, -1.58632148, -1.4053405 , 1.12815215],
[-0.27630176, -1.58632148, 1.89449216, -0.88640526],
[ 1.37433211, -1.54815205, -1.36651894, -0.88640526],
[-1.06573534, -1.54815205, 1.04041783, -0.88640526],
[-0.13276838, -1.54815205, -1.44416206, 1.12815215],
[-1.20926872, -1.54815205, 1.11806095, 1.12815215],
[-0.27630176, -1.50998262, -0.59008772, -0.88640526],
[-1.3528021 , -1.50998262, 0.61338066, 1.12815215],
[ 0.94373197, -1.43364376, -0.82301709, 1.12815215],
[-0.27630176, -1.43364376, 1.8556706 , -0.88640526],
[-0.27630176, -1.39547433, -0.59008772, 1.12815215],
[-0.99396865, -1.39547433, 0.88513158, 1.12815215],
[ 0.51313183, -1.3573049 , -1.75473454, -0.88640526],
[-0.56336851, -1.3573049 , 0.88513158, 1.12815215],
[ 1.08726535, -1.24279661, -1.4053405 , -0.88640526],
[-0.70690189, -1.24279661, 1.23452563, 1.12815215],
[ 0.44136514, -1.24279661, -0.7065524 , -0.88640526],
[-0.27630176, -1.24279661, 0.41927286, 1.12815215],
[ 0.08253169, -1.20462718, -0.74537397, -0.88640526],
[-1.13750203, -1.20462718, 1.42863343, -0.88640526],
[ 1.51786549, -1.16645776, -1.7935561 , 1.12815215],
[-1.28103541, -1.16645776, 0.88513158, -0.88640526],
[ 1.01549866, -1.05194947, -1.7935561 , 1.12815215],
[-1.49633548, -1.05194947, 1.62274124, 1.12815215],
[ 0.7284319 , -1.05194947, -1.4053405 , -0.88640526],
[-1.28103541, -1.05194947, 1.19570407, -0.88640526],
[ 0.22606507, -1.01378004, -1.28887582, -0.88640526],
[-0.6351352 , -1.01378004, 0.88513158, -0.88640526],
[-0.20453507, -0.89927175, -0.93948177, -0.88640526],
[-1.3528021 , -0.89927175, 0.96277471, -0.88640526],
[ 1.87669894, -0.86110232, -0.59008772, -0.88640526],
[-1.06573534, -0.86110232, 1.62274124, 1.12815215],
[ 0.65666521, -0.82293289, -0.55126616, 1.12815215],
[-0.56336851, -0.82293289, 0.41927286, -0.88640526],
[ 0.7284319 , -0.82293289, -0.86183865, -0.88640526],
[-1.06573534, -0.82293289, 0.5745591 , -0.88640526],
[ 0.80019859, -0.78476346, 0.18634349, -0.88640526],
[-0.85043527, -0.78476346, -0.12422899, -0.88640526],
[-0.70690189, -0.78476346, -0.3183368 , -0.88640526],
[-0.56336851, -0.78476346, -0.3183368 , -0.88640526],
[ 0.7284319 , -0.70842461, 0.06987881, -0.88640526],
[-0.41983513, -0.70842461, 0.38045129, 1.12815215],
[-0.56336851, -0.67025518, 0.14752193, -0.88640526],
[ 1.4460988 , -0.67025518, 0.38045129, 1.12815215],
[ 0.80019859, -0.67025518, -0.20187212, -0.88640526],
[ 0.58489852, -0.67025518, -0.35715836, 1.12815215],
[ 0.87196528, -0.63208575, -0.00776431, -0.88640526],
[ 2.16376569, -0.63208575, -0.16305055, 1.12815215],

from sklearn.cluster import KMeans


wcss = []
for i in range(1, 11):
    kmeans = KMeans(random_state=42, n_clusters=i)
    kmeans.fit(scaled_features)
    wcss.append(kmeans.inertia_)
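kmeans.inertia_ is the within-cluster sum of squares (WCSS): the sum of squared distances from each sample to the centroid of its assigned cluster. The elbow plot below tracks how it falls as the number of clusters grows; the "elbow" where the decrease flattens out suggests a reasonable cluster count.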

# Plot the elbow graph


plt.figure(figsize=(10, 5))
plt.plot(range(1, 11), wcss, marker='o', linestyle='--')
plt.title('Elbow Method')
plt.xlabel('Number of Clusters')
plt.ylabel('WCSS')
plt.show()


/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning.
warnings.warn(
(the same FutureWarning is repeated once per KMeans fit in the loop)

optimal_clusters = 7
kmeans = KMeans(random_state=42, n_clusters=optimal_clusters)
clusters = kmeans.fit_predict(scaled_features)

/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning.
warnings.warn(


data['Cluster'] = clusters
data

Age Annual Income (k$) Spending Score (1-100) GenderN Cluster

0 19 15 39 1 3

1 21 15 81 1 3

2 20 16 6 0 2

3 23 16 77 0 2

4 31 17 40 0 2

... ... ... ... ... ...

195 35 120 79 0 1

196 45 126 28 0 4

197 32 126 74 1 5

198 32 137 18 1 4

199 30 137 83 1 5

200 rows × 5 columns

plt.figure(figsize=(10, 5))
plt.scatter(data['Annual Income (k$)'], data['Spending Score (1-100)'], c=data['Cluster'])  # truncated in the source; colouring by the new Cluster column assumed
plt.title('Clusters of customers')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.show()


# Calculate and print evaluation metrics


silhouette_avg = silhouette_score(scaled_features, clusters)
calinski_harabasz = calinski_harabasz_score(scaled_features, clusters)
davies_bouldin = davies_bouldin_score(scaled_features, clusters)

print(f'Silhouette Score: {silhouette_avg:.3f}')
print(f'Calinski-Harabasz Index: {calinski_harabasz:.3f}')
print(f'Davies-Bouldin Index: {davies_bouldin:.3f}')

Silhouette Score: 0.357
Calinski-Harabasz Index: 76.778
Davies-Bouldin Index: 0.980

1. Silhouette Score. Range: -1 to 1. Close to 1 indicates the data points are well-clustered, very close to the centroid of their own cluster and far from other clusters. Close to 0 indicates points on or very near the boundary between clusters, implying overlapping clusters. Negative values indicate points that may have been assigned to the wrong clusters. Ideally you want a score close to 1; above 0.5 is generally considered good, and values around 0.25-0.5 may be acceptable depending on the complexity and nature of the data.
2. Calinski-Harabasz Index. Range: no fixed range (higher is better). Higher values indicate clusters that are dense and well-separated from each other. There is no absolute threshold; compare the index across different models or configurations and prefer the configuration with the highest value.
3. Davies-Bouldin Index. Range: 0 to ∞ (lower is better). Lower values indicate compact, well-separated clusters; values closer to 0 are better. As with the Calinski-Harabasz Index, compare across different models or configurations and prefer the configuration with the lowest value.
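Following the guidance above, a small sketch that computes all three metrics for a range of candidate cluster counts on this notebook's scaled_features (the range of k is an arbitrary choice; n_init is set explicitly to silence the FutureWarning seen earlier):

for k in range(2, 11):
    km = KMeans(n_clusters=k, random_state=42, n_init=10)
    labels = km.fit_predict(scaled_features)
    # one row per k: silhouette (higher better), CH (higher better), DB (lower better)
    print(k,
          round(silhouette_score(scaled_features, labels), 3),
          round(calinski_harabasz_score(scaled_features, labels), 3),
          round(davies_bouldin_score(scaled_features, labels), 3))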

car sales prediction.ipynb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

df=pd.read_csv('/content/CAR DETAILS FROM CAR DEKHO.csv')

df1=pd.read_csv('/content/CAR DETAILS FROM CAR DEKHO.csv')

df.head()

                       name  year  selling_price  km_driven    fuel seller_type transmission        owner
0             Maruti 800 AC  2007          60000      70000  Petrol  Individual       Manual  First Owner
1  Maruti Wagon R LXI Minor  2007         135000      50000  Petrol  Individual       Manual  First Owner

df.size

34720

df.shape

(4340, 8)

df.describe()

year selling_price km_driven

count 4340.000000 4.340000e+03 4340.000000

mean 2013.090783 5.041273e+05 66215.777419

std 4.215344 5.785487e+05 46644.102194

min 1992.000000 2.000000e+04 1.000000

25% 2011.000000 2.087498e+05 35000.000000

50% 2014.000000 3.500000e+05 60000.000000

75% 2016.000000 6.000000e+05 90000.000000

max 2020.000000 8.900000e+06 806599.000000

df.info   # note: without parentheses this displays the method itself, not the summary

pandas.core.frame.DataFrame.info
def info(verbose: bool | None=None, buf: WriteBuffer[str] | None=None,
max_cols: int | None=None, memory_usage: bool | str | None=None,
show_counts: bool | None=None) -> None

Print a concise summary of a DataFrame.

This method prints information about a DataFrame including


the index dtype and columns, non-null values and memory usage.

df.columns

Index(['name', 'year', 'selling_price', 'km_driven', 'fuel', 'seller_type',
       'transmission', 'owner'],
      dtype='object')

df['fuel'].value_counts().plot(kind='bar')


<Axes: xlabel='fuel'>

df['transmission'].value_counts().plot(kind='bar')

<Axes: xlabel='transmission'>

df.groupby('transmission')['selling_price'].min()

transmission
Automatic 79000
Manual 20000
Name: selling_price, dtype: int64

Data Preprocessing

df['seller_type'].value_counts()

seller_type
Individual 3244
Dealer 994
Trustmark Dealer 102
Name: count, dtype: int64

def stc(string):
    v=0
    if(string=='Individual'):
        v=0
    elif(string=='Dealer'):
        v=1
    elif(string=='Trustmark Dealer'):
        v=2
    return v

df['seller_type_N']=df['seller_type'].apply(stc)

df[df['seller_type']=='Dealer']

                                    name  year  selling_price  km_driven    fuel seller_type transmission  ...
12       Toyota Corolla Altis 1.8 VL CVT  2018        1650000      25000  Petrol      Dealer    Automatic  ...
25       Toyota Corolla Altis 1.8 VL CVT  2018        1650000      25000  Petrol      Dealer    Automatic  ...
26                  Maruti Ciaz VXi Plus  2015         585000      24000  Petrol      Dealer       Manual  ...
27           Hyundai Venue SX Opt Diesel  2019        1195000       5000  Diesel      Dealer       Manual  ...
29            Jaguar XF 2.2 Litre Luxury  2014        1964999      28000  Diesel      Dealer    Automatic  ...
...                                  ...   ...            ...        ...     ...         ...          ...  ...
4304  Audi Q5 3.0 TDI Quattro Technology  2018        3899000      22000  Diesel      Dealer    Automatic  ...
4306              Hyundai i10 Sportz 1.2  2011         235000      43100  Petrol      Dealer       Manual  ...

df['transmission'].unique()

array(['Manual', 'Automatic'], dtype=object)

def ttc(string):
    v=0
    if(string=='Manual'):
        v=0
    elif(string=='Automatic'):
        v=1
    return v

df['transmissionN']=df['transmission'].apply(ttc)

from sklearn.preprocessing import LabelEncoder

lef=LabelEncoder()

df['fuelN']=lef.fit_transform(df['fuel'])

df['fuelN']

0 4
1 4
2 1
3 4
4 1
..
4335 1
4336 1
4337 4
4338 1
4339 4
Name: fuelN, Length: 4340, dtype: int64

df['fuel'].unique()

array(['Petrol', 'Diesel', 'CNG', 'LPG', 'Electric'], dtype=object)

df=df.drop('fuel',axis=1)


df=df.drop('seller_type',axis=1)

df=df.drop("transmission",axis=1)

leo=LabelEncoder()
df['ownerN']=leo.fit_transform(df['owner'])
df=df.drop('owner',axis=1)

lec=LabelEncoder()
df['nameN']=lec.fit_transform(df['name'])
df=df.drop('name',axis=1)

df.head()

year selling_price km_driven seller_type_N transmissionN fuelN ownerN

0 2007 60000 70000 0 0 4 0

1 2007 135000 50000 0 0 4 0

2 2012 600000 100000 0 0 1 0

3 2017 250000 46000 0 0 4 0

4 2014 450000 141000 0 0 1 2

df.columns

Index(['year', 'selling_price', 'km_driven', 'seller_type_N', 'transmissionN',
       'fuelN', 'ownerN', 'nameN'],
      dtype='object')

X=df.drop('selling_price',axis=1).values.reshape(4340,7)
X.shape

(4340, 7)

y=df.selling_price.values.reshape(-1,1)
y

array([[ 60000],
[135000],
[600000],
...,
[110000],
[865000],
[225000]])

from sklearn.model_selection import train_test_split

xtrain,xtest,ytrain,ytest=train_test_split(X,y,test_size=0.3)

from sklearn.linear_model import LinearRegression

LR=LinearRegression()

LR.fit(xtrain,ytrain)

▾ LinearRegression
LinearRegression()

pred=LR.predict(xtest)
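The notebook stops before scoring the regression; a minimal check using sklearn's r2_score on the held-out split would be:

from sklearn.metrics import r2_score
print('R^2 on test data:', r2_score(ytest, pred))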

df1.name.unique()

array(['Maruti 800 AC', 'Maruti Wagon R LXI Minor',
       'Hyundai Verna 1.6 SX', ..., 'Mahindra Verito 1.5 D6 BSIII',
       'Toyota Innova 2.5 VX (Diesel) 8 Seater BS IV',
       'Hyundai i20 Magna 1.4 CRDi'], dtype=object)

df1.transmission.unique()

array(['Manual', 'Automatic'], dtype=object)


