Professional Documents
Culture Documents
Codes From Day 1 to Day 6
Codes From Day 1 to Day 6
ipynb - Colab
print('hello world')
hello world
# Simple ATM demo: the user gets three attempts to enter the correct PIN.
# A correct PIN allows one withdraw/deposit transaction; three failures
# block the account.  (Indentation reconstructed — the export flattened it.)
t=1            # attempt counter, 1-based
bal=100000     # opening balance
mpin=1234      # stored PIN
print('WelCome to Mevi-RRIT Bank ')
while(t<=3):
    upin=int(input('Enter Pin'))
    if(upin==mpin):
        print('Sucessfully Loggedin')
        print('1-Withdraw\n2-Deposit')
        ch=input('Enter your choice')
        if(ch=='1'):
            amt=int(input('Enter the Amount to withdraw'))
            # Fix: guard against overdraft — the original let the
            # balance go negative.
            if(amt<=bal):
                bal=bal-amt
                print('your new balance is',bal)
            else:
                print('insufficient balance')
        elif(ch=='2'):
            amt=int(input('Enter the amount to deposit'))
            bal=bal+amt
            print('your new balance is',bal)
        break
    elif(t==3):
        # Third wrong attempt: report the block; the loop condition
        # ends the session on the next check.
        print('sorry account blocked')
    else:
        print('Invalid Pin Try again')
    t=t+1
# Keep pestering until the user agrees to hand over the phone.
v=2  # any non-zero value keeps the loop alive
while(v):
    ans=input('Please give me your phone')
    if(ans!='yes'):
        # Not convinced yet — beg again.
        print('please please please')
        continue
    print('thank you, you are so sweet')
    v=0
https://colab.research.google.com/drive/12E13aeyoERFp9HCRe_iHpw4QNrLZw8HJ#scrollTo=ow9RXnJ8Z6uN 1/3
27/05/2024, 21:04 Day1.ipynb - Colab
# Tiny point-of-sale demo: loop over a grocery menu and a detergent menu,
# accumulate a running bill, and leave when the user asks to exit.
# (Indentation reconstructed — the export flattened it.)
bill=0
print('Welcome to MEVI-RRIT Super Market')
while(True):
    print('1-Rice Flour-30Rs/kg\n2-CornFlakes-40Rs/kg\n3-exit')
    g=int(input('Enter your choice'))
    if(g==1):
        q=int(input('enter the quantity'))
        bill+=30*q  #bill=bill+30
    elif(g==2):
        q=int(input('enter the quantity'))
        bill+=40*q
    elif(g==3):
        # Fix: the menu advertises '3-exit' but the original never
        # handled it — print the final bill and leave the loop.
        print('your total bill is',bill)
        print('thank you')
        break
    print('Detergents')
    print('1-Surfexcel-30rs\n2-Tide-35Rs\n3-exit')
    d=int(input('Enter your choice'))
    if(d==1):
        q=int(input('enter the quantity'))
        bill+=30*q
    elif(d==2):
        q=int(input('enter the quantity'))
        bill+=35*q
    print('your total bill is',bill)
    ch=input('do you want to exit')
    if(ch=='yes'):
        print('thank you')
        break
a=1
print(type(a))
<class 'int'>
a=10
https://colab.research.google.com/drive/12E13aeyoERFp9HCRe_iHpw4QNrLZw8HJ#scrollTo=ow9RXnJ8Z6uN 3/3
30/05/2024, 07:17 Untitled64.ipynb - Colab
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
Number
Unnamed: Restaurant Restaurant Food
of Price City Rating
0 ID Name Name
Orders
Masala
0 0 1 Spicy Delight 450 150.50 Koramangala 4.5
Dosa
Butter
1 1 2 Urban Diner 320 200.00 Indiranagar 4.0
Chicken
Paneer
Green
2 2 3 280 250.75 Whitefield 3.8 Butter
Garden
Masala
df.tail(1)
Number
Unnamed: Restaurant Restaurant Food
of Price City Rating
0 ID Name Name
Orders
Mutton
49 49 50 Fusion Feast 390 2650.75 Thanisandra 4.8
Frankie
df.shape
(50, 8)
df.size
400
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Unnamed: 0 50 non-null int64
https://colab.research.google.com/drive/1BT3FM4sN-wXLB8Y1x6aJgtZ9ExbC9-FE 1/8
30/05/2024, 07:17 Untitled64.ipynb - Colab
1 Restaurant ID 50 non-null int64
2 Restaurant Name 50 non-null object
3 Number of Orders 50 non-null int64
4 Price 50 non-null float64
5 City 50 non-null object
6 Rating 50 non-null float64
7 Food Name 50 non-null object
dtypes: float64(2), int64(3), object(3)
memory usage: 3.2+ KB
df.describe()
#visualizing Data
df.groupby('Price')['Number of Orders'].mean()
Price
150.50 450.0
200.00 320.0
250.75 280.0
300.20 500.0
350.00 350.0
400.25 410.0
450.10 290.0
500.75 470.0
550.20 340.0
600.40 330.0
650.60 480.0
700.80 260.0
750.90 390.0
800.50 410.0
850.75 320.0
900.00 460.0
https://colab.research.google.com/drive/1BT3FM4sN-wXLB8Y1x6aJgtZ9ExbC9-FE 2/8
30/05/2024, 07:17 Untitled64.ipynb - Colab
950.25 300.0
1000.50 350.0
1050.75 400.0
1100.00 370.0
1150.25 420.0
1200.50 380.0
1300.00 310.0
1350.25 360.0
1400.50 490.0
1450.75 300.0
1500.00 410.0
1550.25 290.0
1600.50 460.0
1650.75 370.0
1700.00 430.0
1750.25 380.0
1800.50 340.0
1850.75 320.0
1900.00 400.0
1950.25 350.0
2000.50 370.0
2050.75 360.0
2100.00 440.0
2150.25 350.0
2200.50 410.0
2250.75 390.0
2300.00 310.0
2350.25 400.0
2400.50 420.0
2450.75 360.0
2500.00 340.0
2550.25 410.0
2600.50 380.0
2650.75 390.0
Name: Number of Orders, dtype: float64
df.groupby('Food Name')['Price'].mean().plot(kind='pie')
https://colab.research.google.com/drive/1BT3FM4sN-wXLB8Y1x6aJgtZ9ExbC9-FE 3/8
30/05/2024, 07:17 Untitled64.ipynb - Colab
<Axes: ylabel='Price'>
import numpy as np

# The integers 1..10 and their element-wise squares.
x = np.arange(1, 11)
print(x)
y = np.square(x)
print(y)
[ 1 2 3 4 5 6 7 8 9 10]
[ 1 4 9 16 25 36 49 64 81 100]
# Aggregate statistics of the 1-D array x, then the element-wise square
# root of y (which recovers x, since y was computed as x**2 above).
print(np.max(x))
print(np.min(x))
print(np.mean(x))
print(np.std(x))
print(np.sum(x))
print(np.sqrt(y))
10
1
5.5
2.8722813232690143
55
[ 1. 2. 3. 4. 5. 6. 7. 8. 9. 10.]
# Select the even elements of x with a boolean mask — equivalent to the
# original np.where index form.
ar = x[x % 2 == 0]
ar
array([ 2, 4, 6, 8, 10])
https://colab.research.google.com/drive/1BT3FM4sN-wXLB8Y1x6aJgtZ9ExbC9-FE 4/8
30/05/2024, 07:17 Untitled64.ipynb - Colab
# Nested-list -> 2-D ndarray demo, then the maximum of a 1-D array.
a=[[1,2,3],[4,5,6],[7,8,9]]
print(a)
ar=np.array(a)
print(ar)
print(ar[1][2])   # row 1, column 2 -> 6
# np.max(ar) is the direct idiom for the original ar[np.argmax(ar)];
# both return the largest element.
ar=np.array([12,13,14,156,12.6,98.7])
m=np.max(ar)
m
156.0
# Reshape a flat 8-element vector into 4 rows x 2 columns.
a = [1, 2, 3, 4, 5, 6, 7, 8]
ar = np.asarray(a)
print(ar)
m = ar.reshape(4, 2)
print(m)
[1 2 3 4 5 6 7 8]
[[1 2]
[3 4]
[5 6]
[7 8]]
print(x)
print(y)
[ 1 2 3 4 5 6 7 8 9 10]
[ 1 4 9 16 25 36 49 64 81 100]
plt.scatter(x,y,marker='p',color='r')
https://colab.research.google.com/drive/1BT3FM4sN-wXLB8Y1x6aJgtZ9ExbC9-FE 5/8
30/05/2024, 07:17 Untitled64.ipynb - Colab
<matplotlib.collections.PathCollection at 0x7ab0b3e7c1f0>
# Dashed red line plot of x vs y with blue-filled circle markers.
# Fix: the source line was truncated at markerfacecolor='b — the closing
# quote and parenthesis are restored here.
plt.plot(x,y,marker='o',linestyle='--',color='r',markerfacecolor='b')
plt.xlabel('Experence')
plt.ylabel('Salary')
plt.title('EX vs Sal')
https://colab.research.google.com/drive/1BT3FM4sN-wXLB8Y1x6aJgtZ9ExbC9-FE 6/8
30/05/2024, 07:17 Untitled64.ipynb - Colab
# Bar chart of fruit sale counts — one red, black-edged bar per fruit.
# NOTE: rebinds x and y, shadowing the numpy arrays defined earlier.
x=['Apples','Oranges','Papayas','Kiwis']
y=[1257,754,678,986]
plt.bar(x,y,color='red',edgecolor='k')
https://colab.research.google.com/drive/1BT3FM4sN-wXLB8Y1x6aJgtZ9ExbC9-FE 7/8
30/05/2024, 07:17 Untitled64.ipynb - Colab
# Weekly time-allocation pie chart.
x=[8,2,6,1,1,4,2]                        # hours per activity
e=[0.1,0.1,0.1,0.1,0.1,0.2,0.2]          # explode offset per wedge
c=['yellow','green','blue','orange','black','red','pink']
# NOTE(review): the seventh label was truncated when the notebook was
# exported; 'Other' is a placeholder — confirm the original label.
l=['College','Eating','Sleeping','Houseold work','Friends','Mobile','Other']
# Fix: the original sanity check compared len(x) to itself, which is
# always True; compare values against labels instead.
len(x)==len(l)
plt.pie(x,labels=l,autopct='%.2f%%',explode=e,colors=c)
([<matplotlib.patches.Wedge at 0x7ab0b1273ac0>,
<matplotlib.patches.Wedge at 0x7ab0b1273a00>,
<matplotlib.patches.Wedge at 0x7ab0b12587f0>,
<matplotlib.patches.Wedge at 0x7ab0b1258e80>,
<matplotlib.patches.Wedge at 0x7ab0b1259510>,
<matplotlib.patches.Wedge at 0x7ab0b1259ba0>,
https://colab.research.google.com/drive/1BT3FM4sN-wXLB8Y1x6aJgtZ9ExbC9-FE 8/8
31/05/2024, 22:21 Linear Regression.ipynb - Colab
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
df=pd.read_csv('/content/Salary_Data (2).csv')
df.head()
YearsExperience Salary
0 1.1 39343.0
1 1.3 46205.0
2 1.5 37731.0
3 2.0 43525.0
4 2.2 39891.0
df.shape
(30, 2)
df.isnull().sum()
YearsExperience 0
Salary 0
dtype: int64
plt.scatter(df['YearsExperience'],df['Salary'],marker='*')
https://colab.research.google.com/drive/14YEmE5b_sQWQ2uR8y_KpwMpfzpc7DpNq#scrollTo=fMWTJPrh-HhM 1/4
31/05/2024, 22:21 Linear Regression.ipynb - Colab
<matplotlib.collections.PathCollection at 0x7e5cb2cba3e0>
x=df.drop('Salary',axis=1)
y=df.Salary
0 39343.0
1 46205.0
2 37731.0
3 43525.0
4 39891.0
5 56642.0
6 60150.0
7 54445.0
8 64445.0
9 57189.0
10 63218.0
11 55794.0
12 56957.0
13 57081.0
14 61111.0
15 67938.0
16 66029.0
17 83088.0
18 81363.0
19 93940.0
20 91738.0
21 98273.0
22 101302.0
23 113812.0
https://colab.research.google.com/drive/14YEmE5b_sQWQ2uR8y_KpwMpfzpc7DpNq#scrollTo=fMWTJPrh-HhM 2/4
31/05/2024, 22:21 Linear Regression.ipynb - Colab
24 109431.0
25 105582.0
26 116969.0
27 112635.0
28 122391.0
29 121872.0
Name: Salary, dtype: float64
xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.2)
xtrain.shape
(24, 1)
xtest.shape
(6, 1)
LR=LinearRegression()
LR.fit(xtrain,ytrain)
▾ LinearRegression
LinearRegression()
pred=LR.predict(xtest)
pred
ytest
25 105582.0
12 56957.0
5 56642.0
28 122391.0
6 60150.0
0 39343.0
Name: Salary, dtype: float64
https://colab.research.google.com/drive/14YEmE5b_sQWQ2uR8y_KpwMpfzpc7DpNq#scrollTo=fMWTJPrh-HhM 3/4
31/05/2024, 22:21 Linear Regression.ipynb - Colab
a=int(input('Enter exp'))
ar=np.array([a]).reshape(1,-1)
ar
Enter exp15
array([[15]])
salary = LR.predict(ar)
/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X do
warnings.warn(
salary
array([170461.11227768])
https://colab.research.google.com/drive/14YEmE5b_sQWQ2uR8y_KpwMpfzpc7DpNq#scrollTo=fMWTJPrh-HhM 4/4
31/05/2024, 22:23 Logistic Regression FDP RRIT -MEVI.ipynb - Colab
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
df=pd.read_csv('/content/heart 2.csv')
df.head()
cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal target
x=df.drop('target',axis=1)
y=df.target
xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.3,random
lr=LogisticRegression()
lr.fit(xtrain,ytrain)
https://colab.research.google.com/drive/15ofczx-qdfUrVWQqoALn6jfEdgx4usqg#scrollTo=I2DNza3eL0sk 1/4
31/05/2024, 22:23 Logistic Regression FDP RRIT -MEVI.ipynb - Colab
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regress
n_iter_i = _check_optimize_result(
▾ LogisticRegression
LogisticRegression()
xtest
age sex cp trestbps chol fbs restecg thalach exang oldpeak slope
... ... ... ... ... ... ... ... ... ... ... ...
pred=lr.predict(xtest)
pred
array([1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1,
1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1,
0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,
0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0,
1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0,
1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1,
https://colab.research.google.com/drive/15ofczx-qdfUrVWQqoALn6jfEdgx4usqg#scrollTo=I2DNza3eL0sk 2/4
31/05/2024, 22:23 Logistic Regression FDP RRIT -MEVI.ipynb - Colab
0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1,
0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0,
1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1,
0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1,
1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1,
0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0])
ytest.values
array([1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1,
1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0,
0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,
0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0,
1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1,
1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1,
0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1,
0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1,
0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1,
0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0,
1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1,
1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0])
cm=confusion_matrix(ytest,pred)
cm
array([[105, 31],
[ 25, 147]])
ytest.size
308
ac=accuracy_score(ytest,pred)
ac
0.8181818181818182
https://colab.research.google.com/drive/15ofczx-qdfUrVWQqoALn6jfEdgx4usqg#scrollTo=I2DNza3eL0sk 3/4
31/05/2024, 22:23 Logistic Regression FDP RRIT -MEVI.ipynb - Colab
https://colab.research.google.com/drive/15ofczx-qdfUrVWQqoALn6jfEdgx4usqg#scrollTo=I2DNza3eL0sk 4/4
31/05/2024, 23:06 Data preprocessing fdp 5 .ipynb - Colab
Maruti
0 2007 60000 70000 Petrol Individual Manual
800 AC Ow
Maruti
Wagon
1 2007 135000 50000 Petrol Individual Manual
R LXI Ow
Minor
Hyundai
2 Verna 2012 600000 100000 Diesel Individual Manual
Ow
1.6 SX
Datsun
RediGO
3 2017 250000 46000 Petrol Individual Manual
T Ow
Option
Honda
Amaze Sec
4 2014 450000 141000 Diesel Individual Manual
VX i- Ow
DTEC
df.shape
(4340, 8)
df.size
34720
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4340 entries, 0 to 4339
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 name 4340 non-null object
https://colab.research.google.com/drive/1mtXRApsWREN_dEmjjYwrgdwrP5iTawhC#scrollTo=BcgaAErUE47n 1/5
31/05/2024, 23:06 Data preprocessing fdp 5 .ipynb - Colab
1 year 4340 non-null int64
2 selling_price 4340 non-null int64
3 km_driven 4340 non-null int64
4 fuel 4340 non-null object
5 seller_type 4340 non-null object
6 transmission 4340 non-null object
7 owner 4340 non-null object
dtypes: int64(3), object(5)
memory usage: 271.4+ KB
#data analytics
df.describe()
df.columns
df.name.value_counts().sum()
4340
len(df.name.unique())
1491
df.fuel.value_counts().plot(kind='bar')
https://colab.research.google.com/drive/1mtXRApsWREN_dEmjjYwrgdwrP5iTawhC#scrollTo=BcgaAErUE47n 2/5
31/05/2024, 23:06 Data preprocessing fdp 5 .ipynb - Colab
<Axes: xlabel='fuel'>
df.isnull().sum()
name 0
year 0
selling_price 0
km_driven 0
fuel 0
seller_type 0
transmission 0
owner 0
dtype: int64
df['fuel'].unique()
https://colab.research.google.com/drive/1mtXRApsWREN_dEmjjYwrgdwrP5iTawhC#scrollTo=BcgaAErUE47n 3/5
31/05/2024, 23:06 Data preprocessing fdp 5 .ipynb - Colab
def FuelN(string):
    """Return the numeric code for a fuel-type label.

    Petrol->0, Diesel->1, CNG->2, LPG->3, Electric->4; any other label
    falls back to 0, exactly as the original if/elif chain did.
    """
    codes = {
        'Petrol': 0,
        'Diesel': 1,
        'CNG': 2,
        'LPG': 3,
        'Electric': 4,
    }
    return codes.get(string, 0)
FuelN('Diesel')
df['FuelN']=df['fuel'].apply(FuelN)
df=df.drop('fuel',axis=1)
df.head()
Maruti First
0 2007 60000 70000 Individual Manual
800 AC Owner
Maruti
Wagon First
1 2007 135000 50000 Individual Manual
R LXI Owner
Minor
Hyundai
First
2 Verna 2012 600000 100000 Individual Manual
Owner
1.6 SX
Datsun
RediGO First
3 2017 250000 46000 Individual Manual
T Owner
Option
Honda
Amaze Second
4 2014 450000 141000 Individual Manual
VX i- Owner
DTEC
l=LabelEncoder()
https://colab.research.google.com/drive/1mtXRApsWREN_dEmjjYwrgdwrP5iTawhC#scrollTo=BcgaAErUE47n 4/5
31/05/2024, 23:06 Data preprocessing fdp 5 .ipynb - Colab
df['seller_typeN']=l.fit_transform(df['seller_type'])
df.seller_type.unique()
df.seller_typeN.unique()
array([1, 0, 2])
df=df.drop('seller_type',axis=1)
#take the user input and convert into numpy array as a row
https://colab.research.google.com/drive/1mtXRApsWREN_dEmjjYwrgdwrP5iTawhC#scrollTo=BcgaAErUE47n 5/5
01/06/2024, 20:00 Comparison of algorithms FDP RRIT -MEVI.ipynb - Colab
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
df=pd.read_csv('/content/heart 2.csv')
df.head()
age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca
x=df.drop('target',axis=1)
y=df.target
xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.3,random_state=0)
lr=LogisticRegression()
lr.fit(xtrain,ytrain)
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regress
n_iter_i = _check_optimize_result(
▾ LogisticRegression
LogisticRegression()
xtest
https://colab.research.google.com/drive/15ofczx-qdfUrVWQqoALn6jfEdgx4usqg#scrollTo=UgC3_z0XM5mM 1/5
01/06/2024, 20:00 Comparison of algorithms FDP RRIT -MEVI.ipynb - Colab
age sex cp trestbps chol fbs restecg thalach exang oldpeak slope
... ... ... ... ... ... ... ... ... ... ... ...
308 13 l
pred=lr.predict(xtest)
pred
array([1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0,
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0,
1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1,
1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0,
1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0,
0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1,
1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0,
0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1,
1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0])
ytest.values
array([1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1,
1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0,
1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1,
1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0,
1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0,
0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1,
0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0,
0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1,
1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0])
https://colab.research.google.com/drive/15ofczx-qdfUrVWQqoALn6jfEdgx4usqg#scrollTo=UgC3_z0XM5mM 2/5
01/06/2024, 20:00 Comparison of algorithms FDP RRIT -MEVI.ipynb - Colab
cm=confusion_matrix(ytest,pred)
cm
array([[118, 27],
[ 13, 150]])
ytest.size
308
LGac=accuracy_score(ytest,pred)*100
print('Logistic Regression Accuracy->',LGac)
DT=DecisionTreeClassifier()
DT.fit(xtrain,ytrain)
▾ DecisionTreeClassifier
DecisionTreeClassifier()
predDT=DT.predict(xtest)
DTac=accuracy_score(ytest,predDT)*100
print(' Decision Tree Accuracy->',DTac)
RT=RandomForestClassifier(n_estimators=101)
RT.fit(xtrain,ytrain)
▾ RandomForestClassifier
RandomForestClassifier(n_estimators=101)
RTpred=RT.predict(xtest)
RTac=accuracy_score(ytest,RTpred)*100
print(' RandomForestClassifier Accuracy->',RTac)
https://colab.research.google.com/drive/15ofczx-qdfUrVWQqoALn6jfEdgx4usqg#scrollTo=UgC3_z0XM5mM 3/5
01/06/2024, 20:00 Comparison of algorithms FDP RRIT -MEVI.ipynb - Colab
l=['RandomForestClassifier','DecisionTreeClassifier','LogisticRegression']
ac=[LGac,DTac,RTac]
plt.barh(l,ac,color='red',edgecolor='blue')
u=np.array([61,0,1,148,203,0,1,161,2,0.0,2,1,3]).reshape(1,-1)
ans=RT.predict(u)
if(int(ans)==0):
print('Patient will not have any heart disease')
else:
print('Patient will have heart disease')
https://colab.research.google.com/drive/15ofczx-qdfUrVWQqoALn6jfEdgx4usqg#scrollTo=UgC3_z0XM5mM 4/5
01/06/2024, 20:00 Comparison of algorithms FDP RRIT -MEVI.ipynb - Colab
https://colab.research.google.com/drive/15ofczx-qdfUrVWQqoALn6jfEdgx4usqg#scrollTo=UgC3_z0XM5mM 5/5
02/06/2024, 00:03 Comparison of algorithms Diabetes Dataset FDP RRIT -MEVI.ipynb - Colab
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
df=pd.read_csv('/content/diabetes.csv')
df.head()
0 6 148 72 35 0 33.6
1 1 85 66 29 0 26.6
2 8 183 64 0 0 23.3
3 1 89 66 23 94 28.1
x=df.drop('Outcome',axis=1)
y=df.Outcome
xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.2,random_state=28)
lr=LogisticRegression()
lr.fit(xtrain,ytrain)
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regress
n_iter_i = _check_optimize_result(
▾ LogisticRegression
LogisticRegression()
xtest
https://colab.research.google.com/drive/15ofczx-qdfUrVWQqoALn6jfEdgx4usqg 1/5
02/06/2024, 00:03 Comparison of algorithms Diabetes Dataset FDP RRIT -MEVI.ipynb - Colab
68 1 95 66 13 38 19.6
48 7 103 66 32 0 39.1
74 1 79 75 30 0 32.0
154 8 l
pred=lr.predict(xtest)
pred
array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0,
1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0])
ytest.values
array([0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0,
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1])
cm=confusion_matrix(ytest,pred)
cm
array([[93, 10],
[19, 32]])
ytest.size
154
https://colab.research.google.com/drive/15ofczx-qdfUrVWQqoALn6jfEdgx4usqg 2/5
02/06/2024, 00:03 Comparison of algorithms Diabetes Dataset FDP RRIT -MEVI.ipynb - Colab
LGac=accuracy_score(ytest,pred)*100
print('Logistic Regression Accuracy->',LGac)
DT=DecisionTreeClassifier()
DT.fit(xtrain,ytrain)
▾ DecisionTreeClassifier
DecisionTreeClassifier()
predDT=DT.predict(xtest)
DTac=accuracy_score(ytest,predDT)*100
print(' Decision Tree Accuracy->',DTac)
RT=RandomForestClassifier(n_estimators=101)
RT.fit(xtrain,ytrain)
▾ RandomForestClassifier
RandomForestClassifier(n_estimators=101)
RTpred=RT.predict(xtest)
RTac=accuracy_score(ytest,RTpred)*100
print(' RandomForestClassifier Accuracy->',RTac)
vcLgDt=VotingClassifier(
estimators=[
('Lg',lr),
('DT',DT),
],voting='soft'
)
vcLgDt.fit(xtrain,ytrain)
vcLgDtpred=vcLgDt.predict(xtest)
vcLgDtac=accuracy_score(ytest,vcLgDtpred)*100
print('Accuracy of combined Algorithm LR and DT is',vcLgDtac)
https://colab.research.google.com/drive/15ofczx-qdfUrVWQqoALn6jfEdgx4usqg 3/5
02/06/2024, 00:03 Comparison of algorithms Diabetes Dataset FDP RRIT -MEVI.ipynb - Colab
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
vcLgRt=VotingClassifier(
estimators=[
('Lg',lr),
('RT',RT),
],voting='soft'
)
vcLgRt.fit(xtrain,ytrain)
vcLgRtpred=vcLgRt.predict(xtest)
vcLgRtac=accuracy_score(ytest,vcLgRtpred)*100
print('Accuracy of combined Algorithm LR and RT is',vcLgRtac)
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWa
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
Accuracy of combined Algorithm LR and RT is 81.16883116883116
vcLgRtDT=VotingClassifier(
estimators=[
('RT',RT),
('Lg',lr),
],voting='soft'
)
vcLgRtDT.fit(xtrain,ytrain)
vcLgRtDTpred=vcLgRtDT.predict(xtest)
vcLgRtDTac=accuracy_score(ytest,vcLgRtDTpred)*100
print('Accuracy of combined Algorithm LR and RT is',vcLgRtDTac)
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
l=['RandomForestClassifier','DecisionTreeClassifier','LogisticRegression']
ac=[LGac,DTac,RTac]
plt.barh(l,ac,color='red',edgecolor='blue')
https://colab.research.google.com/drive/15ofczx-qdfUrVWQqoALn6jfEdgx4usqg 4/5
02/06/2024, 00:03 Comparison of algorithms Diabetes Dataset FDP RRIT -MEVI.ipynb - Colab
u=np.array([2,144,58,33,235,31.6,0.422,35]).reshape(1,-1)
ans=RT.predict(u)
if(int(ans)==0):
print('Patient will not have any Diabeties')
else:
print('Patient will have Diabeties')
https://colab.research.google.com/drive/15ofczx-qdfUrVWQqoALn6jfEdgx4usqg 5/5
02/06/2024, 00:00 KMeans.ipynb - Colab
data=pd.read_csv('/content/Mall_Customers.csv')
data.head()
0 1 Male 19 15 39
1 2 Male 21 15 81
2 3 Female 20 16 6
3 4 Female 23 16 77
4 5 Female 31 17 40
features=data.columns
lb=LabelEncoder()
data['GenderN']=lb.fit_transform(data['Gender'])
data=data.drop('Gender',axis=1)
data=data.drop('CustomerID',axis=1)
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
https://colab.research.google.com/drive/1BJI6xGTQTlQevjT_bntWDfa2FPmcqtow 1/6
02/06/2024, 00:00 KMeans.ipynb - Colab
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Age 200 non-null int64
1 Annual Income (k$) 200 non-null int64
2 Spending Score (1-100) 200 non-null int64
3 GenderN 200 non-null int64
dtypes: int64(4)
memory usage: 6.4 KB
https://colab.research.google.com/drive/1BJI6xGTQTlQevjT_bntWDfa2FPmcqtow 3/6
02/06/2024, 00:00 KMeans.ipynb - Colab
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: Future
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: Future
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: Future
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: Future
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: Future
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: Future
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: Future
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: Future
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: Future
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: Future
warnings.warn(
optimal_clusters = 7
kmeans = KMeans(random_state=42, n_clusters=optimal_clusters)
clusters = kmeans.fit_predict(scaled_features)
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: Future
warnings.warn(
https://colab.research.google.com/drive/1BJI6xGTQTlQevjT_bntWDfa2FPmcqtow 4/6
02/06/2024, 00:00 KMeans.ipynb - Colab
data['Cluster'] = clusters
data
0 19 15 39 1 3
1 21 15 81 1 3
2 20 16 6 0 2
3 23 16 77 0 2
4 31 17 40 0 2
195 35 120 79 0 1
196 45 126 28 0 4
197 32 126 74 1 5
198 32 137 18 1 4
199 30 137 83 1 5
plt.figure(figsize=(10, 5))
plt.scatter(data['Annual Income (k$)'], data['Spending Score (1-100
plt.title('Clusters of customers')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.show()
https://colab.research.google.com/drive/1BJI6xGTQTlQevjT_bntWDfa2FPmcqtow 5/6
02/06/2024, 00:00 KMeans.ipynb - Colab
1. Silhouette Score Range: -1 to 1 Interpretation: Close to 1: Indicates that the data points are
well-clustered, with data points very close to the centroid of their cluster and far from other
clusters. Close to 0: Indicates that the data points are on or very close to the boundary
between clusters, implying overlapping clusters. Negative Values: Indicate that the data
points may have been assigned to the wrong clusters. Desired Value: Ideally, you want a
score close to 1, but a value above 0.5 is generally considered good. Values around 0.25-
0.5 may be acceptable depending on the complexity and nature of the data.
2. Calinski-Harabasz Index Range: No fixed range (higher is better) Interpretation: Higher
Values: Indicate that the clusters are dense and well-separated from each other. Desired
Value: There is no absolute threshold, but higher values are better. You should compare the
index across different models or configurations; the configuration with the highest Calinski-
Harabasz Index is considered the best.
3. Davies-Bouldin Index Range: 0 to ∞ (lower is better) Interpretation: Lower Values: Indicate
that the clusters are compact and well-separated from each other. Desired Value: Values
closer to 0 are better. Similar to the Calinski-Harabasz Index, you should compare the index
across different models or configurations; the configuration with the lowest Davies-Bouldin
Index is considered the best.
https://colab.research.google.com/drive/1BJI6xGTQTlQevjT_bntWDfa2FPmcqtow 6/6
27/05/2024, 10:27 car sales prediction.ipynb - Colab
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
df.head()
Maruti
0 2007 60000 70000 Petrol Individual Manual
800 AC Ow
Maruti
Wagon
1 2007 135000 50000 Petrol Individual Manual
R LXI Ow
Minor
df.size
34720
df.shape
(4340, 8)
df.describe()
df.info
pandas.core.frame.DataFrame.info
def info(verbose: bool | None=None, buf: WriteBuffer[str] | None=None,
max_cols: int | None=None, memory_usage: bool | str | None=None,
show_counts: bool | None=None) -> None
df.columns
df['fuel'].value_counts().plot(kind='bar')
https://colab.research.google.com/drive/1VRan8E5SgFCLBaTxlfYjLxUGly0m_VdR#scrollTo=rji1C1FoPnDi 1/5
27/05/2024, 10:27 car sales prediction.ipynb - Colab
<Axes: xlabel='fuel'>
df['transmission'].value_counts().plot(kind='bar')
<Axes: xlabel='transmission'>
df.groupby('transmission')['selling_price'].min()
transmission
Automatic 79000
Manual 20000
Name: selling_price, dtype: int64
Data Preprocessing
df['seller_type'].value_counts()
seller_type
Individual 3244
Dealer 994
Trustmark Dealer 102
Name: count, dtype: int64
https://colab.research.google.com/drive/1VRan8E5SgFCLBaTxlfYjLxUGly0m_VdR#scrollTo=rji1C1FoPnDi 2/5
27/05/2024, 10:27 car sales prediction.ipynb - Colab
def stc(string):
    """Encode a seller_type label as an int.

    Individual->0, Dealer->1, 'Trustmark Dealer'->2; anything else maps
    to 0, matching the original if/elif chain.
    """
    table = {'Individual': 0, 'Dealer': 1, 'Trustmark Dealer': 2}
    return table.get(string, 0)
df['seller_type_N']=df['seller_type'].apply(stc)
df[df['seller_type']=='Dealer']
Toyota
Corolla
12 2018 1650000 25000 Petrol Dealer Automati
Altis 1.8 VL
CVT
Toyota
Corolla
25 2018 1650000 25000 Petrol Dealer Automati
Altis 1.8 VL
CVT
Maruti Ciaz
26 2015 585000 24000 Petrol Dealer Manua
VXi Plus
Hyundai
27 Venue SX 2019 1195000 5000 Diesel Dealer Manua
Opt Diesel
Jaguar XF
29 2.2 Litre 2014 1964999 28000 Diesel Dealer Automati
Luxury
Audi Q5
3.0 TDI
4304 2018 3899000 22000 Diesel Dealer Automati
Quattro
Technology
Hyundai
4306 i10 Sportz 2011 235000 43100 Petrol Dealer Manua
1.2
df['transmission'].unique()
def ttc(string):
    """Encode a transmission label: 'Automatic'->1, anything else->0."""
    return 1 if string == 'Automatic' else 0
df['transmissionN']=df['transmission'].apply(ttc)
lef=LabelEncoder()
df['fuelN']=lef.fit_transform(df['fuel'])
df['fuelN']
0 4
1 4
2 1
3 4
https://colab.research.google.com/drive/1VRan8E5SgFCLBaTxlfYjLxUGly0m_VdR#scrollTo=rji1C1FoPnDi 3/5
27/05/2024, 10:27 car sales prediction.ipynb - Colab
4 1
..
4335 1
4336 1
4337 4
4338 1
4339 4
Name: fuelN, Length: 4340, dtype: int64
df['fuel'].unique()
df=df.drop('fuel',axis=1)
df=df.drop('seller_type',axis=1)
df=df.drop("transmission",axis=1)
leo=LabelEncoder()
df['ownerN']=leo.fit_transform(df['owner'])
df=df.drop('owner',axis=1)
lec=LabelEncoder()
df['nameN']=lec.fit_transform(df['name'])
df=df.drop('name',axis=1)
df.head()
df.columns
X=df.drop('selling_price',axis=1).values.reshape(4340,7)
X.shape
(4340, 7)
y=df.selling_price.values.reshape(-1,1)
y
array([[ 60000],
[135000],
[600000],
...,
[110000],
[865000],
[225000]])
xtrain,xtest,ytrain,ytest=train_test_split(X,y,test_size=0.3)
https://colab.research.google.com/drive/1VRan8E5SgFCLBaTxlfYjLxUGly0m_VdR#scrollTo=rji1C1FoPnDi 4/5
27/05/2024, 10:27 car sales prediction.ipynb - Colab
LR=LinearRegression()
LR.fit(xtrain,ytrain)
▾ LinearRegression
LinearRegression()
pred=LR.predict(xtest)
df1.name.unique()
df1.transmission.unique()
https://colab.research.google.com/drive/1VRan8E5SgFCLBaTxlfYjLxUGly0m_VdR#scrollTo=rji1C1FoPnDi 5/5