Python 2

You might also like

Download as pdf or txt
Download as pdf or txt
You are on page 1of 28

import numpy as np

a=np.array([1,2,3])
print(a)

[1 2 3]

import numpy as np
a=np.array([[1,2],[2,3],[4,5]])
print(a.ndim)

import numpy as np
a=np.zeros((3,3))
print(a)

[[0. 0. 0.]
[0. 0. 0.]
[0. 0. 0.]]

import numpy as np
c=np.full((2,2),5)
print(c)

[[5 5]
[5 5]]

import numpy as np
a=np.random.random((2,2))
print(a)

[[0.71052385 0.22235215]
[0.2020739 0.21300174]]

import numpy as np
a=np.arange(0,20,5)
print(a)

[ 0 5 10 15]

import numpy as np
a=np.array([[1,2,3],[4,5,3]])
b=a.reshape(3,2)
print(b)

[[1 2]
[3 4]
[5 3]]

import numpy as np
a=np.array([[[1,2,3],[4,5,6]],[[7,8,9],[10,11,12]]])
print(a)

[[[ 1 2 3]
[ 4 5 6]]

[[ 7 8 9]
[10 11 12]]]

In [14]:
import numpy as np
a=np.array([[1,2],[3,4]])
print(a)

[[1 2]
[3 4]]

import numpy as np
a=np.array([[1,2],[3,5]])
b=a.flatten()
print(b)

[1 2 3 5]
import numpy as np
print(np.__version__)

1.24.3

import numpy as np
print(np.__version__)

1.24.3
import numpy as np
a=np.array([5,6,7,8,9])
print(a[2:4])

[7 8]

import numpy as np
a=np.array([9,7,5,43,2])
print(a[3:])

[43 2]

import numpy as np
a=np.array([8,3,0,1,6,7])
print(a[:5])

[8 3 0 1 6]

import numpy as np
a=np.array([1,2,3,4,5,6,7])
print(a[::2])

[1 3 5 7]

import numpy as np
a=np.array([[1,2,3,4],[5,6,7,8]])
print(a[0,0:4])

[1 2 3 4]

import numpy as np
a=np.array([[1,4,7,3],[6,8,8,3],[4,3,4,2],[2,9,2,7]])
print(a)

[[1 4 7 3]
[6 8 8 3]
[4 3 4 2]
[2 9 2 7]]

import numpy as np
a=np.array([[1,4,7,3],[6,8,8,3],[4,3,4,2],[2,9,2,7]])
print(a[0:3,0:4])

[[1 4 7 3]
[6 8 8 3]
[4 3 4 2]]

import numpy as np
a=np.array([[1,3,67,4],[3,8,2,9],[1,8,4,3],[9,2,6,3]])
print(a[0:4:2])

In [24]:
import numpy as np
a=np.array([[1,3,5,6],[3,5,9,9],[3,4,2,3],[4,92,29,3]])
print(a[0:2,0:3])

[[1 3 5]
[3 5 9]]

import numpy as np
a=np.arange(10)
s=slice(2,7,2)
print(a[s])

[2 4 6]

import numpy as np
a=np.array([1,2,3,4,5,6,7])
print(a[1:5])

[2 3 4 5]

import numpy as np
a=np.array([[1,2,3],[4,5,6]])
print(a[1,0:2])

[4 5]

import numpy as np
a=np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]])
print(a[2,1:3])

[10 11]

import numpy as np
a=np.array([[[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]],[[17,18,19,20],[21,22,23,24],[25,26,27,28],[29,30,31,32]]])
print(a[1,2])

[25 26 27 28]

import numpy as np
a=np.array([[[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]],[[17,18,19,20],[21,22,23,24],[25,26,27,28],[29,30,31,32]]])
print(a[1,0:4:2])

[[17 18 19 20]
[25 26 27 28]]

import numpy as np
x=np.array((3,5,7))
y=np.array((5,7,9))
print(np.hstack((x,y)))

[3 5 7 5 7 9]

import numpy as np
x=np.array([[3], [5], [7]])
y=np.array([[5], [7], [9]])
print(np.hstack((x,y)))

[[3 5]
[5 7]
[7 9]]

import numpy as np
x = np.array([3, 5, 7])
y = np.array([5, 7, 9])
print(np.vstack((x,y)))

[[3 5 7]
[5 7 9]]

import numpy as np
x=np.array([[3], [5], [7]])
y=np.array([[5], [7], [9]])
print(np.vstack((x,y)))

[[3]
[5]
[7]
[5]
[7]
[9]]

import numpy as np
arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])
arr = np.dstack((arr1, arr2))
print(arr)

[[[1 4]
[2 5]
[3 6]]]

import numpy as np
a=np.array([1, 2, 3, 4, 5, 6])
print(np.array_split(a, 3))

[array([1, 2]), array([3, 4]), array([5, 6])]


import numpy as np
arr1 =[10,20,30,40,50]
arr2 =[2,4,5,8,10]
a = np.array(arr1)
b= np.array(arr2)
print("orginal arrays")
print(a)
print(b)
print("\n vector addition")
print(a+b)
print("\n vector subtrction")
print(a-b)
print("\n vector divition")
print(a/b)
print("\n vector dotproduct")
print(a.dot(b))
print("\n scalar multiplication ")
sclr =5
print("scalar value =",sclr)
print("array =",a)
print("result=",a*sclr)

def my_func(x,y):
if x>y:
return x-y
else:
return x+y
print("\n\n numpy.vectorize methord")
print("(return x-y if x>y otherwise return x+y)")
arr1 =[10,4,20]
arr2 =[2,3,30]
vec_func = np.vectorize(my_func)
print("array:",arr1)
print("array:",arr2)
print("result:",vec_func(arr1,arr2))

orginal arrays
[10 20 30 40 50]
[ 2 4 5 8 10]

vector addition
[12 24 35 48 60]

vector subtrction
[ 8 16 25 32 40]

vector divition
[5. 5. 6. 5. 5.]

vector dotproduct
1070

scalar multiplication
scalar value = 5
array = [10 20 30 40 50]
result= [ 50 100 150 200 250]

numpy.vectorize methord
(return x-y if x>y otherwise return x+y)
array: [10, 4, 20]
array: [2, 3, 30]
result: [ 8 1 50]

In [1]:
import pandas as pd
import numpy as np
print("DataFrame.apply function:\n")

DataFrame.apply function:

In [2]:
info=pd.DataFrame([(2,7)],columns=['P','Q'])
print("\nOriginal DataFrame:\n",info)

Original DataFrame:
P Q
0 2 7

print("\nDataFrame.Assign Function:\n")
d2=pd.DataFrame([['Dale',123],['Mark',143]],columns=['Emp','ID'])
print("\nOriginal DataFrame:\n",d2)
d2['Age']=(35,40)
print("\nAdding new column:\n",d2)
d=d2.assign(gender=['Male','Male'])
print("\nAdding new column:\n",d)

DataFrame.Assign Function:

Original DataFrame:
Emp ID
0 Dale 123
1 Mark 143

Adding new column:


Emp ID Age
0 Dale 123 35
1 Mark 143 40

Adding new column:


Emp ID Age gender
0 Dale 123 35 Male
1 Mark 143 40 Male

import pandas as pd
import numpy as np
print("DataFrame.apply function:\n")

DataFrame.apply function:

info=pd.DataFrame([(2,7)],columns=['P','Q'])
print("\nOriginal DataFrame:\n",info)

Original DataFrame:
P Q
0 2 7

print("\nSquare root of DataFrame:\n",info.apply(np.sqrt))


print("\nSum of each Column:\n",info.apply(np.sum,axis=0))
print("\nSum of each row:\n",info.apply(np.sum,axis=1))

Square root of DataFrame:


P Q
0 1.414214 2.645751

Sum of each Column:


P 2
Q 7
dtype: int64

Sum of each row:


0 9
dtype: int64

info=pd.DataFrame([[1,5,7],[2,7,8],[3,6,9]],columns=['X','Y','Z'])
print("\nOriginal DataFrame:\n",info)
print("\nMinimum and Maximum of each column:\n")
print(info.agg(['min','max']))

Original DataFrame:
X Y Z
0 1 5 7
1 2 7 8
2 3 6 9

Minimum and Maximum of each column:

X Y Z
min 1 5 7
max 3 7 9

print("\nDataFrame.Assign Function:\n")
d2=pd.DataFrame([['Dale',123],['Mark',143]],columns=['Emp','ID'])
print("\nOriginal DataFrame:\n",d2)
d2['Age']=(35,40)
print("\nAdding new column:\n",d2)
d=d2.assign(gender=['Male','Male'])
print("\nAdding new column:\n",d)

DataFrame.Assign Function:

Original DataFrame:
Emp ID
0 Dale 123
1 Mark 143

Adding new column:


Emp ID Age
0 Dale 123 35
1 Mark 143 40

Adding new column:


Emp ID Age gender
0 Dale 123 35 Male
1 Mark 143 40 Male
print("\nDataFrame.Sortfunction:\n")
info=pd.DataFrame(np.random.rand(5,2),index=[3,2,0,4,1],columns=('col 3','col 4'))
print(info)

DataFrame.Sortfunction:

col 3 col 4
3 0.360301 0.668302
2 0.675958 0.197902
0 0.846554 0.940530
4 0.813285 0.086068
1 0.812191 0.437882

print("\nDataFrame.Mergefunction:\n")
left=pd.DataFrame({'id':[1,2,3,4,5],'Name':['Alex','Amy','Allen','Alice','Ayoung'],'Sub':['Sub1','Sub2','sub4','sub6','sub5']})

DataFrame.Mergefunction:

right=pd.DataFrame({'id':[1,2,3,4,5],'Name':['billy','brian','bran','bryce','betty'],'sub':['sub2','sub4','sub3','sub6','sub5']})
print(left)
print(right)
print(pd.merge(left,right,on='id'))

id Name Sub
0 1 Alex Sub1
1 2 Amy Sub2
2 3 Allen sub4
3 4 Alice sub6
4 5 Ayoung sub5
id Name sub
0 1 billy sub2
1 2 brian sub4
2 3 bran sub3
3 4 bryce sub6
4 5 betty sub5
id Name_x Sub Name_y sub
0 1 Alex Sub1 billy sub2
1 2 Amy Sub2 brian sub4
2 3 Allen sub4 bran sub3
3 4 Alice sub6 bryce sub6
4 5 Ayoung sub5 betty sub5
import pandas as pd
import numpy as np
arr=np.array(['P','a','n','d','a','s'])
a=pd.Series(arr)
print("Series from array:")
print(a)

Series from array:


0 P
1 a
2 n
3 d
4 a
5 s
dtype: object

arr={'x':0.,'y':1.,'z':2.}
b=pd.Series(arr)
print("\n\nSeries from dictionary:\n")
print(b)

Series from dictionary:

x 0.0
y 1.0
z 2.0
dtype: float64

x=pd.Series (4,index=[0,1,2,3])
print("\nSeries using scalar\n")
print(x)

Series using scalar


0 4
1 4
2 4
3 4
dtype: int64

x=pd.Series([1,2,3],index=['a','b','c'])
print("\nSeries through index:")
print(x)
a=pd.Series(data=[1,2,3,4])
print("\n a series :\n",a)
print("\nIndex:\n",a.index)
print("\nvalues:\n",a.values)
print("\nshape:",a.shape)
print("\nDimension:",a.ndim)
print("\nSize",a.size)

Series through index:


a 1
b 2
c 3
dtype: int64

a series :
0 1
1 2
2 3
3 4
dtype: int64

Index:
RangeIndex(start=0, stop=4, step=1)

values:
[1 2 3 4]

shape: (4,)

Dimension: 1

Size 4
import pandas as pd
d=pd.read_excel("C:hemanth41.xlsx")
print("Get the table data:\n")
print(d)

Get the table data:

sno Name Reg no Dept sub1 sub2 sub3 sub4 sub5 TOTAL
0 1 walter vel 12 EEE 60 75 80 85 91 456
1 2 thomas 19 EEE 87 88 89 91 90 423
2 3 tommy 5 EEE 30 45 20 70 80 360
3 4 arthur 2 EEE 30 32 34 50 70 300
4 5 patrick 20 EEE 34 56 31 23 21 222
5 6 chris evans 3 EEE 32 43 54 65 76 323
6 7 jeny 7 EEE 56 78 98 87 43 362
7 8 kavitha 9 EEE 41 54 78 87 98 419
8 9 bruce 10 EEE 32 43 21 78 90 312
9 10 jason 15 EEE 50 20 30 40 10 150

df=pd.DataFrame(d)
print("\n Get the column heading\n",df.columns)
print("\n Get the shape-(no.of rows,no.of columns)\n",df.shape)
print("\n Get particular column values\n",df['Reg no'])
print("\n Extract\slice the table values-[including this row,excluding this row]\n",df[2:5])
print("\n Get the particular row values-through number identification\n",df.loc[7])
print("\n Get the particular row value-through'Register number'indentification\n",d.loc[d['Reg no']==105])
df=d['TOTAL']/5
print("\nmake an average of total marks:\n",df)

Get the column heading


Index(['sno', 'Name', 'Reg no', 'Dept', 'sub1', 'sub2', 'sub3', 'sub4', 'sub5',
'TOTAL'],
dtype='object')
Get the shape-(no.of rows,no.of columns)
(10, 10)

Get particular column values


0 12
1 19
2 5
3 2
4 20
5 3
6 7
7 9
8 10
9 15
Name: Reg no, dtype: int64

Extract\slice the table values-[including this row,excluding this row]


sno Name Reg no Dept sub1 sub2 sub3 sub4 sub5 TOTAL
2 3 tommy 5 EEE 30 45 20 70 80 360
3 4 arthur 2 EEE 30 32 34 50 70 300
4 5 patrick 20 EEE 34 56 31 23 21 222

Get the particular row values-through number identification


sno 8
Name kavitha
Reg no 9
Dept EEE
sub1 41
sub2 54
sub3 78
sub4 87
sub5 98
TOTAL 419
Name: 7, dtype: object

Get the particular row value-through'Register number'indentification


Empty DataFrame
Columns: [sno, Name, Reg no, Dept, sub1, sub2, sub3, sub4, sub5, TOTAL]
Index: []

make an average of total marks:


0 91.2
1 84.6
2 72.0
3 60.0
4 44.4
5 64.6
6 72.4
7 83.8
8 62.4
9 30.0
Name: TOTAL, dtype: float64
import pandas as pd
#creating dataframe
df=pd.DataFrame([[11,21,31],[1,22,32],[315,32,33]],index=['one','two','three'],columns=['a','b','c'])
print(df)
Output:
abc
one 11 21 31
two 1 22 32
three 315 32 33
#Taking the input from DataFrame and storing in the Excel File
print("Taking the input from DataFrame and storing in the Excel File")
df.to_excel('C:Excel_for_store.xlsx',sheet_name='store')
Output:
Taking the input from DataFrame and storing in the Excel File
#Second DataFrame input to another excel file
d=pd.DataFrame([[110,210,310],[12,220,320],[310,320,330]],index=['four','five','six'],columns=['a','b'
,'c'])
d.to_excel("C:Excel_for_store1.xlsx",sheet_name='store1')
#Merging two Excel files input into third file
x=pd.read_excel("C:Excel_for_store.xlsx")#reading first excel file
y=pd.read_excel("C:Excel_for_store1.xlsx")#reading second excel file
z=pd.concat([x,y])#concatenating excel files 1 and 2
z.to_excel("C:Excel_for_store2.xlsx")
#Sorting the column values
df=z.sort_values(['a'])
print(df)
df.to_excel("C:Excel_for_store3.xlsx")#storing it as excel file

34

Output:
Unnamed: 0 a b c
1 two 1 22 32
0 one 11 21 31
1 five 12 220 320
0 four 110 210 310
2 six 310 320 330
2 three 315 32 33
#Reading a new csv file
df=pd.read_csv("C:store-csv.csv")
print(df)
Output:
Unnamed 0 a b c
0 1 two 1 22 32
1 0 one 11 21 31
Code:
print(list(df))
Output:
['Unnamed', '0', 'a', 'b', 'c']
Code:
print(format(len(df)))
Output:
2
import numpy as np
from numpy import random
a=np.zeros((5,4))
for i in range(5):
a[i,0]=50
a[i,1]=random.randint(30,180)
b=a[i,0]*np.sin(np.radians(a[i,1]))
a[i,2]=np.round_(b, decimals=2)
a[i,3]=a[i,2]/10
c=np.zeros(5)
for i in range(5):
c[i]=a[i,2]
print(a)

[[ 50. 58. 42.4 4.24 ]


[ 50. 125. 40.96 4.096]
[ 50. 146. 27.96 2.796]
[ 50. 124. 41.45 4.145]
[ 50. 103. 48.72 4.872]]

print(max(c))
48.72

print(min(c))
27.96

print(sum(c))
201.48999999999998

print(sum(c)/5)
40.297999999999995

print(np.sort(c))
[27.96 40.96 41.45 42.4 48.72]

print(a.T)
[[ 50. 50. 50. 50. 50. ]
[ 58. 125. 146. 124. 103. ]
[ 42.4 40.96 27.96 41.45 48.72 ]
[ 4.24 4.096 2.796 4.145 4.872]]

import pandas as pd
d=pd.read_excel("/ElectricCarData_Norm-2.xlsx")
print(d)
Brand Model Accel TopSpeed Range \
0 Tesla Model 3 Long Range Dual Motor 4.6 sec 233 km/h 450 km
1 Volkswagen ID.3 Pure 10.0 sec 160 km/h 270 km
2 Polestar 2 4.7 sec 210 km/h 400 km
3 BMW iX3 6.8 sec 180 km/h 360 km
4 Honda e 9.5 sec 145 km/h 170 km
.. ... ... ... ... ...
98 Nissan Ariya 63kWh 7.5 sec 160 km/h 330 km
99 Audi e-tron S Sportback 55 quattro 4.5 sec 210 km/h 335 km
100 Nissan Ariya e-4ORCE 63kWh 5.9 sec 200 km/h 325 km
101 Nissan Ariya e-4ORCE 87kWh Performance 5.1 sec 200 km/h 375 km
102 Byton M-Byte 95 kWh 2WD 7.5 sec 190 km/h 400 km

Efficiency FastCharge RapidCharge PowerTrain \


0 161 Wh/km 940 km/h Rapid charging possible All Wheel Drive
1 167 Wh/km 250 km/h Rapid charging possible Rear Wheel Drive
2 181 Wh/km 620 km/h Rapid charging possible All Wheel Drive
3 206 Wh/km 560 km/h Rapid charging possible Rear Wheel Drive
4 168 Wh/km 190 km/h Rapid charging possible Rear Wheel Drive
.. ... ... ... ...
98 191 Wh/km 440 km/h Rapid charging possible Front Wheel Drive
99 258 Wh/km 540 km/h Rapid charging possible All Wheel Drive
100 194 Wh/km 440 km/h Rapid charging possible All Wheel Drive
101 232 Wh/km 450 km/h Rapid charging possible All Wheel Drive
102 238 Wh/km 480 km/h Rapid charging possible All Wheel Drive

PlugType BodyStyle Segment Seats PriceEuro


0 Type 2 CCS Sedan D 5 55480
1 Type 2 CCS Hatchback C 5 30000
2 Type 2 CCS Liftback D 5 56440
3 Type 2 CCS SUV D 5 68040
4 Type 2 CCS Hatchback B 4 32997
.. ... ... ... ... ...
98 Type 2 CCS Hatchback C 5 45000
99 Type 2 CCS SUV E 5 96050
100 Type 2 CCS Hatchback C 5 50000
101 Type 2 CCS Hatchback C 5 65000
102 Type 2 CCS SUV E 5 62000

[103 rows x 14 columns]

a=pd.DataFrame(d)
print(a)

Brand Model Accel TopSpeed Range \


0 Tesla Model 3 Long Range Dual Motor 4.6 sec 233 km/h 450 km
1 Volkswagen ID.3 Pure 10.0 sec 160 km/h 270 km
2 Polestar 2 4.7 sec 210 km/h 400 km
3 BMW iX3 6.8 sec 180 km/h 360 km
4 Honda e 9.5 sec 145 km/h 170 km
.. ... ... ... ... ...
98 Nissan Ariya 63kWh 7.5 sec 160 km/h 330 km
99 Audi e-tron S Sportback 55 quattro 4.5 sec 210 km/h 335 km
100 Nissan Ariya e-4ORCE 63kWh 5.9 sec 200 km/h 325 km
101 Nissan Ariya e-4ORCE 87kWh Performance 5.1 sec 200 km/h 375 km
102 Byton M-Byte 95 kWh 2WD 7.5 sec 190 km/h 400 km

Efficiency FastCharge RapidCharge PowerTrain \


0 161 Wh/km 940 km/h Rapid charging possible All Wheel Drive
1 167 Wh/km 250 km/h Rapid charging possible Rear Wheel Drive
2 181 Wh/km 620 km/h Rapid charging possible All Wheel Drive
3 206 Wh/km 560 km/h Rapid charging possible Rear Wheel Drive
4 168 Wh/km 190 km/h Rapid charging possible Rear Wheel Drive
.. ... ... ... ...
98 191 Wh/km 440 km/h Rapid charging possible Front Wheel Drive
99 258 Wh/km 540 km/h Rapid charging possible All Wheel Drive
100 194 Wh/km 440 km/h Rapid charging possible All Wheel Drive
101 232 Wh/km 450 km/h Rapid charging possible All Wheel Drive
102 238 Wh/km 480 km/h Rapid charging possible All Wheel Drive

PlugType BodyStyle Segment Seats PriceEuro


0 Type 2 CCS Sedan D 5 55480
1 Type 2 CCS Hatchback C 5 30000
2 Type 2 CCS Liftback D 5 56440
3 Type 2 CCS SUV D 5 68040
4 Type 2 CCS Hatchback B 4 32997
.. ... ... ... ... ...
98 Type 2 CCS Hatchback C 5 45000
99 Type 2 CCS SUV E 5 96050
100 Type 2 CCS Hatchback C 5 50000
101 Type 2 CCS Hatchback C 5 65000
102 Type 2 CCS SUV E 5 62000

[103 rows x 14 columns]

print(a.columns)

Index(['Brand', 'Model', 'Accel', 'TopSpeed', 'Range', 'Efficiency',


'FastCharge', 'RapidCharge', 'PowerTrain', 'PlugType', 'BodyStyle',
'Segment', 'Seats', 'PriceEuro'],
dtype='object')

print(a.shape)
(103, 14)
print(a.iloc[ : , 6:9])

FastCharge RapidCharge PowerTrain


0 940 km/h Rapid charging possible All Wheel Drive
1 250 km/h Rapid charging possible Rear Wheel Drive
2 620 km/h Rapid charging possible All Wheel Drive
3 560 km/h Rapid charging possible Rear Wheel Drive
4 190 km/h Rapid charging possible Rear Wheel Drive
.. ... ... ...
98 440 km/h Rapid charging possible Front Wheel Drive
99 540 km/h Rapid charging possible All Wheel Drive
100 440 km/h Rapid charging possible All Wheel Drive
101 450 km/h Rapid charging possible All Wheel Drive
102 480 km/h Rapid charging possible All Wheel Drive

[103 rows x 3 columns]

print(a.loc[5])
ndex(['Brand', 'Model', 'Accel', 'TopSpeed', 'Range', 'Efficiency',
Brand Lucid
Model Air
Accel 2.8 sec
TopSpeed 250 km/h
Range 610 km
Efficiency 180 Wh/km
FastCharge 620 km/h
RapidCharge Rapid charging possible
PowerTrain All Wheel Drive
PlugType Type 2 CCS
BodyStyle Sedan
Segment F
Seats 5
PriceEuro 105000
Name: 5, dtype: object
print(a.drop("RapidCharge",axis='columns'))
Brand Model Accel TopSpeed Range \
0 Tesla Model 3 Long Range Dual Motor 4.6 sec 233 km/h 450 km
1 Volkswagen ID.3 Pure 10.0 sec 160 km/h 270 km
2 Polestar 2 4.7 sec 210 km/h 400 km
3 BMW iX3 6.8 sec 180 km/h 360 km
4 Honda e 9.5 sec 145 km/h 170 km
.. ... ... ... ... ...
98 Nissan Ariya 63kWh 7.5 sec 160 km/h 330 km
99 Audi e-tron S Sportback 55 quattro 4.5 sec 210 km/h 335 km
100 Nissan Ariya e-4ORCE 63kWh 5.9 sec 200 km/h 325 km
101 Nissan Ariya e-4ORCE 87kWh Performance 5.1 sec 200 km/h 375 km
102 Byton M-Byte 95 kWh 2WD 7.5 sec 190 km/h 400 km

Efficiency FastCharge PowerTrain PlugType BodyStyle Segment \


0 161 Wh/km 940 km/h All Wheel Drive Type 2 CCS Sedan D
1 167 Wh/km 250 km/h Rear Wheel Drive Type 2 CCS Hatchback C
2 181 Wh/km 620 km/h All Wheel Drive Type 2 CCS Liftback D
3 206 Wh/km 560 km/h Rear Wheel Drive Type 2 CCS SUV D
4 168 Wh/km 190 km/h Rear Wheel Drive Type 2 CCS Hatchback B
.. ... ... ... ... ... ...
98 191 Wh/km 440 km/h Front Wheel Drive Type 2 CCS Hatchback C
99 258 Wh/km 540 km/h All Wheel Drive Type 2 CCS SUV E
100 194 Wh/km 440 km/h All Wheel Drive Type 2 CCS Hatchback C
101 232 Wh/km 450 km/h All Wheel Drive Type 2 CCS Hatchback C
102 238 Wh/km 480 km/h All Wheel Drive Type 2 CCS SUV E

Seats PriceEuro
0 5 55480
1 5 30000
2 5 56440
3 5 68040
4 4 32997
.. ... ...
98 5 45000
99 5 96050
100 5 50000
101 5 65000
102 5 62000

[103 rows x 13 columns]

Pandas CaseStudy
1. Create a dataset on Automobile ( columns : product_id, brand, price, body style, color, rows: 10)
2.Name the dataset as Automobile Data and get the number of columns and names of the columns
3.Print all the columns for price less than 10 lakh and create a new dataset
4.Create a new column for mileage and cc
5.Append the created column to the dataset
6.Extract 5th row and 2nd column
7.Print the dimension, values, axes, shape, size and data type of each column
8.Create a new dataset of its specifications with 3 rows and append to the dataset
9.Print the updated dataset
10.Sort by price
11.Delete 3rd row and 4th column
12.Use the aggregate functions on price column
13.Print the first 3 rows and last 3 rows
14.Change the 6th rows with new values and print the dataset
15.Convert the dataset into csv file

import pandas as pd

# 1. Create a dataset
data = {
'product_id': [101, 102, 103, 104, 105, 106, 107, 108, 109, 110],
'brand': ['Toyota', 'Honda', 'Ford', 'Chevrolet', 'Nissan', 'Hyundai', 'Volkswagen', 'BMW', 'Audi', 'Mercedes'],
'price': [900000, 800000, 1100000, 950000, 850000, 750000, 1300000, 1800000, 1600000, 2200000],
'body_style': ['Sedan', 'SUV', 'Hatchback', 'Sedan', 'SUV', 'Hatchback', 'Sedan', 'SUV', 'Hatchback', 'Sedan'],
'color': ['Red', 'Blue', 'Black', 'White', 'Silver', 'Gray', 'Blue', 'Black', 'Red', 'White']
}

df = pd.DataFrame(data)
print(df)

product_id brand price body_style color


0 101 Toyota 900000 Sedan Red
1 102 Honda 800000 SUV Blue
2 103 Ford 1100000 Hatchback Black
3 104 Chevrolet 950000 Sedan White
4 105 Nissan 850000 SUV Silver
5 106 Hyundai 750000 Hatchback Gray
6 107 Volkswagen 1300000 Sedan Blue
7 108 BMW 1800000 SUV Black
8 109 Audi 1600000 Hatchback Red
9 110 Mercedes 2200000 Sedan White

In [8]:
# 2. Name the dataset and get column information
df.name = 'Automobile Data'
num_columns = len(df.columns)
column_names = df.columns.tolist()
print(column_names)

['product_id', 'brand', 'price', 'body_style', 'color']

In [9]:
# 3. Print columns for price less than 10 lakh and create a new dataset
df_less_than_10_lakh = df[df['price'] < 1000000]
print(df_less_than_10_lakh)

product_id brand price body_style color


0 101 Toyota 900000 Sedan Red
1 102 Honda 800000 SUV Blue
3 104 Chevrolet 950000 Sedan White
4 105 Nissan 850000 SUV Silver
5 106 Hyundai 750000 Hatchback Gray

In [10]:
# 4. Create new columns for mileage and cc
df['mileage'] = [20, 18, 22, 21, 19, 17, 23, 16, 15, 14]
df['cc'] = [1500, 1600, 1200, 1400, 1700, 1300, 1800, 1900, 2000, 2200]
print(df)

product_id brand price body_style color mileage cc


0 101 Toyota 900000 Sedan Red 20 1500
1 102 Honda 800000 SUV Blue 18 1600
2 103 Ford 1100000 Hatchback Black 22 1200
3 104 Chevrolet 950000 Sedan White 21 1400
4 105 Nissan 850000 SUV Silver 19 1700
5 106 Hyundai 750000 Hatchback Gray 17 1300
6 107 Volkswagen 1300000 Sedan Blue 23 1800
7 108 BMW 1800000 SUV Black 16 1900
8 109 Audi 1600000 Hatchback Red 15 2000
9 110 Mercedes 2200000 Sedan White 14 2200

In [12]:
# 5. Append the created columns to the dataset
df = df[['product_id', 'brand', 'price', 'body_style', 'color', 'mileage', 'cc']]
print(df)

product_id brand price body_style color mileage cc


0 101 Toyota 900000 Sedan Red 20 1500
1 102 Honda 800000 SUV Blue 18 1600
2 103 Ford 1100000 Hatchback Black 22 1200
3 104 Chevrolet 950000 Sedan White 21 1400
4 105 Nissan 850000 SUV Silver 19 1700
5 106 Hyundai 750000 Hatchback Gray 17 1300
6 107 Volkswagen 1300000 Sedan Blue 23 1800
7 108 BMW 1800000 SUV Black 16 1900
8 109 Audi 1600000 Hatchback Red 15 2000
9 110 Mercedes 2200000 Sedan White 14 2200

In [11]:
# 6. Extract 5th row and 2nd column
row_5_col_2 = df.iloc[4, 1]
print(row_5_col_2)

Nissan

In [13]:
# 7. Print dimension, values, axes, shape, size, and data type of each column
print(df.ndim)
print(df.values)
print(df.axes)
print(df.shape)
print(df.size)
print(df.dtypes)

2
[[101 'Toyota' 900000 'Sedan' 'Red' 20 1500]
[102 'Honda' 800000 'SUV' 'Blue' 18 1600]
[103 'Ford' 1100000 'Hatchback' 'Black' 22 1200]
[104 'Chevrolet' 950000 'Sedan' 'White' 21 1400]
[105 'Nissan' 850000 'SUV' 'Silver' 19 1700]
[106 'Hyundai' 750000 'Hatchback' 'Gray' 17 1300]
[107 'Volkswagen' 1300000 'Sedan' 'Blue' 23 1800]
[108 'BMW' 1800000 'SUV' 'Black' 16 1900]
[109 'Audi' 1600000 'Hatchback' 'Red' 15 2000]
[110 'Mercedes' 2200000 'Sedan' 'White' 14 2200]]
[RangeIndex(start=0, stop=10, step=1), Index(['product_id', 'brand', 'price', 'body_style', 'color', 'mileage', 'cc'], dtype='object')]
(10, 7)
70
product_id int64
brand object
price int64
body_style object
color object
mileage int64
cc int64
dtype: object

In [14]:
# 8. Create a new dataset of its specifications with 3 rows and append to the dataset
specifications = {
'product_id': [111, 112, 113],
'brand': ['Kia', 'Mazda', 'Subaru'],
'price': [1200000, 1000000, 1100000],
'body_style': ['SUV', 'Sedan', 'Hatchback'],
'color': ['Gray', 'Green', 'Orange'],
'mileage': [16, 17, 18],
'cc': [1700, 1500, 1400]
}
df_specifications = pd.DataFrame(specifications)
df = df.append(df_specifications, ignore_index=True)
print(df)

product_id brand price body_style color mileage cc


0 101 Toyota 900000 Sedan Red 20 1500
1 102 Honda 800000 SUV Blue 18 1600
2 103 Ford 1100000 Hatchback Black 22 1200
3 104 Chevrolet 950000 Sedan White 21 1400
4 105 Nissan 850000 SUV Silver 19 1700
5 106 Hyundai 750000 Hatchback Gray 17 1300
6 107 Volkswagen 1300000 Sedan Blue 23 1800
7 108 BMW 1800000 SUV Black 16 1900
8 109 Audi 1600000 Hatchback Red 15 2000
9 110 Mercedes 2200000 Sedan White 14 2200
10 111 Kia 1200000 SUV Gray 16 1700
11 112 Mazda 1000000 Sedan Green 17 1500
12 113 Subaru 1100000 Hatchback Orange 18 1400

C:\Users\Tcs\AppData\Local\Temp\ipykernel_3956\1105582730.py:12: FutureWarning: The frame.append method is deprecated and will be


removed from pandas in a future version. Use pandas.concat instead.
df = df.append(df_specifications, ignore_index=True)

In [15]:
# 9. Print the updated dataset
print("Updated Dataset:")
print(df)

Updated Dataset:
product_id brand price body_style color mileage cc
0 101 Toyota 900000 Sedan Red 20 1500
1 102 Honda 800000 SUV Blue 18 1600
2 103 Ford 1100000 Hatchback Black 22 1200
3 104 Chevrolet 950000 Sedan White 21 1400
4 105 Nissan 850000 SUV Silver 19 1700
5 106 Hyundai 750000 Hatchback Gray 17 1300
6 107 Volkswagen 1300000 Sedan Blue 23 1800
7 108 BMW 1800000 SUV Black 16 1900
8 109 Audi 1600000 Hatchback Red 15 2000
9 110 Mercedes 2200000 Sedan White 14 2200
10 111 Kia 1200000 SUV Gray 16 1700
11 112 Mazda 1000000 Sedan Green 17 1500
12 113 Subaru 1100000 Hatchback Orange 18 1400

In [16]:
# 10. Sort by price
df = df.sort_values(by='price')
print(df)

product_id brand price body_style color mileage cc


5 106 Hyundai 750000 Hatchback Gray 17 1300
1 102 Honda 800000 SUV Blue 18 1600
4 105 Nissan 850000 SUV Silver 19 1700
0 101 Toyota 900000 Sedan Red 20 1500
3 104 Chevrolet 950000 Sedan White 21 1400
11 112 Mazda 1000000 Sedan Green 17 1500
2 103 Ford 1100000 Hatchback Black 22 1200
12 113 Subaru 1100000 Hatchback Orange 18 1400
10 111 Kia 1200000 SUV Gray 16 1700
6 107 Volkswagen 1300000 Sedan Blue 23 1800
8 109 Audi 1600000 Hatchback Red 15 2000
7 108 BMW 1800000 SUV Black 16 1900
9 110 Mercedes 2200000 Sedan White 14 2200

In [17]:
# 11. Delete 3rd row and 4th column
df = df.drop(index=2, axis=0)
df = df.drop(columns=df.columns[3])
print(df)

product_id brand price color mileage cc


5 106 Hyundai 750000 Gray 17 1300
1 102 Honda 800000 Blue 18 1600
4 105 Nissan 850000 Silver 19 1700
0 101 Toyota 900000 Red 20 1500
3 104 Chevrolet 950000 White 21 1400
11 112 Mazda 1000000 Green 17 1500
12 113 Subaru 1100000 Orange 18 1400
10 111 Kia 1200000 Gray 16 1700
6 107 Volkswagen 1300000 Blue 23 1800
8 109 Audi 1600000 Red 15 2000
7 108 BMW 1800000 Black 16 1900
9 110 Mercedes 2200000 White 14 2200

In [18]:
# 12. Use aggregate functions on the price column
price_stats = df['price'].agg(['mean', 'median', 'min', 'max'])
print(price_stats)

mean 1.204167e+06
median 1.050000e+06
min 7.500000e+05
max 2.200000e+06
Name: price, dtype: float64

In [19]:
# 13. Print the first 3 rows and last 3 rows
print(df.head(3))
print(df.tail(3))

product_id brand price color mileage cc


5 106 Hyundai 750000 Gray 17 1300
1 102 Honda 800000 Blue 18 1600
4 105 Nissan 850000 Silver 19 1700
product_id brand price color mileage cc
8 109 Audi 1600000 Red 15 2000
7 108 BMW 1800000 Black 16 1900
9 110 Mercedes 2200000 White 14 2200

In [6]:
# 14. Convert the dataset into a CSV file
df.to_csv('Automobile_Data.csv', index=False)
print(df)

product_id brand price color mileage cc


5 106 Hyundai 750000 Gray 17 1300
1 102 Honda 800000 Blue 18 1600
4 105 Nissan 850000 Silver 19 1700
0 101 Toyota 900000 Red 20 1500
3 104 Chevrolet 950000 White 21 1400
11 112 Mazda 1000000 Green 17 1500
12 113 Subaru 1100000 Orange 18 1400
10 111 Kia 1200000 Gray 16 1700
6 107 Volkswagen 1300000 Blue 23 1800
8 109 Audi 1600000 Red 15 2000
7 108 BMW 1800000 Black 16 1900
9 110 Mercedes 2200000 White 14 2200

STORE
CASE STUDY – ELECTRIC VEHICLE
AIM:
To perform and generate the dataset for the given conditions and store the same.
DESCRIPTION:Create a Dataset with Brand, Type, Price, Warranty , Range and Sales Percentage (min. 5 details)Store the created
dataset.Create new dataset based on the type(Scooter/Bike)Merge the newly created file with the existing file.Sort the dataset based on the
sales percentage and store.Read and display the edited dataset.List the column headings and get the length of the table data.Create a Dataset
with same Brand in with IC Engines Merge the IC Engine Dataset with first Dataset with BrandSort the Price,Sales Percentage and Range.

import pandas as pd

# Create a Dataset with Brand, Type, Price, Warranty, Range, and Sales Percentage
data = {
'Brand': ['Tesla', 'Niu', 'Ather', 'Hero', 'Revolt'],
'Type': ['Car', 'Scooter', 'Scooter', 'Bike', 'Bike'],
'Price': [80000, 2000, 1500, 3000, 2500],
'Warranty': [4, 2, 2, 3, 2],
'Range': [300, 60, 80, 100, 90],
'Sales Percentage': [30, 15, 10, 5, 8]
}

ev_df = pd.DataFrame(data)
print(ev_df)

Brand Type Price Warranty Range Sales Percentage


0 Tesla Car 80000 4 300 30
1 Niu Scooter 2000 2 60 15
2 Ather Scooter 1500 2 80 10
3 Hero Bike 3000 3 100 5
4 Revolt Bike 2500 2 90 8

In [3]:
# Store the created dataset
ev_df.to_csv('electric_vehicle_data.csv', index=False)

# Create a new dataset based on the type (Scooter/Bike)


scooter_df = ev_df[ev_df['Type'] == 'Scooter']
bike_df = ev_df[ev_df['Type'] == 'Bike']
print(bike_df)

Brand Type Price Warranty Range Sales Percentage


3 Hero Bike 3000 3 100 5
4 Revolt Bike 2500 2 90 8

In [4]:
# Create a new dataset based on the type (Scooter/Bike)
scooter_df = ev_df[ev_df['Type'] == 'Scooter']
bike_df = ev_df[ev_df['Type'] == 'Bike']
print(bike_df)

Brand Type Price Warranty Range Sales Percentage


3 Hero Bike 3000 3 100 5
4 Revolt Bike 2500 2 90 8

In [5]:
# Merge the newly created file with the existing file
merged_df = pd.concat([ev_df, scooter_df], ignore_index=True)
print(merged_df)

Brand Type Price Warranty Range Sales Percentage


0 Tesla Car 80000 4 300 30
1 Niu Scooter 2000 2 60 15
2 Ather Scooter 1500 2 80 10
3 Hero Bike 3000 3 100 5
4 Revolt Bike 2500 2 90 8
5 Niu Scooter 2000 2 60 15
6 Ather Scooter 1500 2 80 10

In [6]:
# Sort the dataset based on the sales percentage and store
sorted_df = ev_df.sort_values(by='Sales Percentage', ascending=False)
sorted_df.to_csv('sorted_electric_vehicle_data.csv', index=False)

In [7]:
# Read and display the edited dataset
edited_df = pd.read_csv('sorted_electric_vehicle_data.csv')
print("Edited Dataset:")
print(edited_df)

Edited Dataset:
Brand Type Price Warranty Range Sales Percentage
0 Tesla Car 80000 4 300 30
1 Niu Scooter 2000 2 60 15
2 Ather Scooter 1500 2 80 10
3 Revolt Bike 2500 2 90 8
4 Hero Bike 3000 3 100 5

In [8]:
# List the column headings and get the length of the table data
columns = edited_df.columns.tolist()
print(len(edited_df))

In [11]:
# Create a Dataset with the same Brand with IC Engines
ic_engine_data = {
'Brand': ['Tesla', 'Niu', 'Ather', 'Hero', 'Revolt'],
'Engine Type': ['Electric', 'IC Engine', 'IC Engine', 'IC Engine', 'IC Engine']
}
ic_engine_df = pd.DataFrame(ic_engine_data)
print(ic_engine_df)

Brand Engine Type


0 Tesla Electric
1 Niu IC Engine
2 Ather IC Engine
3 Hero IC Engine
4 Revolt IC Engine

In [12]:
# Merge the IC Engine Dataset with the first Dataset with Brand
merged_with_ic_engine_df = pd.merge(ev_df, ic_engine_df, on='Brand', how='left')
print(merged_with_ic_engine_df)

Brand Type Price Warranty Range Sales Percentage Engine Type


0 Tesla Car 80000 4 300 30 Electric
1 Niu Scooter 2000 2 60 15 IC Engine
2 Ather Scooter 1500 2 80 10 IC Engine
3 Hero Bike 3000 3 100 5 IC Engine
4 Revolt Bike 2500 2 90 8 IC Engine

In [13]:
# Sort the Price, Sales Percentage, and Range
sorted_by_price = merged_with_ic_engine_df.sort_values(by='Price')
sorted_by_sales_percentage = merged_with_ic_engine_df.sort_values(by='Sales Percentage', ascending=False)
sorted_by_range = merged_with_ic_engine_df.sort_values(by='Range', ascending=False)
print(sorted_by_price)
print(sorted_by_sales_percentage)
print(sorted_by_range)

Brand Type Price Warranty Range Sales Percentage Engine Type


2 Ather Scooter 1500 2 80 10 IC Engine
1 Niu Scooter 2000 2 60 15 IC Engine
4 Revolt Bike 2500 2 90 8 IC Engine
3 Hero Bike 3000 3 100 5 IC Engine
0 Tesla Car 80000 4 300 30 Electric
Brand Type Price Warranty Range Sales Percentage Engine Type
0 Tesla Car 80000 4 300 30 Electric
1 Niu Scooter 2000 2 60 15 IC Engine
2 Ather Scooter 1500 2 80 10 IC Engine
4 Revolt Bike 2500 2 90 8 IC Engine
3 Hero Bike 3000 3 100 5 IC Engine
Brand Type Price Warranty Range Sales Percentage Engine Type
0 Tesla Car 80000 4 300 30 Electric
3 Hero Bike 3000 3 100 5 IC Engine
4 Revolt Bike 2500 2 90 8 IC Engine
2 Ather Scooter 1500 2 80 10 IC Engine
1 Niu Scooter 2000 2 60 15 IC Engine

In [ ]:

You might also like