Python 2

import numpy as np
a=np.array([1,2,3])
print(a)
[1 2 3]
import numpy as np
a=np.array([[1,2],[2,3],[4,5]])
print(a.ndim)
import numpy as np
a=np.zeros((3,3))
print(a)
[[0. 0. 0.]
[0. 0. 0.]
[0. 0. 0.]]
import numpy as np
c=np.full((2,2),5)
print(c)
[[5 5]
[5 5]]
import numpy as np
a=np.random.random((2,2))
print(a)
[[0.71052385 0.22235215]
[0.2020739 0.21300174]]
import numpy as np
a=np.arange(0,20,5)
print(a)
[ 0 5 10 15]
import numpy as np
a=np.array([[1,2,3],[4,5,3]])
b=a.reshape(3,2)
print(b)
[[1 2]
[3 4]
[5 3]]
import numpy as np
a=np.array([[[1,2,3],[4,5,6]],[[7,8,9],[10,11,12]]])
print(a)
[[[ 1 2 3]
[ 4 5 6]]
[[ 7 8 9]
[10 11 12]]]
In [14]:
import numpy as np
a=np.array([[1,2],[3,4]])
print(a)
[[1 2]
[3 4]]
import numpy as np
a=np.array([[1,2],[3,5]])
b=a.flatten()
print(b)
[1 2 3 5]
import numpy as np
print(np.__version__)
1.24.3
import numpy as np
print(np.__version__)
1.24.3
import numpy as np
a=np.array([5,6,7,8,9])
print(a[2:4])
[7 8]
import numpy as np
a=np.array([9,7,5,43,2])
print(a[3:])
[43 2]
import numpy as np
a=np.array([8,3,0,1,6,7])
print(a[:5])
[8 3 0 1 6]
import numpy as np
a=np.array([1,2,3,4,5,6,7])
print(a[::2])
[1 3 5 7]
import numpy as np
a=np.array([[1,2,3,4],[5,6,7,8]])
print(a[0,0:4])
[1 2 3 4]
import numpy as np
a=np.array([[1,4,7,3],[6,8,8,3],[4,3,4,2],[2,9,2,7]])
print(a)
[[1 4 7 3]
[6 8 8 3]
[4 3 4 2]
[2 9 2 7]]
import numpy as np
a=np.array([[1,4,7,3],[6,8,8,3],[4,3,4,2],[2,9,2,7]])
print(a[0:3,0:4])
[[1 4 7 3]
[6 8 8 3]
[4 3 4 2]]
import numpy as np
a=np.array([[1,3,67,4],[3,8,2,9],[1,8,4,3],[9,2,6,3]])
print(a[0:4:2])
In [24]:
import numpy as np
a=np.array([[1,3,5,6],[3,5,9,9],[3,4,2,3],[4,92,29,3]])
print(a[0:2,0:3])
[[1 3 5]
[3 5 9]]
import numpy as np
a=np.arange(10)
s=slice(2,7,2)
print(a[s])
[2 4 6]
import numpy as np
a=np.array([1,2,3,4,5,6,7])
print(a[1:5])
[2 3 4 5]
import numpy as np
a=np.array([[1,2,3],[4,5,6]])
print(a[1,0:2])
[4 5]
import numpy as np
a=np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]])
print(a[2,1:3])
[10 11]
import numpy as np
a=np.array([[[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]],[[17,18,19,20],[21,22,23,24],[25,26,27,28],[29,30,31,32]]])
print(a[1,2])
[25 26 27 28]
import numpy as np
a=np.array([[[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]],[[17,18,19,20],[21,22,23,24],[25,26,27,28],[29,30,31,32]]])
print(a[1,0:4:2])
[[17 18 19 20]
[25 26 27 28]]
import numpy as np
x=np.array((3,5,7))
y=np.array((5,7,9))
print(np.hstack((x,y)))
[3 5 7 5 7 9]
import numpy as np
x=np.array([[3], [5], [7]])
y=np.array([[5], [7], [9]])
print(np.hstack((x,y)))
[[3 5]
[5 7]
[7 9]]
import numpy as np
x = np.array([3, 5, 7])
y = np.array([5, 7, 9])
print(np.vstack((x,y)))
[[3 5 7]
[5 7 9]]
import numpy as np
x=np.array([[3], [5], [7]])
y=np.array([[5], [7], [9]])
print(np.vstack((x,y)))
[[3]
[5]
[7]
[5]
[7]
[9]]
import numpy as np
arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])
arr = np.dstack((arr1, arr2))
print(arr)
[[[1 4]
[2 5]
[3 6]]]
import numpy as np
a=np.array([1, 2, 3, 4, 5, 6])
print(np.array_split(a, 3))
[array([1, 2]), array([3, 4]), array([5, 6])]

import numpy as np
arr1 =[10,20,30,40,50]
arr2 =[2,4,5,8,10]
a = np.array(arr1)
b= np.array(arr2)
print("orginal arrays")
print(a)
print(b)
print("\n vector addition")
print(a+b)
print("\n vector subtrction")
print(a-b)
print("\n vector divition")
print(a/b)
print("\n vector dotproduct")
print(a.dot(b))
print("\n scalar multiplication ")
sclr =5
print("scalar value =",sclr)
print("array =",a)
print("result=",a*sclr)
def my_func(x,y):
if x>y:
return x-y
else:
return x+y
print("\n\n numpy.vectorize methord")
print("(return x-y if x>y otherwise return x+y)")
arr1 =[10,4,20]
arr2 =[2,3,30]
vec_func = np.vectorize(my_func)
print("array:",arr1)
print("array:",arr2)
print("result:",vec_func(arr1,arr2))
orginal arrays
[10 20 30 40 50]
[ 2 4 5 8 10]
vector addition
[12 24 35 48 60]
vector subtrction
[ 8 16 25 32 40]
vector divition
[5. 5. 6. 5. 5.]
vector dotproduct
1070
scalar multiplication
scalar value = 5
array = [10 20 30 40 50]
result= [ 50 100 150 200 250]
numpy.vectorize methord
(return x-y if x>y otherwise return x+y)
array: [10, 4, 20]
array: [2, 3, 30]
result: [ 8 1 50]
In [1]:
import pandas as pd
import numpy as np
print("DataFrame.apply function:\n")
DataFrame.apply function:
In [2]:
info=pd.DataFrame([(2,7)],columns=['P','Q'])
print("\nOriginal DataFrame:\n",info)
Original DataFrame:
P Q
0 2 7
print("\nDataFrame.Assign Function:\n")
d2=pd.DataFrame([['Dale',123],['Mark',143]],columns=['Emp','ID'])
print("\nOriginal DataFrame:\n",d2)
d2['Age']=(35,40)
print("\nAdding new column:\n",d2)
d=d2.assign(gender=['Male','Male'])
print("\nAdding new column:\n",d)
DataFrame.Assign Function:
Original DataFrame:
Emp ID
0 Dale 123
1 Mark 143
Adding new column:

Emp ID Age
0 Dale 123 35
1 Mark 143 40
Adding new column:

Emp ID Age gender
0 Dale 123 35 Male
1 Mark 143 40 Male
import pandas as pd
import numpy as np
print("DataFrame.apply function:\n")
DataFrame.apply function:
info=pd.DataFrame([(2,7)],columns=['P','Q'])
Original DataFrame:
P Q
0 2 7
print("\nSquare root of DataFrame:\n",info.apply(np.sqrt))

print("\nSum of each Column:\n",info.apply(np.sum,axis=0))
print("\nSum of each row:\n",info.apply(np.sum,axis=1))
Square root of DataFrame:

P Q
0 1.414214 2.645751
Sum of each Column:

P 2
Q 7
dtype: int64
Sum of each row:

0 9
dtype: int64
info=pd.DataFrame([[1,5,7],[2,7,8],[3,6,9]],columns=['X','Y','Z'])
print("\nMinimum and Maximum of each column:\n")
print(info.agg(['min','max']))
Original DataFrame:
X Y Z
0 1 5 7
1 2 7 8
2 3 6 9
Minimum and Maximum of each column:
X Y Z
min 1 5 7
max 3 7 9
print("\nDataFrame.Assign Function:\n")
d2=pd.DataFrame([['Dale',123],['Mark',143]],columns=['Emp','ID'])
print("\nOriginal DataFrame:\n",d2)
d2['Age']=(35,40)
print("\nAdding new column:\n",d2)
d=d2.assign(gender=['Male','Male'])
print("\nAdding new column:\n",d)
DataFrame.Assign Function:
Original DataFrame:
Emp ID
0 Dale 123
1 Mark 143
Adding new column:

Emp ID Age
0 Dale 123 35
1 Mark 143 40
Adding new column:

Emp ID Age gender
0 Dale 123 35 Male
1 Mark 143 40 Male
print("\nDataFrame.Sortfunction:\n")
info=pd.DataFrame(np.random.rand(5,2),index=[3,2,0,4,1],columns=('col 3','col 4'))
print(info)
DataFrame.Sortfunction:
col 3 col 4
3 0.360301 0.668302
2 0.675958 0.197902
0 0.846554 0.940530
4 0.813285 0.086068
1 0.812191 0.437882
print("\nDataFrame.Mergefunction:\n")
left=pd.DataFrame({'id':[1,2,3,4,5],'Name':['Alex','Amy','Allen','Alice','Ayoung'],'Sub':['Sub1','Sub2','sub4','sub6','sub5']})
DataFrame.Mergefunction:
right=pd.DataFrame({'id':[1,2,3,4,5],'Name':['billy','brian','bran','bryce','betty'],'sub':['sub2','sub4','sub3','sub6','sub5']})
print(left)
print(right)
print(pd.merge(left,right,on='id'))
id Name Sub
0 1 Alex Sub1
1 2 Amy Sub2
2 3 Allen sub4
3 4 Alice sub6
4 5 Ayoung sub5
id Name sub
0 1 billy sub2
1 2 brian sub4
2 3 bran sub3
3 4 bryce sub6
4 5 betty sub5
id Name_x Sub Name_y sub
0 1 Alex Sub1 billy sub2
1 2 Amy Sub2 brian sub4
2 3 Allen sub4 bran sub3
3 4 Alice sub6 bryce sub6
4 5 Ayoung sub5 betty sub5
import pandas as pd
import numpy as np
arr=np.array(['P','a','n','d','a','s'])
a=pd.Series(arr)
print("Series from array:")
print(a)
Series from array:

0 P
1 a
2 n
3 d
4 a
5 s
dtype: object
arr={'x':0.,'y':1.,'z':2.}
b=pd.Series(arr)
print("\n\nSeries from dictionary:\n")
print(b)
Series from dictionary:
x 0.0
y 1.0
z 2.0
dtype: float64
x=pd.Series (4,index=[0,1,2,3])
print("\nSeries using scalar\n")
print(x)
Series using scalar

0 4
1 4
2 4
3 4
dtype: int64
x=pd.Series([1,2,3],index=['a','b','c'])
print("\nSeries through index:")
print(x)
a=pd.Series(data=[1,2,3,4])
print("\n a series :\n",a)
print("\nIndex:\n",a.index)
print("\nvalues:\n",a.values)
print("\nshape:",a.shape)
print("\nDimension:",a.ndim)
print("\nSize",a.size)
Series through index:

a 1
b 2
c 3
dtype: int64
a series :
0 1
1 2
2 3
3 4
dtype: int64
Index:
RangeIndex(start=0, stop=4, step=1)
values:
[1 2 3 4]
shape: (4,)
Dimension: 1
Size 4
import pandas as pd
d=pd.read_excel("C:hemanth41.xlsx")
print("Get the table data:\n")
print(d)
Get the table data:
sno Name Reg no Dept sub1 sub2 sub3 sub4 sub5 TOTAL
0 1 walter vel 12 EEE 60 75 80 85 91 456
1 2 thomas 19 EEE 87 88 89 91 90 423
2 3 tommy 5 EEE 30 45 20 70 80 360
3 4 arthur 2 EEE 30 32 34 50 70 300
4 5 patrick 20 EEE 34 56 31 23 21 222
5 6 chris evans 3 EEE 32 43 54 65 76 323
6 7 jeny 7 EEE 56 78 98 87 43 362
7 8 kavitha 9 EEE 41 54 78 87 98 419
8 9 bruce 10 EEE 32 43 21 78 90 312
9 10 jason 15 EEE 50 20 30 40 10 150
df=pd.DataFrame(d)
print("\n Get the column heading\n",df.columns)
print("\n Get the shape-(no.of rows,no.of columns)\n",df.shape)
print("\n Get particular column values\n",df['Reg no'])
print("\n Extract\slice the table values-[including this row,excluding this row]\n",df[2:5])
print("\n Get the particular row values-through number identification\n",df.loc[7])
print("\n Get the particular row value-through'Register number'indentification\n",d.loc[d['Reg no']==105])
df=d['TOTAL']/5
print("\nmake an average of total marks:\n",df)
Get the column heading

Index(['sno', 'Name', 'Reg no', 'Dept', 'sub1', 'sub2', 'sub3', 'sub4', 'sub5',
'TOTAL'],
dtype='object')
Get the shape-(no.of rows,no.of columns)
(10, 10)
Get particular column values

0 12
1 19
2 5
3 2
4 20
5 3
6 7
7 9
8 10
9 15
Name: Reg no, dtype: int64
Extract\slice the table values-[including this row,excluding this row]

sno Name Reg no Dept sub1 sub2 sub3 sub4 sub5 TOTAL
2 3 tommy 5 EEE 30 45 20 70 80 360
3 4 arthur 2 EEE 30 32 34 50 70 300
4 5 patrick 20 EEE 34 56 31 23 21 222
Get the particular row values-through number identification

sno 8
Name kavitha
Reg no 9
Dept EEE
sub1 41
sub2 54
sub3 78
sub4 87
sub5 98
TOTAL 419
Name: 7, dtype: object
Get the particular row value-through'Register number'indentification

Empty DataFrame
Columns: [sno, Name, Reg no, Dept, sub1, sub2, sub3, sub4, sub5, TOTAL]
Index: []
make an average of total marks:

0 91.2
1 84.6
2 72.0
3 60.0
4 44.4
5 64.6
6 72.4
7 83.8
8 62.4
9 30.0
Name: TOTAL, dtype: float64
import pandas as pd
#creating dataframe
df=pd.DataFrame([[11,21,31],[1,22,32],[315,32,33]],index=['one','two','three'],columns=['a','b','c'])
print(df)
Output:
abc
one 11 21 31
two 1 22 32
three 315 32 33
#Taking the input from DataFrame and storing in the Excel File
print("Taking the input from DataFrame and storing in the Excel File")
df.to_excel('C:Excel_for_store.xlsx',sheet_name='store')
Output:
Taking the input from DataFrame and storing in the Excel File
#Second DataFrame input to another excel file
d=pd.DataFrame([[110,210,310],[12,220,320],[310,320,330]],index=['four','five','six'],columns=['a','b'
,'c'])
d.to_excel("C:Excel_for_store1.xlsx",sheet_name='store1')
#Merging two Excel files input into third file
x=pd.read_excel("C:Excel_for_store.xlsx")#reading first excel file
y=pd.read_excel("C:Excel_for_store1.xlsx")#reading second excel file
z=pd.concat([x,y])#concatenating excel files 1 and 2
z.to_excel("C:Excel_for_store2.xlsx")
#Sorting the column values
df=z.sort_values(['a'])
print(df)
df.to_excel("C:Excel_for_store3.xlsx")#storing it as excel file
34
Output:
Unnamed: 0 a b c
1 two 1 22 32
0 one 11 21 31
1 five 12 220 320
0 four 110 210 310
2 six 310 320 330
2 three 315 32 33
#Reading a new csv file
df=pd.read_csv("C:store-csv.csv")
print(df)
Output:
Unnamed 0 a b c
0 1 two 1 22 32
1 0 one 11 21 31
Code:
print(list(df))
Output:
['Unnamed', '0', 'a', 'b', 'c']
Code:
print(format(len(df)))
Output:
2
import numpy as np
from numpy import random
a=np.zeros((5,4))
for i in range(5):
a[i,0]=50
a[i,1]=random.randint(30,180)
b=a[i,0]*np.sin(np.radians(a[i,1]))
a[i,2]=np.round_(b, decimals=2)
a[i,3]=a[i,2]/10
c=np.zeros(5)
for i in range(5):
c[i]=a[i,2]
print(a)
[[ 50. 58. 42.4 4.24 ]

[ 50. 125. 40.96 4.096]
[ 50. 146. 27.96 2.796]
[ 50. 124. 41.45 4.145]
[ 50. 103. 48.72 4.872]]
print(max(c))
48.72
print(min(c))
27.96
print(sum(c))
201.48999999999998
print(sum(c)/5)
40.297999999999995
print(np.sort(c))
[27.96 40.96 41.45 42.4 48.72]
print(a.T)
[[ 50. 50. 50. 50. 50. ]
[ 58. 125. 146. 124. 103. ]
[ 42.4 40.96 27.96 41.45 48.72 ]
[ 4.24 4.096 2.796 4.145 4.872]]
import pandas as pd
d=pd.read_excel("/ElectricCarData_Norm-2.xlsx")
print(d)
Brand Model Accel TopSpeed Range \
0 Tesla Model 3 Long Range Dual Motor 4.6 sec 233 km/h 450 km
1 Volkswagen ID.3 Pure 10.0 sec 160 km/h 270 km
2 Polestar 2 4.7 sec 210 km/h 400 km
3 BMW iX3 6.8 sec 180 km/h 360 km
4 Honda e 9.5 sec 145 km/h 170 km
.. ... ... ... ... ...
98 Nissan Ariya 63kWh 7.5 sec 160 km/h 330 km
99 Audi e-tron S Sportback 55 quattro 4.5 sec 210 km/h 335 km
100 Nissan Ariya e-4ORCE 63kWh 5.9 sec 200 km/h 325 km
101 Nissan Ariya e-4ORCE 87kWh Performance 5.1 sec 200 km/h 375 km
102 Byton M-Byte 95 kWh 2WD 7.5 sec 190 km/h 400 km
Efficiency FastCharge RapidCharge PowerTrain \

0 161 Wh/km 940 km/h Rapid charging possible All Wheel Drive
1 167 Wh/km 250 km/h Rapid charging possible Rear Wheel Drive
.. ... ... ... ...
98 191 Wh/km 440 km/h Rapid charging possible Front Wheel Drive
PlugType BodyStyle Segment Seats PriceEuro

0 Type 2 CCS Sedan D 5 55480
1 Type 2 CCS Hatchback C 5 30000
2 Type 2 CCS Liftback D 5 56440
3 Type 2 CCS SUV D 5 68040
4 Type 2 CCS Hatchback B 4 32997
.. ... ... ... ... ...
99 Type 2 CCS SUV E 5 96050
102 Type 2 CCS SUV E 5 62000
[103 rows x 14 columns]
a=pd.DataFrame(d)
print(a)

3 BMW iX3 6.8 sec 180 km/h 360 km
.. ... ... ... ... ...
Efficiency FastCharge RapidCharge PowerTrain \

.. ... ... ... ...
98 191 Wh/km 440 km/h Rapid charging possible Front Wheel Drive
PlugType BodyStyle Segment Seats PriceEuro

0 Type 2 CCS Sedan D 5 55480
2 Type 2 CCS Liftback D 5 56440
3 Type 2 CCS SUV D 5 68040
4 Type 2 CCS Hatchback B 4 32997
.. ... ... ... ... ...
99 Type 2 CCS SUV E 5 96050
102 Type 2 CCS SUV E 5 62000
print(a.columns)
Index(['Brand', 'Model', 'Accel', 'TopSpeed', 'Range', 'Efficiency',

'FastCharge', 'RapidCharge', 'PowerTrain', 'PlugType', 'BodyStyle',
'Segment', 'Seats', 'PriceEuro'],
dtype='object')
print(a.shape)
(103, 14)
print(a.iloc[ : , 6:9])
FastCharge RapidCharge PowerTrain

0 940 km/h Rapid charging possible All Wheel Drive
1 250 km/h Rapid charging possible Rear Wheel Drive
.. ... ... ...
98 440 km/h Rapid charging possible Front Wheel Drive
print(a.loc[5])
ndex(['Brand', 'Model', 'Accel', 'TopSpeed', 'Range', 'Efficiency',
Brand Lucid
Model Air
Accel 2.8 sec
TopSpeed 250 km/h
Range 610 km
Efficiency 180 Wh/km
FastCharge 620 km/h
RapidCharge Rapid charging possible
PowerTrain All Wheel Drive
PlugType Type 2 CCS
BodyStyle Sedan
Segment F
Seats 5
PriceEuro 105000
Name: 5, dtype: object
print(a.drop("RapidCharge",axis='columns'))
3 BMW iX3 6.8 sec 180 km/h 360 km
.. ... ... ... ... ...
Efficiency FastCharge PowerTrain PlugType BodyStyle Segment \

0 161 Wh/km 940 km/h All Wheel Drive Type 2 CCS Sedan D
1 167 Wh/km 250 km/h Rear Wheel Drive Type 2 CCS Hatchback C
2 181 Wh/km 620 km/h All Wheel Drive Type 2 CCS Liftback D
3 206 Wh/km 560 km/h Rear Wheel Drive Type 2 CCS SUV D
4 168 Wh/km 190 km/h Rear Wheel Drive Type 2 CCS Hatchback B
.. ... ... ... ... ... ...
98 191 Wh/km 440 km/h Front Wheel Drive Type 2 CCS Hatchback C
99 258 Wh/km 540 km/h All Wheel Drive Type 2 CCS SUV E
100 194 Wh/km 440 km/h All Wheel Drive Type 2 CCS Hatchback C
101 232 Wh/km 450 km/h All Wheel Drive Type 2 CCS Hatchback C
102 238 Wh/km 480 km/h All Wheel Drive Type 2 CCS SUV E
Seats PriceEuro
0 5 55480
1 5 30000
2 5 56440
3 5 68040
4 4 32997
.. ... ...
98 5 45000
99 5 96050
100 5 50000
101 5 65000
102 5 62000
Pandas CaseStudy
1. Create a dataset on Automobile ( columns : product_id, brand, price, body style, color, rows: 10)
2.Name the dataset as Automobile Data and get the number of columns and names of the columns
3.Print all the columns for price less than 10 lakh and create a new dataset
4.Create a new column for mileage and cc
5.Append the created column to the dataset
6.Extract 5th row and 2nd column
7.Print the dimension, values, axes, shape, size and data type of each column
8.Create a new dataset of its specifications with 3 rows and append to the dataset
9.Print the updated dataset
10.Sort by price
11.Delete 3rd row and 4th column
12.Use the aggregate functions on price column
13.Print the first 3 rows and last 3 rows
14.Change the 6th rows with new values and print the dataset
15.Convert the dataset into csv file
import pandas as pd
# 1. Create a dataset
data = {
'product_id': [101, 102, 103, 104, 105, 106, 107, 108, 109, 110],
'brand': ['Toyota', 'Honda', 'Ford', 'Chevrolet', 'Nissan', 'Hyundai', 'Volkswagen', 'BMW', 'Audi', 'Mercedes'],
'price': [900000, 800000, 1100000, 950000, 850000, 750000, 1300000, 1800000, 1600000, 2200000],
'body_style': ['Sedan', 'SUV', 'Hatchback', 'Sedan', 'SUV', 'Hatchback', 'Sedan', 'SUV', 'Hatchback', 'Sedan'],
'color': ['Red', 'Blue', 'Black', 'White', 'Silver', 'Gray', 'Blue', 'Black', 'Red', 'White']
}
df = pd.DataFrame(data)
print(df)
product_id brand price body_style color

0 101 Toyota 900000 Sedan Red
1 102 Honda 800000 SUV Blue
2 103 Ford 1100000 Hatchback Black
3 104 Chevrolet 950000 Sedan White
4 105 Nissan 850000 SUV Silver
5 106 Hyundai 750000 Hatchback Gray
6 107 Volkswagen 1300000 Sedan Blue
7 108 BMW 1800000 SUV Black
8 109 Audi 1600000 Hatchback Red
9 110 Mercedes 2200000 Sedan White
In [8]:
# 2. Name the dataset and get column information
df.name = 'Automobile Data'
num_columns = len(df.columns)
column_names = df.columns.tolist()
print(column_names)
['product_id', 'brand', 'price', 'body_style', 'color']
In [9]:
# 3. Print columns for price less than 10 lakh and create a new dataset
df_less_than_10_lakh = df[df['price'] < 1000000]
print(df_less_than_10_lakh)
product_id brand price body_style color

0 101 Toyota 900000 Sedan Red
1 102 Honda 800000 SUV Blue
3 104 Chevrolet 950000 Sedan White
4 105 Nissan 850000 SUV Silver
5 106 Hyundai 750000 Hatchback Gray
In [10]:
# 4. Create new columns for mileage and cc
df['mileage'] = [20, 18, 22, 21, 19, 17, 23, 16, 15, 14]
df['cc'] = [1500, 1600, 1200, 1400, 1700, 1300, 1800, 1900, 2000, 2200]
print(df)
product_id brand price body_style color mileage cc

0 101 Toyota 900000 Sedan Red 20 1500
1 102 Honda 800000 SUV Blue 18 1600
2 103 Ford 1100000 Hatchback Black 22 1200
3 104 Chevrolet 950000 Sedan White 21 1400
4 105 Nissan 850000 SUV Silver 19 1700
5 106 Hyundai 750000 Hatchback Gray 17 1300
6 107 Volkswagen 1300000 Sedan Blue 23 1800
7 108 BMW 1800000 SUV Black 16 1900
8 109 Audi 1600000 Hatchback Red 15 2000
9 110 Mercedes 2200000 Sedan White 14 2200
In [12]:
# 5. Append the created columns to the dataset
df = df[['product_id', 'brand', 'price', 'body_style', 'color', 'mileage', 'cc']]
print(df)

0 101 Toyota 900000 Sedan Red 20 1500
1 102 Honda 800000 SUV Blue 18 1600
7 108 BMW 1800000 SUV Black 16 1900
In [11]:
# 6. Extract 5th row and 2nd column
row_5_col_2 = df.iloc[4, 1]
print(row_5_col_2)
Nissan
In [13]:
# 7. Print dimension, values, axes, shape, size, and data type of each column
print(df.ndim)
print(df.values)
print(df.axes)
print(df.shape)
print(df.size)
print(df.dtypes)
2
[[101 'Toyota' 900000 'Sedan' 'Red' 20 1500]
[102 'Honda' 800000 'SUV' 'Blue' 18 1600]
[103 'Ford' 1100000 'Hatchback' 'Black' 22 1200]
[104 'Chevrolet' 950000 'Sedan' 'White' 21 1400]
[105 'Nissan' 850000 'SUV' 'Silver' 19 1700]
[106 'Hyundai' 750000 'Hatchback' 'Gray' 17 1300]
[107 'Volkswagen' 1300000 'Sedan' 'Blue' 23 1800]
[108 'BMW' 1800000 'SUV' 'Black' 16 1900]
[109 'Audi' 1600000 'Hatchback' 'Red' 15 2000]
[110 'Mercedes' 2200000 'Sedan' 'White' 14 2200]]
[RangeIndex(start=0, stop=10, step=1), Index(['product_id', 'brand', 'price', 'body_style', 'color', 'mileage', 'cc'], dtype='object')]
(10, 7)
70
product_id int64
brand object
price int64
body_style object
color object
mileage int64
cc int64
dtype: object
In [14]:
# 8. Create a new dataset of its specifications with 3 rows and append to the dataset
specifications = {
'product_id': [111, 112, 113],
'brand': ['Kia', 'Mazda', 'Subaru'],
'price': [1200000, 1000000, 1100000],
'body_style': ['SUV', 'Sedan', 'Hatchback'],
'color': ['Gray', 'Green', 'Orange'],
'mileage': [16, 17, 18],
'cc': [1700, 1500, 1400]
}
df_specifications = pd.DataFrame(specifications)
df = df.append(df_specifications, ignore_index=True)
print(df)

0 101 Toyota 900000 Sedan Red 20 1500
1 102 Honda 800000 SUV Blue 18 1600
7 108 BMW 1800000 SUV Black 16 1900
10 111 Kia 1200000 SUV Gray 16 1700
11 112 Mazda 1000000 Sedan Green 17 1500
12 113 Subaru 1100000 Hatchback Orange 18 1400
C:\Users\Tcs\AppData\Local\Temp\ipykernel_3956\1105582730.py:12: FutureWarning: The frame.append method is deprecated and will be

removed from pandas in a future version. Use pandas.concat instead.
df = df.append(df_specifications, ignore_index=True)
In [15]:
# 9. Print the updated dataset
print("Updated Dataset:")
print(df)
Updated Dataset:
0 101 Toyota 900000 Sedan Red 20 1500
1 102 Honda 800000 SUV Blue 18 1600
7 108 BMW 1800000 SUV Black 16 1900
10 111 Kia 1200000 SUV Gray 16 1700
In [16]:
# 10. Sort by price
df = df.sort_values(by='price')
print(df)

1 102 Honda 800000 SUV Blue 18 1600
0 101 Toyota 900000 Sedan Red 20 1500
10 111 Kia 1200000 SUV Gray 16 1700
7 108 BMW 1800000 SUV Black 16 1900
In [17]:
# 11. Delete 3rd row and 4th column
df = df.drop(index=2, axis=0)
df = df.drop(columns=df.columns[3])
print(df)
product_id brand price color mileage cc

5 106 Hyundai 750000 Gray 17 1300
1 102 Honda 800000 Blue 18 1600
4 105 Nissan 850000 Silver 19 1700
0 101 Toyota 900000 Red 20 1500
3 104 Chevrolet 950000 White 21 1400
11 112 Mazda 1000000 Green 17 1500
12 113 Subaru 1100000 Orange 18 1400
10 111 Kia 1200000 Gray 16 1700
6 107 Volkswagen 1300000 Blue 23 1800
8 109 Audi 1600000 Red 15 2000
7 108 BMW 1800000 Black 16 1900
9 110 Mercedes 2200000 White 14 2200
In [18]:
# 12. Use aggregate functions on the price column
price_stats = df['price'].agg(['mean', 'median', 'min', 'max'])
print(price_stats)
mean 1.204167e+06
median 1.050000e+06
min 7.500000e+05
max 2.200000e+06
Name: price, dtype: float64
In [19]:
# 13. Print the first 3 rows and last 3 rows
print(df.head(3))
print(df.tail(3))

5 106 Hyundai 750000 Gray 17 1300
1 102 Honda 800000 Blue 18 1600
4 105 Nissan 850000 Silver 19 1700
8 109 Audi 1600000 Red 15 2000
7 108 BMW 1800000 Black 16 1900
9 110 Mercedes 2200000 White 14 2200
In [6]:
# 14. Convert the dataset into a CSV file
df.to_csv('Automobile_Data.csv', index=False)
print(df)

5 106 Hyundai 750000 Gray 17 1300
1 102 Honda 800000 Blue 18 1600
4 105 Nissan 850000 Silver 19 1700
0 101 Toyota 900000 Red 20 1500
3 104 Chevrolet 950000 White 21 1400
11 112 Mazda 1000000 Green 17 1500
12 113 Subaru 1100000 Orange 18 1400
10 111 Kia 1200000 Gray 16 1700
6 107 Volkswagen 1300000 Blue 23 1800
8 109 Audi 1600000 Red 15 2000
7 108 BMW 1800000 Black 16 1900
9 110 Mercedes 2200000 White 14 2200
STORE
CASE STUDY – ELECTRIC VEHICLE
AIM:
To perform and generate the dataset for the given conditions and store the same.
DESCRIPTION:Create a Dataset with Brand, Type, Price, Warranty , Range and Sales Percentage (min. 5 details)Store the created
dataset.Create new dataset based on the type(Scooter/Bike)Merge the newly created file with the existing file.Sort the dataset based on the
sales percentage and store.Read and display the edited dataset.List the column headings and get the length of the table data.Create a Dataset
with same Brand in with IC Engines Merge the IC Engine Dataset with first Dataset with BrandSort the Price,Sales Percentage and Range.
import pandas as pd
# Create a Dataset with Brand, Type, Price, Warranty, Range, and Sales Percentage
data = {
'Brand': ['Tesla', 'Niu', 'Ather', 'Hero', 'Revolt'],
'Type': ['Car', 'Scooter', 'Scooter', 'Bike', 'Bike'],
'Price': [80000, 2000, 1500, 3000, 2500],
'Warranty': [4, 2, 2, 3, 2],
'Range': [300, 60, 80, 100, 90],
'Sales Percentage': [30, 15, 10, 5, 8]
}
ev_df = pd.DataFrame(data)
print(ev_df)
Brand Type Price Warranty Range Sales Percentage

0 Tesla Car 80000 4 300 30
1 Niu Scooter 2000 2 60 15
2 Ather Scooter 1500 2 80 10
3 Hero Bike 3000 3 100 5
4 Revolt Bike 2500 2 90 8
In [3]:
# Store the created dataset
ev_df.to_csv('electric_vehicle_data.csv', index=False)
# Create a new dataset based on the type (Scooter/Bike)

scooter_df = ev_df[ev_df['Type'] == 'Scooter']
bike_df = ev_df[ev_df['Type'] == 'Bike']
print(bike_df)

3 Hero Bike 3000 3 100 5
In [4]:
# Create a new dataset based on the type (Scooter/Bike)
scooter_df = ev_df[ev_df['Type'] == 'Scooter']
bike_df = ev_df[ev_df['Type'] == 'Bike']
print(bike_df)

3 Hero Bike 3000 3 100 5
In [5]:
# Merge the newly created file with the existing file
merged_df = pd.concat([ev_df, scooter_df], ignore_index=True)
print(merged_df)

0 Tesla Car 80000 4 300 30
1 Niu Scooter 2000 2 60 15
3 Hero Bike 3000 3 100 5
5 Niu Scooter 2000 2 60 15
In [6]:
# Sort the dataset based on the sales percentage and store
sorted_df = ev_df.sort_values(by='Sales Percentage', ascending=False)
sorted_df.to_csv('sorted_electric_vehicle_data.csv', index=False)
In [7]:
# Read and display the edited dataset
edited_df = pd.read_csv('sorted_electric_vehicle_data.csv')
print("Edited Dataset:")
print(edited_df)
Edited Dataset:
0 Tesla Car 80000 4 300 30
1 Niu Scooter 2000 2 60 15
4 Hero Bike 3000 3 100 5
In [8]:
# List the column headings and get the length of the table data
columns = edited_df.columns.tolist()
print(len(edited_df))
In [11]:
# Create a Dataset with the same Brand with IC Engines
ic_engine_data = {
'Brand': ['Tesla', 'Niu', 'Ather', 'Hero', 'Revolt'],
'Engine Type': ['Electric', 'IC Engine', 'IC Engine', 'IC Engine', 'IC Engine']
}
ic_engine_df = pd.DataFrame(ic_engine_data)
print(ic_engine_df)
Brand Engine Type

0 Tesla Electric
1 Niu IC Engine
2 Ather IC Engine
3 Hero IC Engine
4 Revolt IC Engine
In [12]:
# Merge the IC Engine Dataset with the first Dataset with Brand
merged_with_ic_engine_df = pd.merge(ev_df, ic_engine_df, on='Brand', how='left')
print(merged_with_ic_engine_df)
Brand Type Price Warranty Range Sales Percentage Engine Type

0 Tesla Car 80000 4 300 30 Electric
1 Niu Scooter 2000 2 60 15 IC Engine
2 Ather Scooter 1500 2 80 10 IC Engine
3 Hero Bike 3000 3 100 5 IC Engine
4 Revolt Bike 2500 2 90 8 IC Engine
In [13]:
# Sort the Price, Sales Percentage, and Range
sorted_by_price = merged_with_ic_engine_df.sort_values(by='Price')
sorted_by_sales_percentage = merged_with_ic_engine_df.sort_values(by='Sales Percentage', ascending=False)
sorted_by_range = merged_with_ic_engine_df.sort_values(by='Range', ascending=False)
print(sorted_by_price)
print(sorted_by_sales_percentage)
print(sorted_by_range)

In [ ]:

Python 2

Uploaded by

Copyright:

Available Formats

You might also like

Python 2

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Python 2

Uploaded by

Copyright:

Available Formats

import numpy as np

[array([1, 2]), array([3, 4]), array([5, 6])]

Adding new column:

Adding new column:

print("\nSquare root of DataFrame:\n",info.apply(np.sqrt))

Square root of DataFrame:

Sum of each Column:

Sum of each row:

Minimum and Maximum of each column:

Adding new column:

Adding new column:

Series from array:

Series from dictionary:

Series using scalar

Series through index:

Get the table data:

Get the column heading

Get particular column values

Extract\slice the table values-[including this row,excluding this row]

Get the particular row values-through number identification

Get the particular row value-through'Register number'indentification

make an average of total marks:

[[ 50. 58. 42.4 4.24 ]

Efficiency FastCharge RapidCharge PowerTrain \

PlugType BodyStyle Segment Seats PriceEuro

[103 rows x 14 columns]

Brand Model Accel TopSpeed Range \

Efficiency FastCharge RapidCharge PowerTrain \

PlugType BodyStyle Segment Seats PriceEuro

[103 rows x 14 columns]

Index(['Brand', 'Model', 'Accel', 'TopSpeed', 'Range', 'Efficiency',

FastCharge RapidCharge PowerTrain

[103 rows x 3 columns]

Efficiency FastCharge PowerTrain PlugType BodyStyle Segment \

[103 rows x 13 columns]

product_id brand price body_style color

['product_id', 'brand', 'price', 'body_style', 'color']

product_id brand price body_style color

product_id brand price body_style color mileage cc

product_id brand price body_style color mileage cc

product_id brand price body_style color mileage cc

C:\Users\Tcs\AppData\Local\Temp\ipykernel_3956\1105582730.py:12: FutureWarning: The frame.append method is deprecated and will be

product_id brand price body_style color mileage cc

product_id brand price color mileage cc

product_id brand price color mileage cc

product_id brand price color mileage cc

Brand Type Price Warranty Range Sales Percentage

# Create a new dataset based on the type (Scooter/Bike)

Brand Type Price Warranty Range Sales Percentage

Brand Type Price Warranty Range Sales Percentage

Brand Type Price Warranty Range Sales Percentage

Brand Engine Type

Brand Type Price Warranty Range Sales Percentage Engine Type

Brand Type Price Warranty Range Sales Percentage Engine Type

You might also like