Pandas Pract

# Build a Series holding the marks of four terms, labelled by term
# (Aadya and Devangi)
import pandas as pd

term_labels = ['Term1', 'Term 2', 'Term 3', 'Term 4']
term_marks = [45, 65, 24, 89]
# Each mark is paired with the term label at the same position.
s = pd.Series(data=term_marks, index=term_labels)
print(s)
OUTPUT:
Term1 45
Term 2 65
Term 3 24
Term 4 89
dtype: int64
# Create a Series of students of each section of 12th class from a dictionary
# (Aadya and Devangi).
# The dictionary keys become the index labels and the values become the data.
import pandas as pd
d = {'A': 35, 'B': 40, 'C': 32, 'D': 30}
s = pd.Series(d)
print(s)
OUTPUT:
A 35
B 40
C 32
D 30
dtype: int64
# Create a Series object from a list containing the number of days of the
# first four months of a year. Change the index values from 0,1,2,3 to
# 'jan','feb','mar','apr' respectively and the data type to float
# (Aadya and Devangi)
import pandas as pd
import numpy as np
# dtype=np.float64 makes the integer day counts be stored as 31.0, 28.0, ...
s = pd.Series([31, 28, 31, 30], index=['jan', 'feb', 'mar', 'apr'], dtype=np.float64)
print(s)
OUTPUT:
jan 31.0
feb 28.0
mar 31.0
apr 30.0
dtype: float64
# Create a Series object and display all of its attributes (Aadya and Devangi)

import pandas as pd
s = pd.Series([31,28,31,30])
# NOTE: this list is only stored in a variable; it is never attached to the
# series, so `s` keeps its default 0..3 RangeIndex.
index = ['jan','feb','mar','apr']
# Name the series itself and, separately, its index.
s.name = 'month'
s.index.name = 'name'
print(s)
print('name of the series is as follows : ',s.name)
print('index of the series = ',s.index)
print('name of the index = ', s.index.name )
print('values of the series are: ',s.values)
print('shape of the series : ',s.shape)
print('no. of bytes occypied by series elements : ' ,s.nbytes)
print('total no. of elements in a series: ',s.size)
# hasnans is True when at least one element is NaN/None
print('is there None value in the series ?',s.hasnans)
print('is the series empty?',s.empty)

output:
name
0 31
1 28
2 31
3 30
Name: month, dtype: int64
name of the series is as follows : month
index of the series = RangeIndex(start=0, stop=4, step=1,
name='name')
name of the index = name
values of the series are: [31 28 31 30]
shape of the series : (4,)
no. of bytes occypied by series elements : 32
total no. of elements in a series: 4
is there None value in the series ? False
is the series empty? False

# Create a Series with data as marks of five students and index as the names
# of students (Aadya and Devangi)
import pandas as pd
s = pd.Series([65,97,38,81,78],index = ['Ajay','Vipul','Shyam','Harsh','Kartik'])
print(s)
# Marks of Vipul by position. An integer key on a label-indexed Series is
# deprecated as a positional lookup, so the position is passed through iloc.
print('\nmarks of Vipul : ',s.iloc[1])
print('\nmarks of Vipul by using label indexing :',s['Vipul'])
# Display the marks of Ajay, Shyam and Kartik only, by position
print('\nmarks of Ajay,shyam,Kartik:', s.iloc[[0,2,4]])
# Display the marks of Ajay, Shyam and Kartik only, by labelled index
print('\nmarks of Ajay,shyam,Kartik:',s[['Ajay','Shyam','Kartik']])
# Display the marks of Ajay, Shyam and Kartik only, by using loc
print('\nmarks of Ajay,Shyam and kartik: ',s.loc[['Ajay','Shyam','Kartik']])
# Display the marks of Ajay, Shyam and Kartik only, by using iloc
print('\nmarks of Ajay,Shyam and kartik: ',s.iloc[[0,2,4]])
# Display 97,38,81 using slicing with default (positional) indexing;
# the stop position 4 is excluded
print('\nvalues of index no. are :',s[1:4])
# Display 97,38,81 using slicing with labelled indexing;
# a label slice includes the stop label
print('\nvalues of index no. are : ',s['Vipul':'Harsh'])
# Display marks of alternate students using slicing with labelled indexing
print('\nmarks of alternate students : ',s['Ajay':'Kartik':2])
# Display marks of all those students who scored more than 60
print('\nmarks of students who scored more than 60: ',s[s>60])
# Same boolean filter, using the loc method
print('\nmarks of students who scored more than 60: ',s.loc[s>60])

output:
Ajay 65
Vipul 97
Shyam 38
Harsh 81
Kartik 78
dtype: int64
marks of Vipul : 97
marks of Vipul by using label indexing : 97
marks of Ajay,shyam,Kartik: Ajay 65
Shyam 38
Kartik 78
dtype: int64
marks of Ajay,shyam,Kartik: Ajay 65
Shyam 38
Kartik 78
dtype: int64
marks of Ajay,Shyam and kartik: Ajay 65
Shyam 38
Kartik 78
dtype: int64
marks of Ajay,Shyam and kartik: Ajay 65
Shyam 38
Kartik 78
dtype: int64
values of index no. are : Vipul 97
Shyam 38
Harsh 81
dtype: int64
values of index no. are : Vipul 97
Shyam 38
Harsh 81
dtype: int64
marks of alternate students : Ajay 65
Shyam 38
Kartik 78
dtype: int64
marks of students who scored more than 60: Ajay 65
Vipul 97
Harsh 81
Kartik 78
dtype: int64
marks of students who scored more than 60: Ajay 65
Vipul 97
Harsh 81
Kartik 78
dtype: int64

# Create two series and perform all the mathematical operations
# (Aadya and Devangi)
# The two series share only the labels 'a', 'c' and 'e'; every operation is
# aligned on the labels, so the non-matching labels give NaN in the result.
import pandas as pd
series1 = pd.Series([1,2,3,4,5],index = ['a','b','c','d','e'])
series2 = pd.Series([10,20,-10,-50,-100],index = ['z','y','a','c','e'])
print(series1)
print(series2)
# addition of 2 series using + operator
seriesSum = series1+series2
print(seriesSum)
# addition of 2 series using 'add' method
series_add = series1.add(series2)
print(series_add)
# subtraction of 2 series using - operator
seriesSub = series1-series2
print(seriesSub)
# subtraction of 2 series using 'sub' method
series_sub = series1.sub(series2)
print(series_sub)
# multiplication of 2 series using * operator
# (was written with '+', which printed the sum a second time)
seriesMul = series1*series2
print(seriesMul)
# multiplication of 2 series using 'mul' method
series_mul = series1.mul(series2)
print(series_mul)
# division of 2 series using / operator
seriesDiv = series1/series2
print(seriesDiv)
# division of 2 series using 'div' method
series_Div = series1.div(series2)
print(series_Div)

OUTPUT:
a 1
b 2
c 3
d 4
e 5
dtype: int64
z 10
y 20
a -10
c -50
e -100
dtype: int64
a -9.0
b NaN
c -47.0
d NaN
e -95.0
y NaN
z NaN
dtype: float64
a -9.0
b NaN
c -47.0
d NaN
e -95.0
y NaN
z NaN
dtype: float64
a 11.0
b NaN
c 53.0
d NaN
e 105.0
y NaN
z NaN
dtype: float64
a 11.0
b NaN
c 53.0
d NaN
e 105.0
y NaN
z NaN
dtype: float64
a -9.0
b NaN
c -47.0
d NaN
e -95.0
y NaN
z NaN
dtype: float64
a -10.0
b NaN
c -150.0
d NaN
e -500.0
y NaN
z NaN
dtype: float64
a -0.10
b NaN
c -0.06
d NaN
e -0.05
y NaN
z NaN
dtype: float64
a -0.10
b NaN
c -0.06
d NaN
e -0.05
y NaN
z NaN
dtype: float64

# Build a Series of numbers, then update, compare, shift and sort it
# (Aadya and Devangi)
import pandas as pd

labels = ['A','B','C','D','E']
s = pd.Series([6700,5600,5000,5200,2800], index = labels)
print(s)

# Overwrite the single entry labelled 'D' with 3000.
s.loc['D'] = 3000
print('\nseries after modification')
print(s)

# Overwrite every entry in the label range 'B'..'D' (both ends included)
# with 2500.
s.loc['B':'D'] = 2500
print('\nseries after modification')
print(s)

# Element-wise comparison: True where the element is greater than 2800.
print('\nseries elements > 2800')
print(s.gt(2800))

# Add 10 to every element and keep the result.
s = s.add(10)
print('\nseries after adding 10 to all the elements')
print(s)

# Sorted copies by value; `s` itself keeps its order.
print('\nseries in ascending order')
print(s.sort_values(ascending=True))
print('\nseries in descending order')
print(s.sort_values(ascending=False))

OUTPUT:
A 6700
B 5600
C 5000
D 5200
E 2800
dtype: int64

series after modification
A 6700
B 5600
C 5000
D 3000
E 2800
dtype: int64

series after modification
A 6700
B 2500
C 2500
D 2500
E 2800
dtype: int64
series elements > 2800
A True
B False
C False
D False
E False
dtype: bool
series after adding 10 to all the elements
A 6710
B 2510
C 2510
D 2510
E 2810
dtype: int64
series in ascending order
B 2510
C 2510
D 2510
E 2810
A 6710
dtype: int64
series in descending order
A 6710
E 2810
B 2510
C 2510
D 2510
dtype: int64
# Create the same DataFrame using all 4 methods (Aadya and Devangi)
import pandas as pd
cities = ['Delhi','Mumbai','Kolkata','Chennai']

# a) list in list method / nested list: every inner list is one row
d1=[[10927986,189,7916],[12691836,208,8508],[4631392,149,7226],
    [4328063,157,7617]]
df1=pd.DataFrame(d1,index=cities,columns=['Population','Hospitals','School'])
print('a) dataframe using nested list:')
print(df1)

# b) list in dictionary method: every key is a column label
d2={'Population':[10927986,12691836,4631392,4328063],
    'Hospitals':[189,208,149,157],
    'School':[7916,8508,7226,7617]}
df2=pd.DataFrame(d2,index=cities)
print('\nb) dataframe using list in dictionary method:')
print(df2)

# c) dictionary in dictionary / nested dictionary:
# outer keys are column labels, inner keys are row labels
d3={'Population':
    {'Delhi':10927986,'Mumbai':12691836,'Kolkata':4631392,'Chennai':4328063},
    'Hospitals':{'Delhi':189,'Mumbai':208,'Kolkata':149,'Chennai':157},
    'School':{'Delhi':7916,'Mumbai':8508,'Kolkata':7226,'Chennai':7617}}
df3=pd.DataFrame(d3)
print('\nc) dataframe using nested dictionary:')
print(df3)

# d) dictionary in list method: every dictionary is one row
a={'Population':10927986,'Hospitals':189,'School':7916}
b={'Population':12691836,'Hospitals':208,'School':8508}
c={'Population':4631392,'Hospitals':149,'School':7226}
d={'Population':4328063,'Hospitals':157,'School':7617}
d4=[a,b,c,d]
df4=pd.DataFrame(d4,index=cities)
print('\nd) dataframe using dictionary in list method:')
print(df4)
OUTPUT:
a) dataframe using nested list:

Population Hospitals School
Delhi 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Chennai 4328063 157 7617
b) dataframe using list in dictionary method:

Delhi 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Chennai 4328063 157 7617
c) dataframe using nested dictionary:

Delhi 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Chennai 4328063 157 7617
d) dataframe using dictionary in list method:

Delhi 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Chennai 4328063 157 7617
# Create a data frame and use all the attributes on that one data frame
# (Devangi and Aadya)

import pandas as pd
# Three rows (age, name, sex) by two columns (marketing, sales). Every column
# mixes numbers and strings, which is why dtypes below reports 'object'.
df = pd.DataFrame([[25,24],['Neha','Rohit'],['Female','Male']],
                  index = ['age','name','sex'],columns = ['marketing','sales'])
print('\nthe dataframe is as follows:\n',df)

# change the index of the DataFrame
df.index = ['Age','Name','Gender']
print('\nindex of the data frame :',df.index)

# statement to change column labels to COL1,COL2
df.columns = ['COL1','COL2']
print('\ncolumns after being modified:',df.columns)
print(df)

# display data type of each column
print('\ndata type of each columns:\n',df.dtypes)

# axes of the dataframe: a list holding the row index and the column index
print('\naccesing group of rows and columns by labels:\n',df.axes)

# display size of data frame (total number of cells)
print('\nsize of data frame :',df.size)

# display the dimensions of the data frame as (rows, columns)
print('\ndimension of the data frame :',df.shape)

# values in the data frame
print('\nvalues in the data frame:\n',df.values)

# display True if DF is empty otherwise False
print('\nis the data frame empty:',df.empty)

# to transpose the dataframe
# that means that the row and column labels of DF would replace each other
print('\ndataframe after transposing:\n',df.T)

OUTPUT:
the dataframe is as follows:
marketing sales
age 25 24
name Neha Rohit
sex Female Male

index of the data frame : Index(['Age', 'Name', 'Gender'], dtype='object')

columns after being modified: Index(['COL1', 'COL2'], dtype='object')

COL1 COL2
Age 25 24
Name Neha Rohit
Gender Female Male

data type of each columns:
COL1 object
COL2 object
dtype: object

accesing group of rows and columns by labels:
[Index(['Age', 'Name', 'Gender'], dtype='object'), Index(['COL1',
'COL2'], dtype='object')]

size of data frame : 6

dimension of the data frame : (3, 2)

values in the data frame:
[[25 24]
['Neha' 'Rohit']
['Female' 'Male']]

is the data frame empty: False

dataframe after transposing:
Age Name Gender
COL1 25 Neha Female
COL2 24 Rohit Male
# Accessing elements of a dataframe using slicing, indexing, boolean
# indexing, loc and iloc (Devangi and Aadya)
import pandas as pd
d={'Population':[10927986,12691836,4631392,4328063,],
   'Hospitals':[189,208,149,157],
   'School':[7916,8508,7226,7617]}
df=pd.DataFrame(d,index=['Delhi','Mumbai','Kolkata','Chennai'])
print('DATA FRAME:')
print(df)

# a) to display population of cities
print('\ndisplay population of cities:')
print(df.Population) #by dot notation
print('\ndisplay population of cities:')
print(df['Population']) #by column label in square brackets

# b) to display population and no. of schools in all cities
print('Population and number of schools of all cities:')
print(df[['Population','School']])

# c) to display number of schools in Kolkata
print('no. of schools in Kolkata:',df.School['Kolkata']) #by labelled indexing
# by position: an integer key on a label-indexed Series is deprecated as a
# positional lookup, so the position goes through iloc
print('no. of schools in Kolkata:',df.School.iloc[2])

# d) to show vectorisation: the operation is applied to every element
print('\nelements in dataframe < 200?')
print(df<200)
print('\nadd 2 to each element of dataframe:')
print(df+2)

# e) to display all rows where no. of hospitals<200
print('\nrows of the dataframe where hospitals<200:')
print(df[df.Hospitals<200]) #by dot notation
print('\nrows of the dataframe where hospitals<200:')
print(df[df['Hospitals']<200]) #by column label in square brackets

# f) to display population and school for all the rows only if number of
# Hospitals are less than 200 and number of schools are more than 7500
print('\npopulation and school for all the rows only if number of Hospitals are '
      'less than 200 and number of schools are more than 7500')
# by dot notation; each condition needs its own parentheses around '&'
print(df[(df.Hospitals<200)&(df.School>7500)][['Population','School']])

# g) to display column of hospitals
print('\ncoloumns of hospitals in the form of series:')
print(df.loc[:,'Hospitals']) #by loc
print(df.iloc[:,1]) #by iloc
print('\ncoloumns of hospitals in the form of dataframe:')
print(df.loc[:,['Hospitals']])

# h) to display all rows from delhi to kolkata using slicing
# (label slices include both end labels):
print('rows from delhi to kolkata:')
print(df.loc['Delhi':'Kolkata','Population':'Hospitals'])

# i) to display no. of hospitals in Mumbai:
print('\nno. of Hospitals in Mumbai are:',df.loc['Mumbai','Hospitals'])

# j) to display population and hospitals of Delhi and Chennai:
print('\npopulation and hospitals of Delhi and Chennai:')
print(df.iloc[[0,3],[0,1]])
OUTPUT:
DATA FRAME:

Delhi 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Chennai 4328063 157 7617
display population of cities:

Delhi 10927986
Mumbai 12691836
Kolkata 4631392
Chennai 4328063
Name: Population, dtype: int64
display population of cities:

Delhi 10927986
Mumbai 12691836
Kolkata 4631392
Chennai 4328063
Name: Population, dtype: int64
Population and number of schools of all cities:

Population School
Delhi 10927986 7916
Mumbai 12691836 8508
Kolkata 4631392 7226
Chennai 4328063 7617
no. of schools in Kolkata: 7226

no. of schools in Kolkata: 7226
elements in dataframe < 200?

Delhi False True False
Mumbai False False False
Kolkata False True False
Chennai False True False
add 2 to each element of dataframe:

Delhi 10927988 191 7918
Mumbai 12691838 210 8510
Kolkata 4631394 151 7228
Chennai 4328065 159 7619
rows of the dataframe where hospitals<200:

Delhi 10927986 189 7916
Kolkata 4631392 149 7226
Chennai 4328063 157 7617
rows of the dataframe where hospitals<200:
Delhi 10927986 189 7916
Kolkata 4631392 149 7226
Chennai 4328063 157 7617
population and school for all the rows only if number of Hospitals are less
than 200 and number of schools are more than 7500:
Population School
Delhi 10927986 7916
Chennai 4328063 7617
coloumns of hospitals in the form of series:

Delhi 189
Mumbai 208
Kolkata 149
Chennai 157
Name: Hospitals, dtype: int64
Delhi 189
Mumbai 208
Kolkata 149
Chennai 157
Name: Hospitals, dtype: int64
coloumns of hospitals in the form of dataframe:

Hospitals
Delhi 189
Mumbai 208
Kolkata 149
Chennai 157
rows from delhi to kolkata:

Population Hospitals
Delhi 10927986 189
Mumbai 12691836 208
Kolkata 4631392 149
no. of Hospitals in Mumbai are: 208
population and hospitals of Delhi and Chennai:

Population Hospitals
Delhi 10927986 189
Chennai 4328063 157
# Build a DataFrame and demonstrate head(), tail(), count() and len()
# (Aadya and Devangi)

import pandas as pd

d = {
    'Rollno.': [115, 236, 307, 422],
    'Name': ['Pavni', 'Rishi', 'Preet', 'Paula'],
    'Marks': [97.5, 98.0, 98.5, 98.0],
}
section_labels = ['secA', 'secB', 'secC', 'secD']
df = pd.DataFrame(data=d, index=section_labels)
print('DATA FRAME:')
print(df)

# a) first three rows
print('upper 3 rows:')
print(df.head(n=3))

# b) last two rows
print ('lower 2 rows:')
print(df.tail(n=2))

# c) number of non-missing cells, first per row and then per column
print('no. of elements in each row:')
print(df.count(axis=1))
print('no. of elements in each coloumn:')
print(df.count(axis=0))

# d) the length of a DataFrame is its number of rows
print('length of the dataframe:')
print(len(df.index))
OUTPUT:
DATA FRAME:
Rollno. Name Marks
secA 115 Pavni 97.5
secB 236 Rishi 98.0
secC 307 Preet 98.5
secD 422 Paula 98.0
upper 3 rows:
Rollno. Name Marks
secA 115 Pavni 97.5
secB 236 Rishi 98.0
secC 307 Preet 98.5
lower 2 rows:
Rollno. Name Marks
secC 307 Preet 98.5
secD 422 Paula 98.0
no. of elements in each row:

secA 3
secB 3
secC 3
secD 3
dtype: int64
no. of elements in each coloumn:

Rollno. 4
Name 4
Marks 4
dtype: int64
length of the dataframe:

4
# CREATING A DATAFRAME AND SHOWING ADDITION AND DELETION OF COLUMNS:
# (Aadya and Devangi)
import pandas as pd
d1=[[10927986,189,7916],[12691836,208,8508],
    [4631392,149,7226],[4328063,157,7617]]
df1=pd.DataFrame(d1,index=['Delhi','Mumbai','Kolkata','Chennai'],
                 columns=['Population','Hospitals','School'])
print('original dataframe \n',df1)

# a) add a column density with 1219 for all its rows:
# a single value is repeated for every row
df1['density']=1219
print('dataframe after adding column density with 1219 as value of all '
      'rows:\n',df1)

# b) give column density a different value in every row
# (the column already exists, so this overwrites it):
df1['density']= [1500,1219,1630,1050]
print('dataframe after adding column density with different values for all its '
      'rows:\n',df1)

# c) add a column named 'industry' before column School (position 2):
df1.insert(2,'industry',[12000,13567,6789,1234],allow_duplicates=True)
print('dataframe after adding column industry before school:\n',df1)

# d) to delete a column:
# to delete column density using drop(); drop() returns a new DataFrame,
# so the result is assigned back
df1 = df1.drop('density', axis = 1)
print('dataframe after deleting column density using drop()\n',df1)

# to delete column industry using del statement (works in place):
del df1['industry']
print('dataframe after deleting column industry using del statement:\n',df1)

# e) add a new column with label as density with different values for all rows
# using loc method:
df1.loc[:,'density']=[12,13,14,15]
print('dataframe after adding a new column with label as density with different '
      'values for all rows using loc method\n:',df1)

# f) to delete column density using pop() (works in place and returns the
# removed column, which is not used here):
df1.pop('density')
print('dataframe after deleting column density using pop()\n',df1)
OUTPUT:
Original dataframe:
Delhi 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Chennai 4328063 157 7617
dataframe after adding column density with 1219 as value of all rows:
Population Hospitals School density
Delhi 10927986 189 7916 1219
Mumbai 12691836 208 8508 1219
Kolkata 4631392 149 7226 1219
Chennai 4328063 157 7617 1219
dataframe after adding column density with different values for all its rows:
Delhi 10927986 189 7916 1500
Mumbai 12691836 208 8508 1219
Kolkata 4631392 149 7226 1630
Chennai 4328063 157 7617 1050
dataframe after adding column industry before school:

Population Hospitals industry School density
Delhi 10927986 189 12000 7916 1500
Mumbai 12691836 208 13567 8508 1219
Kolkata 4631392 149 6789 7226 1630
Chennai 4328063 157 1234 7617 1050
dataframe after deleting column density using drop()

Population Hospitals industry School
Delhi 10927986 189 12000 7916
Mumbai 12691836 208 13567 8508
Kolkata 4631392 149 6789 7226
Chennai 4328063 157 1234 7617
dataframe after deleting column industry using del statement:

Delhi 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Chennai 4328063 157 7617
dataframe after adding a new column with label as density with different
values for all rows using loc method:
Delhi 10927986 189 7916 12
Mumbai 12691836 208 8508 13
Kolkata 4631392 149 7226 14
Chennai 4328063 157 7617 15
dataframe after deleting column density using pop() :

Delhi 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Chennai 4328063 157 7617
# CREATE A DATAFRAME AND SHOW RENAMING OF ROWS AND COLUMNS
# (Aadya and Devangi)
# rename() returns a new DataFrame each time; df1 itself is never changed,
# so every part below starts again from the original labels.
import pandas as pd
d1=[[10927986,189,7916],[12691836,208,8508],
    [4631392,149,7226],[4328063,157,7617]]
df1=pd.DataFrame(d1,index=['Delhi','Mumbai','Kolkata','Chennai'],
                 columns=['Population','Hospitals','School'])
print('original dataframe:\n',df1)

# a) to rename all column labels 'Population', 'Hospitals', 'School' to
# 'P', 'H', 'S':
df2=df1.rename(columns={'Population':'P','Hospitals':'H','School':'S'})
print('Dataframe after renaming:\n',df2)

# b) to rename 'Population' label to 'P':
df3=df1.rename(columns={'Population':'P'})
print('dataframe after renaming ‘Population’ label to ‘P’:\n',df3)

# c) to rename row label 'Delhi' and 'Chennai' to 'Del' and 'Che' respectively:
df4=df1.rename(index={'Delhi':'Del','Chennai':'Che'})
print('Dataframe after renaming row label ‘Delhi’ and ‘Chennai’ to ‘Del’ and '
      '‘Che’ respectively: \n',df4)

# d) to rename column label 'Population' to 'P' and row label 'Delhi' and
# 'Chennai' to 'Del' and 'Che' respectively:
df5=df1.rename(columns={'Population':'P'},
               index={'Delhi':'Del','Chennai':'Che'})
print('dataframe after renaming: \n',df5)
OUTPUT:
original dataframe:
Delhi 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Chennai 4328063 157 7617
Dataframe after renaming:

P H S
Delhi 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Chennai 4328063 157 7617
dataframe after renaming ‘Population’ label to ‘P’:

P Hospitals School
Delhi 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Chennai 4328063 157 7617
Dataframe after renaming row label ‘Delhi’ and ‘Chennai’ to ‘Del’ and ‘Che’
respectively:
Del 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Che 4328063 157 7617
dataframe after renaming:

P Hospitals School
Del 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Che 4328063 157 7617
>>>
# CREATE A DATAFRAME AND SHOW ADDITION, DELETION AND MODIFICATION OF ROWS
# AND COLUMNS:
# (Aadya and Devangi)
import pandas as pd
d1=[[90,92,89,81,94],[91,81,91,71,95],[97,96,88,67,99],[97,89,78,60,45]]
# The row label 'Hindi' appears twice on purpose; part c) removes both rows.
df=pd.DataFrame(d1,index=['Maths','Science','Hindi','Hindi'],
                columns=['Arnab','Ramit','Samriddhi','Riya','Mallika'])
print('original dataframe:\n',df)

# a) to delete Science row using labelled indexing
# (drop() returns a new DataFrame; df itself is unchanged):
df1 = df.drop('Science')
print('dataframe after deletion of row Science \n',df1)

# b) to delete rows with labels Science and Maths:
df2 = df.drop(['Science','Maths'])
print('dataframe after deletion of rows Science and Maths:\n',df2)

# c) to remove the duplicate rows labelled 'Hindi'
# (every row carrying the label is dropped):
df3 = df.drop('Hindi')
print('dataframe after deletion of row Hindi: \n',df3)

# d) to add row label English with values for each column:
df.loc['English']=[98,76,99,54,43]
print('dataframe after adding row label English:\n',df)

# e) to modify the values of English as 12000:
df.loc['English']=[12000,12000,12000,12000,12000]
print('dataframe after modifying values of english to 12000:\n',df)

# f) to modify value of Maths marks of Riya to 90.
# A single loc[row, column] assignment is used instead of the chained
# df.Riya['Maths'] = 90, which assigns through an intermediate Series and
# is not guaranteed to update df.
df.loc['Maths','Riya']=90
print('dataframe after modification: \n',df)

# g) to modify all values of a dataframe to the same value in all rows and
# columns:
df[:]= 100
print('dataframe after renaming: \n',df)
OUTPUT:
original dataframe:
Arnab Ramit Samriddhi Riya Mallika
Maths 90 92 89 81 94
Science 91 81 91 71 95
Hindi 97 96 88 67 99
Hindi 97 89 78 60 45
dataframe after deletion of row Science:

Maths 90 92 89 81 94
Hindi 97 96 88 67 99
Hindi 97 89 78 60 45
dataframe after deletion of rows Science and Maths:

Hindi 97 96 88 67 99
Hindi 97 89 78 60 45
dataframe after deletion of row Hindi:

Maths 90 92 89 81 94
Science 91 81 91 71 95
dataframe after adding row label English:

Maths 90 92 89 81 94
Science 91 81 91 71 95
Hindi 97 96 88 67 99
Hindi 97 89 78 60 45
English 98 76 99 54 43
dataframe after modifying values of english to 12000:

Maths 90 92 89 81 94
Science 91 81 91 71 95
Hindi 97 96 88 67 99
Hindi 97 89 78 60 45
English 12000 12000 12000 12000 12000
dataframe after modification:
Maths 90 92 89 90 94
Science 91 81 91 71 95
Hindi 97 96 88 67 99
Hindi 97 89 78 60 45
English 12000 12000 12000 12000 12000
dataframe after renaming:

Maths 100 100 100 100 100
Science 100 100 100 100 100
Hindi 100 100 100 100 100
Hindi 100 100 100 100 100
English 100 100 100 100 100
>>>
# Read from the csv file dept.csv, create a dataframe from it and display
# the output. (Devangi, Aadya)
# NOTE(review): the "df3 = pd.read_csv(" and "df5 = pd.read_csv(" openers were
# missing from the listing; they are restored here from the surviving
# "header = ...)" fragments and the print(df3) / print(df5) calls.
import pandas as pd
csv_path = 'C:\\Users\\devangi jain\\OneDrive\\Desktop\\dept.csv'

df=pd.read_csv(csv_path)
print('\noriginal dataframe is as follows:')
print(df)

# to define column headings as 'COL1','COL2','COL3' using the names parameter
# of read_csv() for the above csv file:
df2=pd.read_csv(csv_path, names = ['COL1','COL2','COL3'])
print('\nmodified dataframe is as follows:')
print(df2)

# to use default column headings instead of specifying own column headings
# (header=None: no line of the file is used as the header):
df3=pd.read_csv(csv_path, header = None)
print('\ndataframe with default column headings:')
print(df3)

# to skip the first row of the dept.csv file
df4=pd.read_csv(csv_path,names=['COL1','COL2','COL3'],skiprows=1)
print('\ndataframe after skipping the first row from csv file:')
print(df4)

# to create a dataframe in such a way that two rows of the file
# (row positions 1 and 2) become the column heading.
df5=pd.read_csv(csv_path, header = [1,2])
print('\ndataframe with the first two rows as column header :')
print(df5)
OUTPUT:
first dataframe:
C1 C2 C3
R1 1 2.0 3.0
R2 4 5.0 NaN
R3 6 NaN NaN
second dataframe:
C2 C5
R4 10 20.0
R2 30 NaN
R5 40 50.0
Dataframe after concatenation:

C1 C2 C3 C5
R1 1.0 2.0 3.0 NaN
R2 4.0 5.0 NaN NaN
R3 6.0 NaN NaN NaN
R4 NaN 10.0 NaN 20.0
R2 NaN 30.0 NaN NaN
R5 NaN 40.0 NaN 50.0
new dataframe:
C1 C2 C3 C5
0 1.0 2.0 3.0 NaN
1 4.0 5.0 NaN NaN
2 6.0 NaN NaN NaN
3 NaN 10.0 NaN 20.0
4 NaN 30.0 NaN NaN
5 NaN 40.0 NaN 50.0
dataframe concatenated along the columns:

C1 C2 C3 C4 C5
R1 1.0 2.0 3.0 NaN NaN
R2 4.0 5.0 NaN 30.0 NaN
R3 6.0 NaN NaN NaN NaN
R4 NaN NaN NaN 10.0 20.0
R5 NaN NaN NaN 40.0 50.0
dataframe after appending one after another:

C1 C2 C3 C5
R1 1.0 2.0 3.0 NaN
R2 4.0 5.0 NaN NaN
R3 6.0 NaN NaN NaN
R4 NaN 10.0 NaN 20.0
R2 NaN 30.0 NaN NaN
R5 NaN 40.0 NaN 50.0
new dataframe such that new indexes are generated after appending:
C1 C2 C3 C5
0 1.0 2.0 3.0 NaN
1 4.0 5.0 NaN NaN
2 6.0 NaN NaN NaN
3 NaN 10.0 NaN 20.0
4 NaN 30.0 NaN NaN
5 NaN 40.0 NaN 50.0
a new dataframe after appending in ascending order:

C1 C2 C3 C5
R1 1.0 2.0 3.0 NaN
R2 4.0 5.0 NaN NaN
R3 6.0 NaN NaN NaN
R4 NaN 10.0 NaN 20.0
R2 NaN 30.0 NaN NaN
R5 NaN 40.0 NaN 50.0
# Read from the csv file dept.csv, create a dataframe from it and display
# the output. (Devangi, Aadya)
# NOTE(review): the "df2/df3/df5 = pd.read_csv(" openers were missing from the
# listing; they are restored here from the surviving keyword-argument
# fragments and the print(df2) / print(df3) / print(df5) calls.
import pandas as pd
csv_path = 'C:\\Users\\devangi jain\\OneDrive\\Desktop\\dept.csv'

df=pd.read_csv(csv_path)
print(df)

# to define column headings as 'COL1','COL2','COL3' using the names parameter
# of read_csv() for the above csv file
df2=pd.read_csv(csv_path, names = ['COL1','COL2','COL3'])
print('\nmodified dataframe is as follows:')
print(df2)

# to use default column headings instead of specifying own column headings
# (header=None: no line of the file is used as the header).
df3=pd.read_csv(csv_path, header = None)
print('\ndataframe with default column headings:')
print(df3)

# to skip the first row of the dept.csv file
df4=pd.read_csv(csv_path,names=['COL1','COL2','COL3'],skiprows=1)
print('\ndataframe after skipping the first row from csv file:')
print(df4)

# to create a dataframe in such a way that two rows of the file
# (row positions 1 and 2) become the column heading.
df5=pd.read_csv(csv_path, header = [1,2])
print('\ndataframe with the first two rows as column header :')
print(df5)
OUTPUT:
original dataframe is as follows:

Roll.no Name Marks
0 101 Tia 67.8
1 102 Radha 78.9
2 103 Aarti 78.9
3 104 Navya 100.0
4 105 Christi 88.5
5 106 Andrew 67.5
modified dataframe is as follows:
COL1 COL2 COL3
0 Roll.no Name Marks
1 101 Tia 67.8
2 102 Radha 78.9
3 103 Aarti 78.9
4 104 Navya 100.0
5 105 Christi 88.5
6 106 Andrew 67.5
dataframe with default column headings:

0 1 2
0 Roll.no Name Marks
1 101 Tia 67.8
2 102 Radha 78.9
3 103 Aarti 78.9
4 104 Navya 100.0
5 105 Christi 88.5
6 106 Andrew 67.5
dataframe after skipping the first row from csv file:

COL1 COL2 COL3
0 101 Tia 67.8
1 102 Radha 78.9
2 103 Aarti 78.9
3 104 Navya 100.0
4 105 Christi 88.5
5 106 Andrew 67.5
dataframe with the first two rows as column header :

101 Tia 67.8
102 Radha 78.9
0 103 Aarti 78.9
1 104 Navya 100.0
2 105 Christi 88.5
3 106 Andrew 67.5
# Using dataframe and csv for analysis:
# (Devangi, Aadya)
# NOTE(review): the "df.to_csv('C:\\Users\\devangi" openers of the last two
# exports were missing from the listing and are restored here; the garbled
# state list is repaired with the names exactly as they were spelled.
import numpy as np
import pandas as pd
df=pd.read_csv('C:\\Users\\devangi jain\\OneDrive\\Desktop\\dept2.csv')
print(df)

# Every export below writes to the same file, so each call overwrites the
# file written by the previous one.
out_path = 'C:\\Users\\devangi jain\\OneDrive\\Desktop\\state.csv'

# to export the contents of this dataframe to csv file state.csv
# np.nan marks the missing values (the np.NaN alias was removed in NumPy 2.0)
a=[7830,931,7452.4,np.nan]
g=[11950,818,1930,2737]
k=[113.1,1.7,2604.8,np.nan]
p=[7152,33,11586.2,16440.5]
t=[44.1,23.2,814.6,0.5]
u=[140169.2,2184.4,13754,30056]
state=['AndhraP','Gujarat','Kerala','Punjab','Tripura','Uttar P']
df = pd.DataFrame([a,g,k,p,t,u],index=state,
                  columns=['Fruits','Pulses','Rice','Wheat'])
df.to_csv(out_path)

# to export the content of this dataframe to a csv file state.csv.
# Replace NaN values with Null
df.to_csv(out_path,na_rep='Null')

# to export the contents of this dataframe without row indexes where NaN is
# stored as Null
df.to_csv(out_path,na_rep='Null',index=False)

# to export the contents of this dataframe to a csv file state.csv
# without column header. Replace NaN values with Null
df.to_csv(out_path,na_rep='Null',header=False)
OUTPUT:
original dataframe is as follows:
Fruits Pulses Rice Wheat
Andhra P. 7830.0 931.0 7452.4 NaN
Gujarat 11950.0 818.0 1930.0 2737.0
Kerala 113.1 1.7 2604.8 NaN
Punjab 7152.0 33.0 11586.2 16440.5
Tripura 44.1 23.2 814.6 0.5
Uttar P. 140169.2 2184.4 13754.0 30056.0
# Create a line chart depicting average height and weight of persons aged
# 8 to 16 (Aadya and Devangi)
import matplotlib.pyplot as plt
height=[121.9,124.5,129.5,134.6,139.7,147.3,152.4,157.5,162.6]
weight=[19.7,21.3,23.5,25.9,28.5,32.1,35.7,39.6,43.2]
plt.xlabel('weight in kg')
plt.ylabel('height in cm')
# The curve is drawn once, with the customised style. The listing also had an
# unstyled plt.plot(weight,height) that drew the same data a second time
# underneath this one.
plt.plot(weight,height,color='green',marker='*',markersize=10,
         linestyle='dashed',linewidth=2)
plt.title('Average weight with respect to the height')
plt.show()
OUTPUT:
# Plotting the values of sine, cosine and tangent for the same array ar2
# in a line chart (Devangi and Aadya)
import matplotlib.pyplot as plt  # needed to run this program on its own
import numpy as np
ar2 = [1,7,21,35,35,21,7,1]
# NumPy applies each function to every element of the list
s2 = np.sin(ar2)
c2 = np.cos(ar2)
t2 = np.tan(ar2)
plt.plot(ar2,s2,color='cyan',label='sine')
plt.plot(ar2,c2,color='red',label='cosine')
plt.plot(ar2,t2,color='black',linestyle='dashed',label='tangent')
plt.grid(True)
plt.xlabel('array values')
plt.ylabel('sine,cosine,tangent')
plt.title('trigonometric functions',fontsize=10,color='blue')
# loc=4 places the legend in the lower right corner
plt.legend(loc=4)
plt.show()
OUTPUT:
# Create a dataframe from a csv and then plot its line chart
# (Aadya and Devangi)
import matplotlib.pyplot as plt  # needed to run this program on its own
import pandas as pd
df=pd.read_csv('C:\\Users\\devangi jain\\OneDrive\\Desktop\\melasales.csv')
print('original data frame:')
print(df)
# create a line plot with a different colour for each week
df.plot()
# give title as "Mela Sales Report"
plt.title('Mela Sales Report')
# label x axis as "days"
plt.xlabel('days')
# label y axis as "sales in Rs."
plt.ylabel('sales in Rs.')
# show the values of the 'day' column as the tick labels at the row positions
plt.xticks(df.index,df.day)
plt.show()
OUTPUT:
original data frame:

Unnamed: 0 day week 1 week 2 week 3
0 0 mon 5000 3000 5800
1 1 tue 5900 3000 5800
2 2 wed 6500 5000 3500
3 3 thur 3500 5500 2500
4 4 fri 4000 3000 3000
5 5 sat 5300 4300 5300
6 6 sun 7900 5900 6000
# To plot a bar chart for marks obtained by first five roll numbers
# (Aadya and Devangi)
import matplotlib.pyplot as plt  # needed to run this program on its own

x= ['anu' ,'babu', 'sweety','meena', 'navya']
y=[90,45,78,56,98]
# one red bar per student, with the marks as the bar height
plt.bar(x,y,color='red')
plt.xlabel('students name')
plt.ylabel('marks obtained')
plt.title('Result')
plt.show()
OUTPUT:
# To plot a horizontal bar graph for users of 5 different computer
# languages (Aadya and Devangi)
import matplotlib.pyplot as plt  # needed to run this program on its own

l=['python','java','c++','java script','html']
n=[220,557,123,352,52]
# barh puts the categories on the y axis and the counts along the x axis
plt.barh(l,n)
plt.xlabel('number of users')
# the label must be a string; the bare name `languages` was never defined
plt.ylabel('languages')
plt.title('users of different computer languages')
plt.show()
OUTPUT:
# Plot a bar chart from a csv file (Devangi and Aadya)
import matplotlib.pyplot as plt  # needed to run this program on its own
import pandas as pd
# every backslash of the Windows path is escaped (the last one was a bare '\m')
df=pd.read_csv('C:\\Users\\AADYA KISHORE\\Desktop\\melasales.csv')
print('original data frame:')
print(df)
# the 'day' column supplies the x axis labels
df.plot(kind='bar',x='day',color=['red','yellow','purple'],linestyle='dashed',
        width=0.5,edgecolor='green')
plt.xlabel('days')
plt.ylabel('sales in Rs.')
plt.title('Mela Sales Report')
plt.show()
OUTPUT:
original data frame:

Unnamed: 0 day week 1 week 2 week 3
0 0 mon 5000 3000 5800
1 1 tue 5900 3000 5800
2 2 wed 6500 5000 3500
3 3 thur 3500 5500 2500
4 4 fri 4000 3000 3000
5 5 sat 5300 4300 5300
6 6 sun 7900 5900 6000
# Display a histogram which shows the marks of the students in a class
# (Devangi and Aadya)
# NOTE(review): plt (matplotlib.pyplot) is not imported in this snippet; it is
# assumed to be imported earlier in the file - confirm.
# data set that has to be plotted in the form of frequency
score=[0,41,52,72, 80,95]
b = [0,33,50,60,75,85,100] #bins
# weights: each score contributes its weight (not 1) to the count of its bin
w=[2,4,10,6,17,15]
plt.hist(score,bins=b,weights=w,edgecolor='black',color='r')
plt.xlabel('range of percentage')
plt.ylabel('frequency')
plt.show()
OUTPUT:
# Display a histogram corresponding to the height and weight
# (Devangi and Aadya)
# NOTE(review): plt (matplotlib.pyplot) is not imported in this snippet; it is
# assumed to be imported earlier in the file - confirm.
import pandas as pd
data={'name':['arnav','sheela','azhar','bincy','yash','nazar'],
      'height':[60,61,63,65,61,60],
      'weight':[47,89,52,58,50,47]}
df=pd.DataFrame(data)
# Draw the dataframe as a histogram via pandas' plotting interface.
df.plot(kind='hist')
plt.xlabel('height and weight')
plt.ylabel('frequency')
plt.title('corresponding height and weight of the students')
plt.show()
OUTPUT:
# Create a histogram from a dataframe corresponding to the attributes having
# numeric values and show customisation (Aadya and Devangi)
# NOTE(review): plt (matplotlib.pyplot) is not imported in this snippet; it is
# assumed to be imported earlier in the file - confirm.
import pandas as pd
data={'name':['aarav','sheela','azhar','bincy','yash','nazar'],
      'height':[60,61,63,65,61,60],
      'weight':[47,89,52,58,50,47]}
df=pd.DataFrame(data)
# Customisation: unfilled bars with a dotted green outline and an 'o' hatch
# pattern. The hatch value must be a plain-quoted single string literal.
df.plot(kind='hist',edgecolor='Green',linewidth=2,linestyle=':',fill=False,
        hatch='o')
plt.show()
OUTPUT:

Pandas Pract

Uploaded by

Copyright:

Available Formats

You might also like

Pandas Pract

Uploaded by

Document Information

Original Description:

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Pandas Pract

Uploaded by

Copyright:

Available Formats

#create a series of marks (Aadya and Devangi)

# b) list in dictionary method

# c) dictionary in dictionary / nested dictionary:

# d) dictionary in list method:

a)dataframe using nested list:

b) dataframe using list in dictionary method:

c) dataframe using nested dictionary:

d) dataframe using dictionary in list method:

# a) to display population of cities

# b) to display population and no. of schools in all cities

# c) to display number of schools in Kolkata

# e) to display all rows where no. of hospitals<200

# g) to display coloumns of hospitals

# h) to display all rows from delhi to kolkata using slicing:

# i) to display no. of hospitals in Mumbai:

# j) to display population and hospitals of Delhi and Chennai:

Population Hospitals School

display population of cities:

display population of cities:

Population and number of schools of all cities:

no. of schools in Kolkata: 7226

elements in dataframe < 200?

add 2 to each element of dataframe:

rows of the dataframe where hospitals<200:

coloumns of hospitals in the form of series:

coloumns of hospitals in the form of dataframe:

rows from delhi to kolkata:

population and hospitals of Delhi and Chennai:

#create dataframe and use methods-head,tail,len,count (Aadya and Devangi)

# a) to retrieve upper 3 rows:

# b) to print lower 2 rows:

# c) to count no. of elements in each row and coloumn:

# d) to display length of the dataframe:

no. of elements in each row:

no. of elements in each coloumn:

length of the dataframe:

# a) add a column density with 1219 for all its rows:

# c) add a column named ‘industry’ before column schools:

# to delete column density using drop():

# to delete column industry using del statement:

dataframe after adding column industry before school:

dataframe after deleting column density using drop()

dataframe after deleting column industry using del statement:

dataframe after deleting column density using pop() :

#CREATE A DATAFRAME AND SHOW RENAMING OF ROWS AND COLUMNS

# a) to rename all column labels ‘Population’, ’Hospitals’ ,’School’ to ‘P’,

# b) to rename ‘Population’ label to ‘P’:

Dataframe after renaming:

dataframe after renaming ‘Population’ label to ‘P’:

dataframe after renaming:

#CREATE A DATAFRAME AND SHOW ADDITION ,DELETION AND

# a) to delete science row using labelled indexing:

# b) to delete rows with labels science and maths:

# c) to remove the duplicate rows labelled ‘Hindi’:

# d) to add row label english with df values for each column:

# e) to modify the values of English as 12000:

# f) to modify value of Maths marks of Riya to 90: