Pandas Pract

You might also like

Download as docx, pdf, or txt
Download as docx, pdf, or txt
You are on page 1of 48

# Build a Series holding the marks scored in each of four terms
# (Aadya and Devangi)

import pandas as pd

term_labels = ['Term1', 'Term 2', 'Term 3', 'Term 4']
term_marks = [45, 65, 24, 89]
# Each mark is paired with the term label at the same position.
s = pd.Series(data=term_marks, index=term_labels)
print(s)

OUTPUT:
Term1 45
Term 2 65
Term 3 24
Term 4 89
dtype: int64
# Create a Series of the number of students in each section of class 12
# (Aadya and Devangi)
import pandas as pd

# Section letter -> number of students; the dict keys become the index labels.
d = {'A': 35, 'B': 40, 'C': 32, 'D': 30}
s = pd.Series(d)
print(s)

OUTPUT:
A 35
B 40
C 32
D 30
dtype: int64
# Create a Series object from a list containing the number of days of the
# first four months of a year. Change the index values from 0,1,2,3 to
# 'jan','feb','mar','apr' respectively and the data type to float
# (Aadya and Devangi)

import pandas as pd
import numpy as np

# index= replaces the default 0..3 labels; dtype= stores the day counts as floats.
s = pd.Series([31, 28, 31, 30], index=['jan', 'feb', 'mar', 'apr'], dtype=np.float64)
print(s)

OUTPUT:
jan 31.0
feb 28.0
mar 31.0
apr 30.0
dtype: float64
# Create a Series object and display all of its attributes
# (Aadya and Devangi)

import pandas as pd
s = pd.Series([31, 28, 31, 30])
# NOTE: this list is never passed to the Series, so the Series keeps its
# default integer index.
index = ['jan', 'feb', 'mar', 'apr']
s.name = 'month'
s.index.name = 'name'
print(s)
print('name of the series is as follows : ', s.name)
print('index of the series = ', s.index)
print('name of the index = ', s.index.name)
print('values of the series are: ', s.values)
print('shape of the series : ', s.shape)
print('no. of bytes occypied by series elements : ', s.nbytes)
print('total no. of elements in a series: ', s.size)
print('is there None value in the series ?', s.hasnans)
print('is the series empty?', s.empty)
 
 
output:
name
0    31
1    28
2    31
3    30
Name: month, dtype: int64
name of the series is as follows :  month
index of the series =  RangeIndex(start=0, stop=4, step=1,
name='name')
name of the index =  name
values of the series are:  [31 28 31 30]
shape of the series :  (4,)
no. of bytes occypied by series elements :  32
total no. of elements in a series:  4
is there None value in the series ? False
is the series empty? False
 
 
 
 
 
 
 
# Create a Series with the marks of five students as data and the names of
# the students as index (Aadya and Devangi)

import pandas as pd
s = pd.Series([65, 97, 38, 81, 78], index=['Ajay', 'Vipul', 'Shyam', 'Harsh', 'Kartik'])
print(s)
# Marks of Vipul by position. The index holds names, so positional lookups
# go through iloc; a bare integer key such as s[1] is deprecated here.
print('\nmarks of Vipul : ', s.iloc[1])
print('\nmarks of Vipul by using label indexing :', s['Vipul'])
# Display the marks of Ajay, Shyam and Kartik only, by position
print('\nmarks of Ajay,shyam,Kartik:', s.iloc[[0, 2, 4]])
# Display the marks of Ajay, Shyam and Kartik only, by label
print('\nmarks of Ajay,shyam,Kartik:', s[['Ajay', 'Shyam', 'Kartik']])
# Display the marks of Ajay, Shyam and Kartik only, by using loc
print('\nmarks of Ajay,Shyam and kartik: ', s.loc[['Ajay', 'Shyam', 'Kartik']])
# Display the marks of Ajay, Shyam and Kartik only, by using iloc
print('\nmarks of Ajay,Shyam and kartik: ', s.iloc[[0, 2, 4]])
# Display 97,38,81 using slicing with default (positional) indexing;
# the stop position 4 is excluded
print('\nvalues of index no. are :', s[1:4])
# Display 97,38,81 using slicing with labelled indexing;
# the stop label 'Harsh' is included
print('\nvalues of index no. are : ', s['Vipul':'Harsh'])
# Display marks of alternate students using slicing with labelled indexing
print('\nmarks of alternate students : ', s['Ajay':'Kartik':2])
# Display marks of all those students who scored more than 60
print('\nmarks of students who scored more than 60: ', s[s > 60])
# Same boolean selection, using the loc method
print('\nmarks of students who scored more than 60: ', s.loc[s > 60])
 
 
output:
Ajay      65
Vipul     97
Shyam     38
Harsh     81
Kartik    78
dtype: int64
marks of Vipul :  97
marks of Vipul by using label indexing : 97
marks of Ajay,shyam,Kartik: Ajay      65
Shyam     38
Kartik    78
dtype: int64
marks of Ajay,shyam,Kartik: Ajay      65
Shyam     38
Kartik    78
dtype: int64
marks of Ajay,Shyam and kartik:  Ajay      65
Shyam     38
Kartik    78
dtype: int64
marks of Ajay,Shyam and kartik:  Ajay      65
Shyam     38
Kartik    78
dtype: int64
values of index no. are : Vipul    97
Shyam    38
Harsh    81
dtype: int64
values of index no. are :  Vipul    97
Shyam    38
Harsh    81
dtype: int64
marks of alternate students :  Ajay      65
Shyam     38
Kartik    78
dtype: int64
marks of students who scored more than 60:  Ajay      65
Vipul     97
Harsh     81
Kartik    78
dtype: int64
marks of students who scored more than 60:  Ajay      65
Vipul     97
Harsh     81
Kartik    78
dtype: int64
 
 
 
 
 
 
 
 
 
# Create two Series and perform all the mathematical operations
# (Aadya and Devangi)
#
# The two Series only share the labels 'a', 'c' and 'e'. Every operation
# below aligns on labels, so labels present in just one Series give NaN.

import pandas as pd
series1 = pd.Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
series2 = pd.Series([10, 20, -10, -50, -100], index=['z', 'y', 'a', 'c', 'e'])
print(series1)
print(series2)
# addition of 2 series using + operator
seriesSum = series1 + series2
print(seriesSum)
# addition of 2 series using 'add' method
series_add = series1.add(series2)
print(series_add)
# subtraction of 2 series using - operator
seriesSub = series1 - series2
print(seriesSub)
# subtraction of 2 series using 'sub' method
series_sub = series1.sub(series2)
print(series_sub)
# multiplication of 2 series using * operator
seriesMul = series1 * series2
print(seriesMul)
# multiplication of 2 series using 'mul' method
series_mul = series1.mul(series2)
print(series_mul)
# division of 2 series using / operator
seriesDiv = series1 / series2
print(seriesDiv)
# division of 2 series using 'div' method
series_Div = series1.div(series2)
print(series_Div)
 
 
OUTPUT:
a    1
b    2
c    3
d    4
e    5
dtype: int64
z     10
y     20
a    -10
c    -50
e   -100
dtype: int64
a    -9.0
b     NaN
c   -47.0
d     NaN
e   -95.0
y     NaN
z     NaN
dtype: float64
a    -9.0
b     NaN
c   -47.0
d     NaN
e   -95.0
y     NaN
z     NaN
dtype: float64
a     11.0
b      NaN
c     53.0
d      NaN
e    105.0
y      NaN
z      NaN
dtype: float64
a     11.0
b      NaN
c     53.0
d      NaN
e    105.0
y      NaN
z      NaN
dtype: float64
a    -9.0
b     NaN
c   -47.0
d     NaN
e   -95.0
y     NaN
z     NaN
dtype: float64
a    -10.0
b      NaN
c   -150.0
d      NaN
e   -500.0
y      NaN
z      NaN
dtype: float64
a   -0.10
b     NaN
c   -0.06
d     NaN
e   -0.05
y     NaN
z     NaN
dtype: float64
a   -0.10
b     NaN
c   -0.06
d     NaN
e   -0.05
y     NaN
z     NaN
dtype: float64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# A Series of numbers and a walk through common operations on it:
# single-label update, label-slice update, vectorised comparison,
# scalar addition and sorting (Aadya and Devangi)

import pandas as pd

labels = ['A', 'B', 'C', 'D', 'E']
s = pd.Series([6700, 5600, 5000, 5200, 2800], index=labels)
print(s)

# Overwrite the single element labelled 'D' with 3000.
s.loc['D'] = 3000
print('\nseries after modification', s, sep='\n')

# Overwrite every element from label 'B' through label 'D' with 2500.
s.loc['B':'D'] = 2500
print('\nseries after modification', s, sep='\n')

# Element-wise comparison against 2800 yields a boolean Series.
print('\nseries elements > 2800', s.gt(2800), sep='\n')

# Add 10 to every element and rebind s to the result.
s = s.add(10)
print('\nseries after adding 10 to all the elements', s, sep='\n')

# Values sorted from smallest to largest.
print('\nseries in ascending order', s.sort_values(ascending=True), sep='\n')

# Values sorted from largest to smallest.
print('\nseries in descending order', s.sort_values(ascending=False), sep='\n')
 
 
OUTPUT:

A    6700
B    5600
C    5000
D    5200
E    2800
dtype: int64
 
series after modification
A    6700
B    5600
C    5000
D    3000
E    2800
dtype: int64
 
series after modification
A    6700
B    2500
C    2500
D    2500
E    2800
dtype: int64
series elements > 2800
A     True
B    False
C    False
D    False
E    False
dtype: bool
series after adding 10 to all the elements
A    6710
B    2510
C    2510
D    2510
E    2810
dtype: int64
series in ascending order
B    2510
C    2510
D    2510
E    2810
A    6710
dtype: int64
series in descending order
A    6710
E    2810
B    2510
C    2510
D    2510
dtype: int64
# Create the same city DataFrame using all 4 methods (Aadya and Devangi)

import pandas as pd

# a) list in list method / nested list: each inner list is one row, so the
#    row and column labels are supplied separately.
d1 = [[10927986, 189, 7916], [12691836, 208, 8508], [4631392, 149, 7226],
      [4328063, 157, 7617]]
df1 = pd.DataFrame(d1, index=['Delhi', 'Mumbai', 'Kolkata', 'Chennai'],
                   columns=['Population', 'Hospitals', 'School'])
print('a) dataframe using nested list:')
print(df1)

# b) list in dictionary method: each key is a column label and each list
#    holds that column's values; row labels are supplied via index=.
d2 = {'Population': [10927986, 12691836, 4631392, 4328063],
      'Hospitals': [189, 208, 149, 157],
      'School': [7916, 8508, 7226, 7617]}
df2 = pd.DataFrame(d2, index=['Delhi', 'Mumbai', 'Kolkata', 'Chennai'])
print('\nb) dataframe using list in dictionary method:')
print(df2)

# c) dictionary in dictionary / nested dictionary: outer keys are column
#    labels, inner keys are row labels.
d3 = {'Population': {'Delhi': 10927986, 'Mumbai': 12691836, 'Kolkata': 4631392, 'Chennai': 4328063},
      'Hospitals': {'Delhi': 189, 'Mumbai': 208, 'Kolkata': 149, 'Chennai': 157},
      'School': {'Delhi': 7916, 'Mumbai': 8508, 'Kolkata': 7226, 'Chennai': 7617}}
df3 = pd.DataFrame(d3)
print('\nc) dataframe using nested dictionary:')
print(df3)

# d) dictionary in list method: each dictionary is one row keyed by column
#    label; row labels are supplied via index=.
a = {'Population': 10927986, 'Hospitals': 189, 'School': 7916}
b = {'Population': 12691836, 'Hospitals': 208, 'School': 8508}
c = {'Population': 4631392, 'Hospitals': 149, 'School': 7226}
d = {'Population': 4328063, 'Hospitals': 157, 'School': 7617}
d4 = [a, b, c, d]
df4 = pd.DataFrame(d4, index=['Delhi', 'Mumbai', 'Kolkata', 'Chennai'])
print('\nd) dataframe using dictionary in list method:')
print(df4)
OUTPUT:

a)dataframe using nested list:


Population Hospitals School
Delhi 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Chennai 4328063 157 7617

b) dataframe using list in dictionary method:


Population Hospitals School
Delhi 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Chennai 4328063 157 7617

c) dataframe using nested dictionary:


Population Hospitals School
Delhi 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Chennai 4328063 157 7617

d) dataframe using dictionary in list method:


Population Hospitals School
Delhi 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Chennai 4328063 157 7617
# Creating a data frame and using all attributes in one data frame
# (Devangi and Aadya)

import pandas as pd
# Each inner list is one row: ages, names and genders for the two columns.
df = pd.DataFrame([[25, 24], ['Neha', 'Rohit'], ['Female', 'Male']],
                  index=['age', 'name', 'sex'], columns=['marketing', 'sales'])
print('\nthe dataframe is as follows:\n', df)

# change the index of the DataFrame
df.index = ['Age', 'Name', 'Gender']
print('\nindex of the data frame :', df.index)

# statement to change column labels to COL1, COL2
df.columns = ['COL1', 'COL2']
print('\ncolumns after being modified:', df.columns)
print(df)

# display data type of each column
print('\ndata type of each columns:\n', df.dtypes)

# axes of the dataframe: a list holding the row index and the column index
print('\naccesing group of rows and columns by labels:\n', df.axes)

# display size of data frame (total number of elements)
print('\nsize of data frame :', df.size)

# display the dimensions of the data frame as (rows, columns)
print('\ndimension of the data frame :', df.shape)

# values in the data frame
print('\nvalues in the data frame:\n', df.values)

# display True if DF is empty otherwise False
print('\nis the data frame empty:', df.empty)

# to transpose the dataframe
# that means that the row and column labels of DF would replace each other
print('\ndataframe after transposing:\n', df.T)
 
 
OUTPUT:
the dataframe is as follows:
     marketing  sales
age         25     24
name      Neha  Rohit
sex     Female   Male
 
index of the data frame : Index(['Age', 'Name', 'Gender'], dtype='object')
 
columns after being modified: Index(['COL1', 'COL2'], dtype='object')
    
 COL1   COL2
Age         25     24
Name      Neha  Rohit
Gender  Female   Male
 
data type of each columns:
COL1    object
COL2    object
dtype: object
 
accesing group of rows and columns by labels:
[Index(['Age', 'Name', 'Gender'], dtype='object'), Index(['COL1',
'COL2'], dtype='object')]
 
size of data frame : 6
 
dimension of the data frame : (3, 2)
 
values in the data frame:
[[25 24]
['Neha' 'Rohit']
['Female' 'Male']]
 
is the data frame empty: False
 
dataframe after transposing:
     Age   Name  Gender
COL1  25   Neha  Female
COL2  24  Rohit    Male
# Accessing elements of a dataframe using slicing, indexing, boolean
# indexing, loc and iloc (Devangi and Aadya)

import pandas as pd
d = {'Population': [10927986, 12691836, 4631392, 4328063],
     'Hospitals': [189, 208, 149, 157],
     'School': [7916, 8508, 7226, 7617]}
df = pd.DataFrame(d, index=['Delhi', 'Mumbai', 'Kolkata', 'Chennai'])
print('DATA FRAME:')
print(df)

# a) to display population of cities
print('\ndisplay population of cities:')
print(df.Population)  # by dot notation
print('\ndisplay population of cities:')
print(df['Population'])  # by column label in square brackets

# b) to display population and no. of schools in all cities
print('Population and number of schools of all cities:')
print(df[['Population', 'School']])

# c) to display number of schools in Kolkata
print('no. of schools in Kolkata:', df.School['Kolkata'])  # by labelled indexing
# Positional access on a label-indexed column goes through iloc; a bare
# integer key such as df.School[2] is deprecated.
print('no. of schools in Kolkata:', df.School.iloc[2])  # by position

# d) to show vectorisation: the operation is applied to every element
print('\nelements in dataframe < 200?')
print(df < 200)
print('\nadd 2 to each element of dataframe:')
print(df + 2)

# e) to display all rows where no. of hospitals<200
print('\nrows of the dataframe where hospitals<200:')
print(df[df.Hospitals < 200])  # by dot notation
print('\nrows of the dataframe where hospitals<200:')
print(df[df['Hospitals'] < 200])  # by column label in square brackets

# f) to display population and school for all the rows only if number of
#    Hospitals are less than 200 and number of schools are more than 7500
print('\npopulation and school for all the rows only if number of Hospitals are '
      'less than 200 and number of schools are more than 7500')
# by dot notation; each condition needs its own parentheses around &
print(df[(df.Hospitals < 200) & (df.School > 7500)][['Population', 'School']])

# g) to display the column of hospitals
print('\ncoloumns of hospitals in the form of series:')
print(df.loc[:, 'Hospitals'])  # by loc
print(df.iloc[:, 1])  # by iloc
print('\ncoloumns of hospitals in the form of dataframe:')
print(df.loc[:, ['Hospitals']])  # a list of labels keeps the result a DataFrame

# h) to display all rows from delhi to kolkata using slicing
#    (label slices include both end labels)
print('rows from delhi to kolkata:')
print(df.loc['Delhi':'Kolkata', 'Population':'Hospitals'])

# i) to display no. of hospitals in Mumbai
print('\nno. of Hospitals in Mumbai are:', df.loc['Mumbai', 'Hospitals'])

# j) to display population and hospitals of Delhi and Chennai
print('\npopulation and hospitals of Delhi and Chennai:')
print(df.iloc[[0, 3], [0, 1]])

OUTPUT:

DATA FRAME:

Population Hospitals School


Delhi 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Chennai 4328063 157 7617

display population of cities:


Delhi 10927986
Mumbai 12691836
Kolkata 4631392
Chennai 4328063
Name: Population, dtype: int64

display population of cities:


Delhi 10927986
Mumbai 12691836
Kolkata 4631392
Chennai 4328063
Name: Population, dtype: int64

Population and number of schools of all cities:


Population School
Delhi 10927986 7916
Mumbai 12691836 8508
Kolkata 4631392 7226
Chennai 4328063 7617

no. of schools in Kolkata: 7226


no. of schools in Kolkata: 7226

elements in dataframe < 200?


Population Hospitals School
Delhi False True False
Mumbai False False False
Kolkata False True False
Chennai False True False

add 2 to each element of dataframe:


Population Hospitals School
Delhi 10927988 191 7918
Mumbai 12691838 210 8510
Kolkata 4631394 151 7228
Chennai 4328065 159 7619

rows of the dataframe where hospitals<200:


Population Hospitals School
Delhi 10927986 189 7916
Kolkata 4631392 149 7226
Chennai 4328063 157 7617
rows of the dataframe where hospitals<200:
Population Hospitals School
Delhi 10927986 189 7916
Kolkata 4631392 149 7226
Chennai 4328063 157 7617

population and school for all the rows only if number of Hospitals are less
than 200 and number of schools are more than 7500:
Population School
Delhi 10927986 7916
Chennai 4328063 7617

coloumns of hospitals in the form of series:


Delhi 189
Mumbai 208
Kolkata 149
Chennai 157
Name: Hospitals, dtype: int64

Delhi 189
Mumbai 208
Kolkata 149
Chennai 157
Name: Hospitals, dtype: int64

coloumns of hospitals in the form of dataframe:


Hospitals
Delhi 189
Mumbai 208
Kolkata 149
Chennai 157

rows from delhi to kolkata:


Population Hospitals
Delhi 10927986 189
Mumbai 12691836 208
Kolkata 4631392 149
no. of Hospitals in Mumbai are: 208

population and hospitals of Delhi and Chennai:


Population Hospitals
Delhi 10927986 189
Chennai 4328063 157

# Build a student DataFrame and demonstrate head(), tail(), count() and len()
# (Aadya and Devangi)

import pandas as pd

section_labels = ['secA', 'secB', 'secC', 'secD']
d = {
    'Rollno.': [115, 236, 307, 422],
    'Name': ['Pavni', 'Rishi', 'Preet', 'Paula'],
    'Marks': [97.5, 98.0, 98.5, 98.0],
}
df = pd.DataFrame(d, index=section_labels)
print('DATA FRAME:', df, sep='\n')

# a) first three rows of the frame
top_rows = df.head(3)
print('upper 3 rows:', top_rows, sep='\n')

# b) last two rows of the frame
bottom_rows = df.tail(2)
print('lower 2 rows:', bottom_rows, sep='\n')

# c) number of non-missing entries, first per row (axis 1), then per column
per_row = df.count(axis=1)
print('no. of elements in each row:', per_row, sep='\n')
per_column = df.count()
print('no. of elements in each coloumn:', per_column, sep='\n')

# d) len() of a DataFrame is its number of rows
row_total = len(df)
print('length of the dataframe:', row_total, sep='\n')

OUTPUT:

DATA FRAME:
Rollno. Name Marks
secA 115 Pavni 97.5
secB 236 Rishi 98.0
secC 307 Preet 98.5
secD 422 Paula 98.0
upper 3 rows:
Rollno. Name Marks
secA 115 Pavni 97.5
secB 236 Rishi 98.0
secC 307 Preet 98.5

lower 2 rows:
Rollno. Name Marks
secC 307 Preet 98.5
secD 422 Paula 98.0

no. of elements in each row:


secA 3
secB 3
secC 3
secD 3
dtype: int64

no. of elements in each coloumn:


Rollno. 4
Name 4
Marks 4
dtype: int64

length of the dataframe:


4
# CREATING A DATAFRAME AND SHOWING ADDITION AND DELETION OF COLUMNS:
# (Aadya and Devangi)
import pandas as pd
d1 = [[10927986, 189, 7916], [12691836, 208, 8508],
      [4631392, 149, 7226], [4328063, 157, 7617]]
df1 = pd.DataFrame(d1, index=['Delhi', 'Mumbai', 'Kolkata', 'Chennai'],
                   columns=['Population', 'Hospitals', 'School'])
print("original dataframe \n", df1)

# a) add a column density with 1219 for all its rows
#    (a scalar is repeated for every row)
df1['density'] = 1219
print('dataframe after adding column density with 1219 as value of all rows:\n', df1)

# b) add a column density with different values for all its rows
#    (the column already exists at this point, so it is overwritten)
df1['density'] = [1500, 1219, 1630, 1050]
print('dataframe after adding column density with different values for all its rows:\n', df1)

# c) add a column named 'industry' before column School
#    (position 2 is the slot currently held by School)
df1.insert(2, 'industry', [12000, 13567, 6789, 1234], allow_duplicates=True)
print('dataframe after adding column industry before school:\n', df1)

# d) to delete a column:

# to delete column density using drop(); drop returns a new frame, so the
# result is assigned back to df1
df1 = df1.drop('density', axis=1)
print('dataframe after deleting column density using drop()\n', df1)

# to delete column industry using del statement (modifies df1 in place)
del df1['industry']
print('dataframe after deleting column industry using del statement:\n', df1)

# e) add a new column with label as density with different values for all
#    rows using loc method
df1.loc[:, 'density'] = [12, 13, 14, 15]
print('dataframe after adding a new column with label as density with different '
      'values for all rows using loc method\n:', df1)

# f) to delete column density using pop(); pop removes the column in place
#    and returns it (the returned column is not used here)
df1.pop('density')
print('dataframe after deleting column density using pop()\n', df1)

OUTPUT:

Original dataframe:
Population Hospitals School
Delhi 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Chennai 4328063 157 7617

dataframe after adding column density with 1219 as value of all rows:
Population Hospitals School density
Delhi 10927986 189 7916 1219
Mumbai 12691836 208 8508 1219
Kolkata 4631392 149 7226 1219
Chennai 4328063 157 7617 1219

dataframe after adding column density with different values for all its rows:
Population Hospitals School density
Delhi 10927986 189 7916 1500
Mumbai 12691836 208 8508 1219
Kolkata 4631392 149 7226 1630
Chennai 4328063 157 7617 1050

dataframe after adding column industry before school:


Population Hospitals industry School density
Delhi 10927986 189 12000 7916 1500
Mumbai 12691836 208 13567 8508 1219
Kolkata 4631392 149 6789 7226 1630
Chennai 4328063 157 1234 7617 1050

dataframe after deleting column density using drop()


Population Hospitals industry School
Delhi 10927986 189 12000 7916
Mumbai 12691836 208 13567 8508
Kolkata 4631392 149 6789 7226
Chennai 4328063 157 1234 7617

dataframe after deleting column industry using del statement:


Population Hospitals School
Delhi 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Chennai 4328063 157 7617

dataframe after adding a new column with label as density with different
values for all rows using loc method:
Population Hospitals School density
Delhi 10927986 189 7916 12
Mumbai 12691836 208 8508 13
Kolkata 4631392 149 7226 14
Chennai 4328063 157 7617 15

dataframe after deleting column density using pop() :


Population Hospitals School
Delhi 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Chennai 4328063 157 7617

# CREATE A DATAFRAME AND SHOW RENAMING OF ROWS AND COLUMNS
# (Aadya and Devangi)
#
# rename() returns a new DataFrame each time; df1 itself is never changed,
# so every step below starts from the original labels.
import pandas as pd
d1 = [[10927986, 189, 7916], [12691836, 208, 8508],
      [4631392, 149, 7226], [4328063, 157, 7617]]
df1 = pd.DataFrame(d1, index=['Delhi', 'Mumbai', 'Kolkata', 'Chennai'],
                   columns=['Population', 'Hospitals', 'School'])
print('original dataframe:\n', df1)

# a) to rename all column labels 'Population', 'Hospitals', 'School' to
#    'P', 'H', 'S'
df2 = df1.rename(columns={'Population': 'P', 'Hospitals': 'H', 'School': 'S'})
print('Dataframe after renaming:\n', df2)

# b) to rename 'Population' label to 'P'
df3 = df1.rename(columns={'Population': 'P'})
print('dataframe after renaming ‘Population’ label to ‘P’:\n', df3)

# c) to rename row label 'Delhi' and 'Chennai' to 'Del' and 'Che' respectively
df4 = df1.rename(index={'Delhi': 'Del', 'Chennai': 'Che'})
print('Dataframe after renaming row label ‘Delhi’ and ‘Chennai’ to ‘Del’ and '
      '‘Che’ respectively: \n', df4)

# d) to rename column label 'Population' to 'P' and row label 'Delhi' and
#    'Chennai' to 'Del' and 'Che' respectively
df5 = df1.rename(columns={'Population': 'P'}, index={'Delhi': 'Del', 'Chennai': 'Che'})
print('dataframe after renaming: \n', df5)

OUTPUT:

original dataframe:
Population Hospitals School
Delhi 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Chennai 4328063 157 7617

Dataframe after renaming:


P H S
Delhi 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Chennai 4328063 157 7617

dataframe after renaming ‘Population’ label to ‘P’:


P Hospitals School
Delhi 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Chennai 4328063 157 7617

Dataframe after renaming row label ‘Delhi’ and ‘Chennai’ to ‘Del’ and ‘Che’
respectively:
Population Hospitals School
Del 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Che 4328063 157 7617

dataframe after renaming:


P Hospitals School
Del 10927986 189 7916
Mumbai 12691836 208 8508
Kolkata 4631392 149 7226
Che 4328063 157 7617
>>>

# CREATE A DATAFRAME AND SHOW ADDITION, DELETION AND MODIFICATION OF ROWS
# AND COLUMNS:
# (Aadya and Devangi)
import pandas as pd
d1 = [[90, 92, 89, 81, 94], [91, 81, 91, 71, 95], [97, 96, 88, 67, 99], [97, 89, 78, 60, 45]]
# The row label 'Hindi' appears twice on purpose; step c) removes both rows.
df = pd.DataFrame(d1, index=['Maths', 'Science', 'Hindi', 'Hindi'],
                  columns=['Arnab', 'Ramit', 'Samriddhi', 'Riya', 'Mallika'])
print('original dataframe:\n', df)

# a) to delete science row using labelled indexing
#    (drop returns a new frame; df itself is left unchanged)
df1 = df.drop('Science')
print('dataframe after deletion of row Science \n', df1)

# b) to delete rows with labels science and maths
df2 = df.drop(['Science', 'Maths'])
print('dataframe after deletion of rows Science and Maths:\n', df2)

# c) to remove the duplicate rows labelled 'Hindi'
#    (every row carrying the label is dropped)
df3 = df.drop('Hindi')
print('dataframe after deletion of row Hindi: \n', df3)

# d) to add row label english with values for each column
df.loc['English'] = [98, 76, 99, 54, 43]
print('dataframe after adding row label English:\n', df)

# e) to modify the values of English as 12000
df.loc['English'] = [12000, 12000, 12000, 12000, 12000]
print('dataframe after modifying values of english to 12000:\n', df)

# f) to modify value of Maths marks of Riya to 90
#    A single loc[row, column] assignment writes into df itself; chained
#    assignment (df.Riya['Maths'] = ...) may only update a temporary copy.
df.loc['Maths', 'Riya'] = 90
print('dataframe after modification: \n', df)

# g) to modify all values of a dataframe to the same value in all rows and
#    columns
df[:] = 100
print('dataframe after renaming: \n', df)

OUTPUT:
original dataframe:
Arnab Ramit Samriddhi Riya Mallika
Maths 90 92 89 81 94
Science 91 81 91 71 95
Hindi 97 96 88 67 99
Hindi 97 89 78 60 45

dataframe after deletion of row Science:


Arnab Ramit Samriddhi Riya Mallika
Maths 90 92 89 81 94
Hindi 97 96 88 67 99
Hindi 97 89 78 60 45

dataframe after deletion of rows Science and Maths:


Arnab Ramit Samriddhi Riya Mallika
Hindi 97 96 88 67 99
Hindi 97 89 78 60 45

dataframe after deletion of row Hindi:


Arnab Ramit Samriddhi Riya Mallika
Maths 90 92 89 81 94
Science 91 81 91 71 95

dataframe after adding row label English:


Arnab Ramit Samriddhi Riya Mallika
Maths 90 92 89 81 94
Science 91 81 91 71 95
Hindi 97 96 88 67 99
Hindi 97 89 78 60 45
English 98 76 99 54 43

dataframe after modifying values of english to 12000:


Arnab Ramit Samriddhi Riya Mallika
Maths 90 92 89 81 94
Science 91 81 91 71 95
Hindi 97 96 88 67 99
Hindi 97 89 78 60 45
English 12000 12000 12000 12000 12000
dataframe after modification:
Arnab Ramit Samriddhi Riya Mallika
Maths 90 92 89 90 94
Science 91 81 91 71 95
Hindi 97 96 88 67 99
Hindi 97 89 78 60 45
English 12000 12000 12000 12000 12000

dataframe after renaming:


Arnab Ramit Samriddhi Riya Mallika
Maths 100 100 100 100 100
Science 100 100 100 100 100
Hindi 100 100 100 100 100
Hindi 100 100 100 100 100
English 100 100 100 100 100
>>>

# To read from csv file dept.csv, create a dataframe from it and also
# display the output. (Devangi, Aadya)
import pandas as pd
df = pd.read_csv('C:\\Users\\devangi jain\\OneDrive\\Desktop\\dept.csv')
print('\noriginal dataframe is as follows:')
print(df)

# To define column headings as 'COL1','COL2','COL3' using the names
# parameter of read_csv() for the above csv file.
df2 = pd.read_csv('C:\\Users\\devangi jain\\OneDrive\\Desktop\\dept.csv',
                  names=['COL1', 'COL2', 'COL3'])
print('\nmodified dataframe is as follows:')
print(df2)

# To use default column headings instead of specifying own column headings.
df3 = pd.read_csv('C:\\Users\\devangi jain\\OneDrive\\Desktop\\dept.csv',
                  header=None)
print('\ndataframe with default column headings:')
print(df3)

# To skip the first row of the dept.csv file.
df4 = pd.read_csv('C:\\Users\\devangi jain\\OneDrive\\Desktop\\dept.csv',
                  names=['COL1', 'COL2', 'COL3'], skiprows=1)
print('\ndataframe after skipping the first row from csv file:')
print(df4)

# To create a dataframe in such a way that two rows of the file become the
# column heading. header=[1, 2] selects the rows at positions 1 and 2
# (counting from 0) as a two-level header.
df5 = pd.read_csv('C:\\Users\\devangi jain\\OneDrive\\Desktop\\dept.csv',
                  header=[1, 2])
print('\ndataframe with the first two rows as column header :')
print(df5)

OUTPUT:
first dataframe:
C1 C2 C3
R1 1 2.0 3.0
R2 4 5.0 NaN
R3 6 NaN NaN

second dataframe:
C2 C5
R4 10 20.0
R2 30 NaN
R5 40 50.0

Dataframe after concatenation:


C1 C2 C3 C5
R1 1.0 2.0 3.0 NaN
R2 4.0 5.0 NaN NaN
R3 6.0 NaN NaN NaN
R4 NaN 10.0 NaN 20.0
R2 NaN 30.0 NaN NaN
R5 NaN 40.0 NaN 50.0

new dataframe:
C1 C2 C3 C5
0 1.0 2.0 3.0 NaN
1 4.0 5.0 NaN NaN
2 6.0 NaN NaN NaN
3 NaN 10.0 NaN 20.0
4 NaN 30.0 NaN NaN
5 NaN 40.0 NaN 50.0

dataframe concatenated along the columns:


C1 C2 C3 C4 C5
R1 1.0 2.0 3.0 NaN NaN
R2 4.0 5.0 NaN 30.0 NaN
R3 6.0 NaN NaN NaN NaN
R4 NaN NaN NaN 10.0 20.0
R5 NaN NaN NaN 40.0 50.0

dataframe after appending one after another:


C1 C2 C3 C5
R1 1.0 2.0 3.0 NaN
R2 4.0 5.0 NaN NaN
R3 6.0 NaN NaN NaN
R4 NaN 10.0 NaN 20.0
R2 NaN 30.0 NaN NaN
R5 NaN 40.0 NaN 50.0

new dataframe such that new indexes are generated after appending:
C1 C2 C3 C5
0 1.0 2.0 3.0 NaN
1 4.0 5.0 NaN NaN
2 6.0 NaN NaN NaN
3 NaN 10.0 NaN 20.0
4 NaN 30.0 NaN NaN
5 NaN 40.0 NaN 50.0

a new dataframe after appending in ascending order:


C1 C2 C3 C5
R1 1.0 2.0 3.0 NaN
R2 4.0 5.0 NaN NaN
R3 6.0 NaN NaN NaN
R4 NaN 10.0 NaN 20.0
R2 NaN 30.0 NaN NaN
R5 NaN 40.0 NaN 50.0

# To read from csv file dept.csv, create a dataframe from it and also
# display the output. (Devangi, Aadya)
import pandas as pd
df = pd.read_csv('C:\\Users\\devangi jain\\OneDrive\\Desktop\\dept.csv')
print('\noriginal dataframe is as follows:')
print(df)

# To define column headings as 'COL1','COL2','COL3' using the names
# parameter of read_csv() for the above csv file.
df2 = pd.read_csv('C:\\Users\\devangi jain\\OneDrive\\Desktop\\dept.csv',
                  names=['COL1', 'COL2', 'COL3'])
print('\nmodified dataframe is as follows:')
print(df2)

# To use default column headings instead of specifying own column headings.
df3 = pd.read_csv('C:\\Users\\devangi jain\\OneDrive\\Desktop\\dept.csv',
                  header=None)
print('\ndataframe with default column headings:')
print(df3)

# To skip the first row of the dept.csv file.
df4 = pd.read_csv('C:\\Users\\devangi jain\\OneDrive\\Desktop\\dept.csv',
                  names=['COL1', 'COL2', 'COL3'], skiprows=1)
print('\ndataframe after skipping the first row from csv file:')
print(df4)

# To create a dataframe in such a way that two rows of the file become the
# column heading. header=[1, 2] selects the rows at positions 1 and 2
# (counting from 0) as a two-level header.
df5 = pd.read_csv('C:\\Users\\devangi jain\\OneDrive\\Desktop\\dept.csv',
                  header=[1, 2])
print('\ndataframe with the first two rows as column header :')
print(df5)

OUTPUT:

original dataframe is as follows:


Roll.no Name Marks
0 101 Tia 67.8
1 102 Radha 78.9
2 103 Aarti 78.9
3 104 Navya 100.0
4 105 Christi 88.5
5 106 Andrew 67.5
modified dataframe is as follows:
COL1 COL2 COL3
0 Roll.no Name Marks
1 101 Tia 67.8
2 102 Radha 78.9
3 103 Aarti 78.9
4 104 Navya 100.0
5 105 Christi 88.5
6 106 Andrew 67.5

dataframe with default column headings:


0 1 2
0 Roll.no Name Marks
1 101 Tia 67.8
2 102 Radha 78.9
3 103 Aarti 78.9
4 104 Navya 100.0
5 105 Christi 88.5
6 106 Andrew 67.5

dataframe after skipping the first row from csv file:


COL1 COL2 COL3
0 101 Tia 67.8
1 102 Radha 78.9
2 103 Aarti 78.9
3 104 Navya 100.0
4 105 Christi 88.5
5 106 Andrew 67.5

dataframe with the first two rows as column header :


101 Tia 67.8
102 Radha 78.9
0 103 Aarti 78.9
1 104 Navya 100.0
2 105 Christi 88.5
3 106 Andrew 67.5

# Using dataframe and csv for analysis:
# (Devangi, Aadya)
import numpy as np
import pandas as pd
df = pd.read_csv('C:\\Users\\devangi jain\\OneDrive\\Desktop\\dept2.csv')
print('\noriginal dataframe is as follows:')
print(df)

# To export the contents of this dataframe to csv file state.csv.
# Each list below is one state's row: Fruits, Pulses, Rice, Wheat.
# np.nan marks a missing value (the np.NaN alias was removed in NumPy 2.0).
a = [7830, 931, 7452.4, np.nan]
g = [11950, 818, 1930, 2737]
k = [113.1, 1.7, 2604.8, np.nan]
p = [7152, 33, 11586.2, 16440.5]
t = [44.1, 23.2, 814.6, 0.5]
u = [140169.2, 2184.4, 13754, 30056]
# NOTE(review): the first label was garbled in the source ("AndhraP");
# 'Andhra P' is assumed by analogy with 'Uttar P' - confirm.
state = ['Andhra P', 'Gujarat', 'Kerala', 'Punjab', 'Tripura', 'Uttar P']
# df is rebound here, replacing the frame read from dept2.csv above.
df = pd.DataFrame([a, g, k, p, t, u], index=state,
                  columns=['Fruits', 'Pulses', 'Rice', 'Wheat'])
df.to_csv('C:\\Users\\devangi jain\\OneDrive\\Desktop\\state.csv')

# Every export below writes to the same state.csv path, so each call
# overwrites the file produced by the previous one.

# To export the content of this dataframe to a csv file state.csv.
# Replace NaN values with Null.
df.to_csv('C:\\Users\\devangi jain\\OneDrive\\Desktop\\state.csv', na_rep='Null')

# To export the contents of this dataframe without row indexes, where NaN
# is stored as Null.
df.to_csv('C:\\Users\\devangi jain\\OneDrive\\Desktop\\state.csv', na_rep='Null', index=False)

# To export the contents of this dataframe to a csv file state.csv
# without column header. Replace NaN values with Null.
df.to_csv('C:\\Users\\devangi jain\\OneDrive\\Desktop\\state.csv', na_rep='Null', header=False)

OUTPUT:
original dataframe is as follows:
Fruits Pulses Rice Wheat
Andhra P. 7830.0 931.0 7452.4 NaN
Gujarat 11950.0 818.0 1930.0 2737.0
Kerala 113.1 1.7 2604.8 NaN
Punjab 7152.0 33.0 11586.2 16440.5
Tripura 44.1 23.2 814.6 0.5
Uttar P. 140169.2 2184.4 13754.0 30056.0
# Create a line chart depicting average height and weight of persons aged
# 8 to 16 (Aadya and Devangi)
import matplotlib.pyplot as plt
# One entry per age; height[i] and weight[i] belong to the same age.
height = [121.9, 124.5, 129.5, 134.6, 139.7, 147.3, 152.4, 157.5, 162.6]
weight = [19.7, 21.3, 23.5, 25.9, 28.5, 32.1, 35.7, 39.6, 43.2]
plt.xlabel('weight in kg')
plt.ylabel('height in cm')
# The line is drawn once, with weight on the x axis and height on the y axis.
plt.plot(weight, height, color='green', marker='*', markersize=10,
         linestyle='dashed', linewidth=2)
plt.title('Average weight with respect to the height')
plt.show()

OUTPUT:

# Plotting the values of sine, cosine and tangent for the same array ar2
# in a line chart (Devangi and Aadya)
import matplotlib.pyplot as plt
import numpy as np
ar2 = [1, 7, 21, 35, 35, 21, 7, 1]
# NumPy applies each function element-wise to the list.
s2 = np.sin(ar2)
c2 = np.cos(ar2)
t2 = np.tan(ar2)
# The label= text of each line is what the legend displays.
plt.plot(ar2, s2, color='cyan', label='sine')
plt.plot(ar2, c2, color='red', label='cosine')
plt.plot(ar2, t2, color='black', linestyle='dashed', label='tangent')
plt.grid(True)
plt.xlabel('array values')
plt.ylabel('sine,cosine,tangent')
plt.title('trigonometric functions', fontsize=10, color='blue')
# Location code 4 places the legend in the lower-right corner.
plt.legend(loc=4)
plt.show()

OUTPUT:
# Create a dataframe from a csv and then plot its line chart
# (Aadya and Devangi)

import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv('C:\\Users\\devangi jain\\OneDrive\\Desktop\\melasales.csv')
print('original data frame:')
print(df)
# create a line plot of different color for each week
df.plot()
# give title as "Mela Sales Report"
plt.title('Mela Sales Report')
# label x axis as "days"
plt.xlabel('days')
# label y axis as "sales in Rs."
plt.ylabel('sales in Rs.')
# put a tick at every row position and label it with that row's day name
plt.xticks(df.index, df.day)
plt.show()

OUTPUT:

original data frame:


Unnamed: 0 day week 1 week 2 week 3
0 0 mon 5000 3000 5800
1 1 tue 5900 3000 5800
2 2 wed 6500 5000 3500
3 3 thur 3500 5500 2500
4 4 fri 4000 3000 3000
5 5 sat 5300 4300 5300
6 6 sun 7900 5900 6000
# To plot a bar chart for marks obtained by the first five roll numbers
# (Aadya and Devangi)

import matplotlib.pyplot as plt


# x holds the student names (one bar each); y holds the matching marks.
x = ['anu', 'babu', 'sweety', 'meena', 'navya']
y = [90, 45, 78, 56, 98]
plt.bar(x, y, color='red')
plt.xlabel('students name')
plt.ylabel('marks obtained')
plt.title('Result')
plt.show()

OUTPUT:
# To plot a horizontal bar graph for users of 5 different computer
# languages (Aadya and Devangi)

import matplotlib.pyplot as plt


# l holds the language names (one bar each); n holds the matching user counts.
l = ['python', 'java', 'c++', 'java script', 'html']
n = [220, 557, 123, 352, 52]
# barh draws horizontal bars, so the counts run along the x axis and the
# language names along the y axis.
plt.barh(l, n)
plt.xlabel('number of users')
plt.ylabel('languages')
plt.title('users of different computer languages')
plt.show()

OUTPUT:
# Plot a bar chart from a csv file (Devangi and Aadya)
import pandas as pd
import matplotlib.pyplot as plt
# Every backslash in the Windows path is doubled so that none of them
# starts an escape sequence.
df = pd.read_csv('C:\\Users\\AADYA KISHORE\\Desktop\\melasales.csv')
print('original data frame:')
print(df)
# x='day' puts the day names on the x axis.
df.plot(kind='bar', x='day', color=['red', 'yellow', 'purple'],
        linestyle='dashed', width=0.5, edgecolor='green')
plt.xlabel('days')
plt.ylabel('sales in Rs.')
plt.title('Mela Sales Report')
plt.show()

OUTPUT:

original data frame:


Unnamed: 0 day week 1 week 2 week 3
0 0 mon 5000 3000 5800
1 1 tue 5900 3000 5800
2 2 wed 6500 5000 3500
3 3 thur 3500 5500 2500
4 4 fri 4000 3000 3000
5 5 sat 5300 4300 5300
6 6 sun 7900 5900 6000
# Display a histogram which shows the marks of the students in a class
# (Devangi and Aadya)
import matplotlib.pyplot as plt
# data set that has to be plotted in the form of frequency;
# each score falls inside a different bin
score = [0, 41, 52, 72, 80, 95]
b = [0, 33, 50, 60, 75, 85, 100]  # bins (edges of the percentage ranges)
# weights: each score counts w[i] times, which sets the height of its bin
w = [2, 4, 10, 6, 17, 15]
plt.hist(score, bins=b, weights=w, edgecolor='black', color='r')
plt.xlabel('range of percentage')
plt.ylabel('frequency')
plt.show()

OUTPUT:
# Display a histogram corresponding to the height and weight
# (Devangi and Aadya)

import pandas as pd
import matplotlib.pyplot as plt
data = {'name': ['arnav', 'sheela', 'azhar', 'bincy', 'yash', 'nazar'],
        'height': [60, 61, 63, 65, 61, 60],
        'weight': [47, 89, 52, 58, 50, 47]}
df = pd.DataFrame(data)
# Draw a histogram from the frame's columns.
df.plot(kind='hist')
plt.xlabel('height and weight')
plt.ylabel('frequency')
plt.title('corresponding height and weight of the students')
plt.show()

OUTPUT:
# Create a histogram using a dataframe, corresponding to attributes having
# numeric values, and show customisation (Aadya and Devangi)
import pandas as pd
import matplotlib.pyplot as plt
data = {'name': ['aarav', 'sheela', 'azhar', 'bincy', 'yash', 'nazar'],
        'height': [60, 61, 63, 65, 61, 60],
        'weight': [47, 89, 52, 58, 50, 47]}
df = pd.DataFrame(data)
# Customisation: green dotted outlines of width 2, no solid fill, and an
# 'o' hatch pattern drawn inside the bars.
df.plot(kind='hist', edgecolor='Green', linewidth=2, linestyle=':',
        fill=False, hatch='o')
plt.show()

OUTPUT:

You might also like