P#04 ML 46

In [ ]: Basics Level
In [13]: import numpy as np

import pandas as pd
import matplotlib.pyplot as plt
In [2]: #1. Write a Pandas program to create and display a one-dimensional array-like object
con
ds = pd.Series([2, 4, 6, 8,
10]) print(ds)
0 2
1 4
2 6
3 8
4 10
dtype: int64
In [12]: #2. Write a Pandas program to convert a Panda module Series to Python list and
it’sntype
ds = pd.Series([2, 4, 6, 8,
10]) print("Pandas Series and
type") print(ds)
print(type(ds))
print("Convert Pandas Series to Python
list") print(ds.tolist())
print(type(ds.tolist()))
Pandas Series and type
0 2
1 4
2 6
3 8
4 10
dtype: int64
<class 'pandas.core.series.Series'>
Convert Pandas Series to Python
list [2, 4, 6, 8, 10]
<class 'list'>
In [14]: #3. Write a Pandas program to convert a dictionary to a Pandas series.

Sample #dictionary: d1 = {'a': 100, 'b': 200, 'c':300, 'd':400, 'e':800}
d1 = {'a': 100, 'b': 200, 'c':300, 'd':400, 'e':800}

print("Original dictionary:")
print(d1)
new_series = pd.Series(d1)
print("Converted series:")
print(new_series)
Original dictionary:
{'a': 100, 'b': 200, 'c': 300, 'd': 400, 'e': 800}
Converted series:
a 100
b 200
c 300
d 400
e 800
dtype: int64
In [15]: #4. Write a Pandas program to convert a NumPy array to a Pandas series.
np_array = np.array([10, 20, 30, 40,

50]) print("NumPy array:")
print(np_array)
new_series = pd.Series(np_array)
print("Converted Pandas series:")
print(new_series)
NumPy array:
[10 20 30 40 50]
Converted Pandas series:
0 10
1 20
2 30
3 40
4 50
dtype: int32
In [16]: #5. Write a Pandas program to change the data type of given a column or a Series
s1 = pd.Series(['100', '200', 'python', '300.12',

'400'])
print("Original Data Series:")
print(s1)
print("Change the said data type to
numeric:") s2 = pd.to_numeric(s1,
errors='coerce') print(s2)
Original Data Series:

0 100
1 200
2 python
3 300.12
4 400
dtype: object
Change the said data type to numeric:
0 100.00
1 200.00
2 NaN
3 300.12
4 400.00
dtype: float64
In #6. Write a Pandas program to convert the first column of a Data frame as a Series.
[11]:
d = {'col1': [1, 2, 3, 4, 7, 11],
'col2': [4, 5, 6, 9, 5, 0],
'col3': [7, 5, 8, 12, 1,11]}
df = pd.DataFrame(data=d) print("Original DataFrame") print(df)

s1 = df.ix[:,0]
print("\n1st column as a Series:")
print(s1) print(type(s1))
Original DataFrame
col1 col2 col3
0 1 4 7
1 2 5 5
2 3 6 8
3 4 9 12
4 7 5 1
5 11 0 11
AttributeError Traceback (most recent call last)

Input In [11], in <cell line: 10>()
8 print("Original DataFrame")
9 print(df)
---> 10 s1 = df.ix[:,0]
11 print("\n1st column as a Series:")
13 print(s1)
File ~\anaconda3\lib\site-packages\pandas\core\generic.py:5575, in NDFrame. getattr (s

elf, name)
5568 if (
5569 name not in self._internal_names_set
5570 and name not in self._metadata
5571 and name not in self._accessors
5572 and self._info_axis._can_hold_identifiers_and_holds_name(name)
5573 ):
5574 return self[name]
-> 5575 return object. getattribute (self, name)
AttributeError: 'DataFrame' object has no attribute 'ix'
In [10]: #7. Write a Pandas program to join the two given data frames along rows and assign
all d
student_data1 = pd.DataFrame({
'student_id': ['S1', 'S2', 'S3', 'S4', 'S5'],
'name': ['Danniella Fenton', 'Ryder
Storey', 'Bryce Jensen', 'Ed Bernal',
'Kwame Morin'], 'marks': [200, 210, 190,
222, 199]})
student_data2 = pd.DataFrame({
'student_id': ['S4', 'S5', 'S6', 'S7', 'S8'],
'name': ['Scarlette Fisher', 'Carla Williamson',
'Dante Morse', 'Kaiser William', 'Madeeha
Preston'], 'marks': [201, 200, 198, 219, 201]})
print("Original DataFrames:")
print(student_data1)
print("-----------------------------------------------------------------")
print(student_data2)
print("\nJoin the said two dataframes along rows:")
result_data = pd.concat([student_data1, student_data2])
Original DataFrames:
student_id name marks
0 S1 Danniella Fenton 200
1 S2 Ryder Storey 210
2 S3 Bryce Jensen 190
3 S4 Ed Bernal 222
4 S5 Kwame Morin 199

0 S4 Scarlette Fisher 201
1 S5 Carla Williamson 200
2 S6 Dante Morse 198
3 S7 Kaiser William 219
4 S8 Madeeha Preston 201
Join the said two dataframes along rows:

0 S1 Danniella Fenton 200
1 S2 Ryder Storey 210
2 S3 Bryce Jensen 190
3 S4 Ed Bernal 222
4 S5 Kwame Morin 199
0 S4 Scarlette Fisher 201
1 S5 Carla Williamson 200
2 S6 Dante Morse 198
3 S7 Kaiser William 219
4 S8 Madeeha Preston 201
In [9]: #8. Filter a specific rows/columns from the data frame
x = pd.DataFrame({"col1" : np.arange(1,20,2)}, index=[9,8,7,6,0, 1, 2, 3, 4, 5])

x.iloc[0:5]
x.loc[0:5]
Out[9]:
col1
0 9
1 11
2 13
3 15
4 17
5 19
In [32]: #9. Create a custom data frame and apply logical, query and filter methods on the
same
df= pd.DataFrame({ 'name':

['Jane','John','Ashley','Mike','Emily','Jack','Catlin'],
'ctg':['A','A','C','B','B','C','B'],
'val':np.random.random(7).round(2),
'val2':np.random.randint(1,10, size=7)
})
print(df)
name ctg val val2
0 Jane A 0.61 7
1 John A 0.03 7
2 Ashley C 0.43 8
3 Mike B 0.62 8
4 Emily B 0.30 3
5 Jack C 0.63 2
6 Catlin B 0.82 8
In [31]: #11. Calculate the mean, median and mode for the following data frame.
dataMatrix = {"D1":[135, 137, 136, 138, 138],
"D2":[43, 42, 42, 42, 42],
"D3":[72, 73, 72, 72, 73],
"D4":[100, 102, 100, 103, 104] };
dataFrame = pd.DataFrame(data=dataMatrix);
print("DataFrame:");
print("Mean:Computed column-wise:");
meanData = dataFrame.mean();
print(meanData);
print("Mean:Computed row-wise:");
meanData = dataFrame.mean(axis=1);
print(meanData);
print("Median:Computed column-wise:");
medianData = dataFrame.median();
print(medianData);
print("Median:Computed row-wise:");
medianData = dataFrame.median(axis=1);
print(medianData);
print("Mode:Computed column-wise:");
modeData = dataFrame.mode();
print(modeData);
print("Mode:Computed row-wise:");
modeData = dataFrame.mode(axis=1);
print(modeData);
DataFrame:
Mean:Computed column-wise:
D1 136.8
D2 42.2
D3 72.4
D4 101.8
dtype: float64
Mean:Computed row-wise:
0 87.50
1 88.50
2 87.50
3 88.75
4 89.25
dtype: float64
Median:Computed column-wise:
D1 137.0
D2 42.0
D3 72.0
D4 102.0
dtype: float64
Median:Computed row-wise:
0 86.0
1 87.5
2 86.0
3 87.5
4 88.5
dtype: float64
Mode:Computed column-wise:
D1 D2 D3 D4
0 138 42 72 100
Mode:Computed row-wise:
0 1 2 3
0 43 72 100 135
1 42 73 102 137
2 42 72 100 136
3 42 72 103 138
4 42 73 104 138
In [ ]: #Intermediate Level:
In [3]: # 1. Read the CSV from the given URL with and without header.
url =r"C:\Users\quite\Downloads\Iris.csv"
# Read without header (assuming no header in the CSV)
df1 = pd.read_csv(url,
header=None) print(df1)
0 1 2 3 4 \
0 Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm
1 1 5.1 3.5 1.4 0.2
2 2 4.9 3.0 1.4 0.2
3 3 4.7 3.2 1.3 0.2
4 4 4.6 3.1 1.5 0.2
.. ... ... ... ... ...
146 146 6.7 3.0 5.2 2.3
147 147 6.3 2.5 5.0 1.9
148 148 6.5 3.0 5.2 2.0
149 149 6.2 3.4 5.4 2.3
150 150 5.9 3.0 5.1 1.8
5
0 Species
1 Iris-setosa
2 Iris-setosa
3 Iris-setosa
4 Iris-setosa
.. ...
146 Iris-virginica
147 Iris-virginica
148 Iris-virginica
149 Iris-virginica
150 Iris-virginica
[151 rows x 6
columns]
In [4]: import pandas as pd
# 1. Read the CSV from the given URL with and without header.
url = r"C:\Users\quite\Downloads\Iris.csv"
# Read with header (assuming the first row contains column names)
df =
pd.read_csv(url)
print(df)
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm \
0 1 5.1 3.5 1.4 0.2
1 2 4.9 3.0 1.4 0.2
2 3 4.7 3.2 1.3 0.2
3 4 4.6 3.1 1.5 0.2
4 5 5.0 3.6 1.4 0.2
.. ... ... ... ... ...
145 146 6.7 3.0 5.2 2.3
146 147 6.3 2.5 5.0 1.9
147 148 6.5 3.0 5.2 2.0
148 149 6.2 3.4 5.4 2.3
149 150 5.9 3.0 5.1 1.8
Species
0 Iris-setosa
1 Iris-setosa
2 Iris-setosa
3 Iris-setosa
4 Iris-setosa
.. ...
145 Iris-virginica
146 Iris-virginica
147 Iris-virginica
148 Iris-virginica
149 Iris-virginica
[150 rows x 6
columns]
# 2. Change the order of the columns (example, changing the order of the first two
colum df_reorder= df[['SepalWidthCm', 'SepalLengthCm', 'PetalLengthCm', 'PetalWidthCm',
In [5]: 'Speci print(df_reorder)
SepalWidthCm SepalLengthCm PetalLengthCm PetalWidthCm Species

0 3.5 5.1 1.4 0.2 Iris-setosa
1 3.0 4.9 1.4 0.2 Iris-setosa
2 3.2 4.7 1.3 0.2 Iris-setosa
3 3.1 4.6 1.5 0.2 Iris-setosa
4 3.6 5.0 1.4 0.2 Iris-setosa
.. ... ... ... ... ...
145 3.0 6.7 5.2 2.3 Iris-virginica
146 2.5 6.3 5.0 1.9 Iris-virginica
147 3.0 6.5 5.2 2.0 Iris-virginica
148 3.4 6.2 5.4 2.3 Iris-virginica
149 3.0 5.9 5.1 1.8 Iris-virginica
[150 rows x 5 columns]
In [30]: # 3. Read data from "1.csv" file and measure mean, mode, and standard deviation
data_from_file = pd.read_csv(r"C:\Users\quite\Downloads\Iris.csv")
mean = data_from_file.mean()
mode = data_from_file.mode().iloc[0] # Mode can have multiple values, taking the
first
std_dev = data_from_file.std()
print("mean:", mean)
print("----------------------------------------------------------------------------------------------------------------------------------")
print("mode:", mode)
print("----------------------------------------------------------------------------------------------------------------------------------")
print("std_dev:", std_dev)
mean: Id 75.500000
SepalLengthCm 5.843333
SepalWidthCm 3.054000
PetalLengthCm 3.758667
PetalWidthCm 1.198667
dtype: float64
mode: Id 1
SepalLengthCm 5.0
SepalWidthCm 3.0
PetalLengthCm 1.5
PetalWidthCm 0.2
Species Iris-setosa
Name: 0, dtype: object
std_dev: Id 43.445368
SepalLengthCm 0.828066
SepalWidthCm 0.433594
PetalLengthCm 1.764420
PetalWidthCm 0.763161
dtype: float64
C:\Users\quite\AppData\Local\Temp\ipykernel_7512\1955642370.py:3: FutureWarning: Droppin
g of nuisance columns in DataFrame reductions (with 'numeric_only=None') is
deprecated; in a future version this will raise TypeError. Select only valid columns
before calling the reduction.
mean = data_from_file.mean() C:\Users\quite\AppData\Local\Temp\
ipykernel_7512\1955642370.py:5: FutureWarning: Droppin g of nuisance columns in
DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version
this will raise TypeError. Select only valid columns before calling the reduction.
std_dev = data_from_file.std()
In [7]: # 4. Read and display the first three rows

df.head(3)
Out[7]: Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species
0 1 5.1 3.5 1.4 0.2 Iris-setosa
1 2 4.9 3.0 1.4 0.2 Iris-setosa
2 3 4.7 3.2 1.3 0.2 Iris-setosa
In [21]: # 5. Read and display the first n samples

n = 5
first_n_samples = df.head(n)
print(first_n_samples)
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species

0 1 5.1 3.5 1.4 0.2 Iris-setosa
1 2 4.9 3.0 1.4 0.2 Iris-setosa
2 3 4.7 3.2 1.3 0.2 Iris-setosa
3 4 4.6 3.1 1.5 0.2 Iris-setosa
4 5 5.0 3.6 1.4 0.2 Iris-setosa
In [23]: # 6. Display the number of columns and their names
num_columns = len(df.columns)
column_names = df.columns.tolist()
print("number of columns:",num_columns
) print("column names:",column_names)
number of columns: 6
column names: ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',
'Species']
In [24]: # 7. Display columns using column slicing (n to m)

n = 2 # Start column index
m = 4 # End column index (inclusive)
columns_n_to_m = df.iloc[:, n:m+1]
print(columns_n_to_m)
SepalWidthCm PetalLengthCm PetalWidthCm

0 3.5 1.4 0.2
1 3.0 1.4 0.2
2 3.2 1.3 0.2
3 3.1 1.5 0.2
4 3.6 1.4 0.2
.. ... ... ...
145 3.0 5.2 2.3
146 2.5 5.0 1.9
147 3.0 5.2 2.0
148 3.4 5.4 2.3
149 3.0 5.1 1.8
[150 rows x 3 columns]
In [25]: # 8. Display rows using row slicing (14th to 34th rows)

rows =
df.iloc[13:34]
print(rows)
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species
13 14 4.3 3.0 1.1 0.1 Iris-setosa
14 15 5.8 4.0 1.2 0.2 Iris-setosa
15 16 5.7 4.4 1.5 0.4 Iris-setosa
16 17 5.4 3.9 1.3 0.4 Iris-setosa
17 18 5.1 3.5 1.4 0.3 Iris-setosa
18 19 5.7 3.8 1.7 0.3 Iris-setosa
19 20 5.1 3.8 1.5 0.3 Iris-setosa
20 21 5.4 3.4 1.7 0.2 Iris-setosa
21 22 5.1 3.7 1.5 0.4 Iris-setosa
22 23 4.6 3.6 1.0 0.2 Iris-setosa
23 24 5.1 3.3 1.7 0.5 Iris-setosa
24 25 4.8 3.4 1.9 0.2 Iris-setosa
25 26 5.0 3.0 1.6 0.2 Iris-setosa
26 27 5.0 3.4 1.6 0.4 Iris-setosa
27 28 5.2 3.5 1.5 0.2 Iris-setosa
28 29 5.2 3.4 1.4 0.2 Iris-setosa
29 30 4.7 3.2 1.6 0.2 Iris-setosa
30 31 4.8 3.1 1.6 0.2 Iris-setosa
31 32 5.4 3.4 1.5 0.4 Iris-setosa
32 33 5.2 4.1 1.5 0.1 Iris-setosa
33 34 5.5 4.2 1.4 0.2 Iris-setosa
In [26]: # 9. Display specific rows and columns using "iloc" and "loc" functions
specific_rows_columns = df.loc[10:15, ['SepalLengthCm', 'PetalLengthCm']]
print(specific_rows_columns)
SepalLengthCm PetalLengthCm
10 5.4 1.5
11 4.8 1.6
12 4.8 1.4
13 4.3 1.1
14 5.8 1.2
15 5.7 1.5
In [27]: # 10. Count unique values in the first column

unique_values_count = df['SepalLengthCm'].unique()
print(unique_values_count)
[5.1 4.9 4.7 4.6 5. 5.4 4.4 4.8 4.3 5.8 5.7 5.2 5.5 4.5 5.3 7. 6.4 6.9
6.5 6.3 6.6 5.9 6. 6.1 5.6 6.7 6.2 6.8 7.1 7.6 7.3 7.2 7.7 7.4 7.9]
In [28]: # 11. Calculate mean, median, and mode for a specific column (e.g., 'sepal_length')
column_name = 'SepalLengthCm'
mean= df[column_name].mean()
median= df[column_name].median()
mode= df[column_name].mode().iloc[0]
print("mean:", mean)
print("median:", median)
print("mode:", mode)
mean: 5.843333333333335
median: 5.8
mode: 5.0
In [ ]:

P#04 ML 46

Uploaded by

Copyright:

Available Formats

You might also like

P#04 ML 46

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

P#04 ML 46

Uploaded by

Copyright:

Available Formats

In [ ]: Basics Level

In [13]: import numpy as np

In [14]: #3. Write a Pandas program to convert a dictionary to a Pandas series.

d1 = {'a': 100, 'b': 200, 'c':300, 'd':400, 'e':800}

np_array = np.array([10, 20, 30, 40,

s1 = pd.Series(['100', '200', 'python', '300.12',

Original Data Series:

df = pd.DataFrame(data=d) print("Original DataFrame") print(df)

AttributeError Traceback (most recent call last)

File ~\anaconda3\lib\site-packages\pandas\core\generic.py:5575, in NDFrame. getattr (s

AttributeError: 'DataFrame' object has no attribute 'ix'

student_id name marks

Join the said two dataframes along rows:

In [9]: #8. Filter a specific rows/columns from the data frame

x = pd.DataFrame({"col1" : np.arange(1,20,2)}, index=[9,8,7,6,0, 1, 2, 3, 4, 5])

df= pd.DataFrame({ 'name':

In [4]: import pandas as pd

SepalWidthCm SepalLengthCm PetalLengthCm PetalWidthCm Species

[150 rows x 5 columns]

In [7]: # 4. Read and display the first three rows

Out[7]: Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species

0 1 5.1 3.5 1.4 0.2 Iris-setosa

1 2 4.9 3.0 1.4 0.2 Iris-setosa

2 3 4.7 3.2 1.3 0.2 Iris-setosa

In [21]: # 5. Read and display the first n samples

Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species

In [23]: # 6. Display the number of columns and their names

In [24]: # 7. Display columns using column slicing (n to m)

SepalWidthCm PetalLengthCm PetalWidthCm

[150 rows x 3 columns]

In [25]: # 8. Display rows using row slicing (14th to 34th rows)

In [27]: # 10. Count unique values in the first column

You might also like