P#04 ML 46

In [ ]: Basics Level

In [13]: import numpy as np

import pandas as pd
import matplotlib.pyplot as plt

In [2]: #1. Write a Pandas program to create and display a one-dimensional array-like object

ds = pd.Series([2, 4, 6, 8,
10]) print(ds)
0 2
1 4
2 6
3 8
4 10
dtype: int64

In [12]: #2. Write a Pandas program to convert a Panda module Series to Python list and

ds = pd.Series([2, 4, 6, 8,
10]) print("Pandas Series and
type") print(ds)
print("Convert Pandas Series to Python
list") print(ds.tolist())
Pandas Series and type
0 2
1 4
2 6
3 8
4 10
dtype: int64
<class 'pandas.core.series.Series'>
Convert Pandas Series to Python
list [2, 4, 6, 8, 10]
<class 'list'>

In [14]: #3. Write a Pandas program to convert a dictionary to a Pandas series.

Sample #dictionary: d1 = {'a': 100, 'b': 200, 'c':300, 'd':400, 'e':800}

d1 = {'a': 100, 'b': 200, 'c':300, 'd':400, 'e':800}

print("Original dictionary:")
new_series = pd.Series(d1)
print("Converted series:")

Original dictionary:
{'a': 100, 'b': 200, 'c': 300, 'd': 400, 'e': 800}
Converted series:
a 100
b 200
c 300
d 400
e 800
dtype: int64

In [15]: #4. Write a Pandas program to convert a NumPy array to a Pandas series.

np_array = np.array([10, 20, 30, 40,

50]) print("NumPy array:")
new_series = pd.Series(np_array)
print("Converted Pandas series:")

NumPy array:
[10 20 30 40 50]
Converted Pandas series:
0 10
1 20
2 30
3 40
4 50
dtype: int32

In [16]: #5. Write a Pandas program to change the data type of given a column or a Series

s1 = pd.Series(['100', '200', 'python', '300.12',

print("Original Data Series:")
print("Change the said data type to
numeric:") s2 = pd.to_numeric(s1,
errors='coerce') print(s2)

Original Data Series:

0 100
1 200
2 python
3 300.12
4 400
dtype: object
Change the said data type to numeric:
0 100.00
1 200.00
2 NaN
3 300.12
4 400.00
dtype: float64

In #6. Write a Pandas program to convert the first column of a Data frame as a Series.
d = {'col1': [1, 2, 3, 4, 7, 11],
'col2': [4, 5, 6, 9, 5, 0],
'col3': [7, 5, 8, 12, 1,11]}

df = pd.DataFrame(data=d) print("Original DataFrame") print(df)

s1 = df.ix[:,0]
print("\n1st column as a Series:")

print(s1) print(type(s1))

Original DataFrame
col1 col2 col3
0 1 4 7
1 2 5 5
2 3 6 8
3 4 9 12
4 7 5 1
5 11 0 11

AttributeError Traceback (most recent call last)

Input In [11], in <cell line: 10>()
8 print("Original DataFrame")
9 print(df)
---> 10 s1 = df.ix[:,0]
11 print("\n1st column as a Series:")
13 print(s1)

File ~\anaconda3\lib\site-packages\pandas\core\generic.py:5575, in NDFrame. getattr (s

elf, name)
5568 if (
5569 name not in self._internal_names_set
5570 and name not in self._metadata
5571 and name not in self._accessors
5572 and self._info_axis._can_hold_identifiers_and_holds_name(name)
5573 ):
5574 return self[name]
-> 5575 return object. getattribute (self, name)

AttributeError: 'DataFrame' object has no attribute 'ix'

In [10]: #7. Write a Pandas program to join the two given data frames along rows and assign
all d

student_data1 = pd.DataFrame({
'student_id': ['S1', 'S2', 'S3', 'S4', 'S5'],
'name': ['Danniella Fenton', 'Ryder
Storey', 'Bryce Jensen', 'Ed Bernal',
'Kwame Morin'], 'marks': [200, 210, 190,
222, 199]})

student_data2 = pd.DataFrame({
'student_id': ['S4', 'S5', 'S6', 'S7', 'S8'],
'name': ['Scarlette Fisher', 'Carla Williamson',
'Dante Morse', 'Kaiser William', 'Madeeha
Preston'], 'marks': [201, 200, 198, 219, 201]})

print("Original DataFrames:")

print("\nJoin the said two dataframes along rows:")
result_data = pd.concat([student_data1, student_data2])
Original DataFrames:
student_id name marks
0 S1 Danniella Fenton 200
1 S2 Ryder Storey 210
2 S3 Bryce Jensen 190
3 S4 Ed Bernal 222
4 S5 Kwame Morin 199

student_id name marks

0 S4 Scarlette Fisher 201
1 S5 Carla Williamson 200
2 S6 Dante Morse 198
3 S7 Kaiser William 219
4 S8 Madeeha Preston 201

Join the said two dataframes along rows:

student_id name marks
0 S1 Danniella Fenton 200
1 S2 Ryder Storey 210
2 S3 Bryce Jensen 190
3 S4 Ed Bernal 222
4 S5 Kwame Morin 199
0 S4 Scarlette Fisher 201
1 S5 Carla Williamson 200
2 S6 Dante Morse 198
3 S7 Kaiser William 219
4 S8 Madeeha Preston 201

In [9]: #8. Filter a specific rows/columns from the data frame

x = pd.DataFrame({"col1" : np.arange(1,20,2)}, index=[9,8,7,6,0, 1, 2, 3, 4, 5])



0 9

1 11

2 13

3 15

4 17

5 19

In [32]: #9. Create a custom data frame and apply logical, query and filter methods on the

df= pd.DataFrame({ 'name':

'val2':np.random.randint(1,10, size=7)

name ctg val val2
0 Jane A 0.61 7
1 John A 0.03 7
2 Ashley C 0.43 8
3 Mike B 0.62 8
4 Emily B 0.30 3
5 Jack C 0.63 2
6 Catlin B 0.82 8

In [31]: #11. Calculate the mean, median and mode for the following data frame.
dataMatrix = {"D1":[135, 137, 136, 138, 138],
"D2":[43, 42, 42, 42, 42],
"D3":[72, 73, 72, 72, 73],
"D4":[100, 102, 100, 103, 104] };
dataFrame = pd.DataFrame(data=dataMatrix);

print("Mean:Computed column-wise:");
meanData = dataFrame.mean();

print("Mean:Computed row-wise:");
meanData = dataFrame.mean(axis=1);

print("Median:Computed column-wise:");
medianData = dataFrame.median();


print("Median:Computed row-wise:");
medianData = dataFrame.median(axis=1);

print("Mode:Computed column-wise:");
modeData = dataFrame.mode();

print("Mode:Computed row-wise:");
modeData = dataFrame.mode(axis=1);

Mean:Computed column-wise:
D1 136.8
D2 42.2
D3 72.4
D4 101.8
dtype: float64
Mean:Computed row-wise:
0 87.50
1 88.50
2 87.50
3 88.75
4 89.25
dtype: float64
Median:Computed column-wise:
D1 137.0
D2 42.0
D3 72.0
D4 102.0
dtype: float64
Median:Computed row-wise:
0 86.0
1 87.5
2 86.0
3 87.5
4 88.5
dtype: float64
Mode:Computed column-wise:
D1 D2 D3 D4
0 138 42 72 100
Mode:Computed row-wise:
0 1 2 3
0 43 72 100 135
1 42 73 102 137
2 42 72 100 136
3 42 72 103 138
4 42 73 104 138

In [ ]: #Intermediate Level:

In [3]: # 1. Read the CSV from the given URL with and without header.
url =r"C:\Users\quite\Downloads\Iris.csv"
# Read without header (assuming no header in the CSV)
df1 = pd.read_csv(url,
header=None) print(df1)

0 1 2 3 4 \
0 Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm
1 1 5.1 3.5 1.4 0.2
2 2 4.9 3.0 1.4 0.2
3 3 4.7 3.2 1.3 0.2
4 4 4.6 3.1 1.5 0.2
.. ... ... ... ... ...
146 146 6.7 3.0 5.2 2.3
147 147 6.3 2.5 5.0 1.9
148 148 6.5 3.0 5.2 2.0
149 149 6.2 3.4 5.4 2.3
150 150 5.9 3.0 5.1 1.8

0 Species
1 Iris-setosa
2 Iris-setosa
3 Iris-setosa
4 Iris-setosa
.. ...
146 Iris-virginica
147 Iris-virginica
148 Iris-virginica
149 Iris-virginica
150 Iris-virginica

[151 rows x 6


In [4]: import pandas as pd

# 1. Read the CSV from the given URL with and without header.
url = r"C:\Users\quite\Downloads\Iris.csv"
# Read with header (assuming the first row contains column names)
df =
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm \
0 1 5.1 3.5 1.4 0.2
1 2 4.9 3.0 1.4 0.2
2 3 4.7 3.2 1.3 0.2
3 4 4.6 3.1 1.5 0.2
4 5 5.0 3.6 1.4 0.2
.. ... ... ... ... ...
145 146 6.7 3.0 5.2 2.3
146 147 6.3 2.5 5.0 1.9
147 148 6.5 3.0 5.2 2.0
148 149 6.2 3.4 5.4 2.3
149 150 5.9 3.0 5.1 1.8

0 Iris-setosa
1 Iris-setosa
2 Iris-setosa
3 Iris-setosa
4 Iris-setosa
.. ...
145 Iris-virginica
146 Iris-virginica
147 Iris-virginica
148 Iris-virginica
149 Iris-virginica

[150 rows x 6

# 2. Change the order of the columns (example, changing the order of the first two
colum df_reorder= df[['SepalWidthCm', 'SepalLengthCm', 'PetalLengthCm', 'PetalWidthCm',
In [5]: 'Speci print(df_reorder)

SepalWidthCm SepalLengthCm PetalLengthCm PetalWidthCm Species

0 3.5 5.1 1.4 0.2 Iris-setosa
1 3.0 4.9 1.4 0.2 Iris-setosa
2 3.2 4.7 1.3 0.2 Iris-setosa
3 3.1 4.6 1.5 0.2 Iris-setosa
4 3.6 5.0 1.4 0.2 Iris-setosa
.. ... ... ... ... ...
145 3.0 6.7 5.2 2.3 Iris-virginica
146 2.5 6.3 5.0 1.9 Iris-virginica
147 3.0 6.5 5.2 2.0 Iris-virginica
148 3.4 6.2 5.4 2.3 Iris-virginica
149 3.0 5.9 5.1 1.8 Iris-virginica

[150 rows x 5 columns]

In [30]: # 3. Read data from "1.csv" file and measure mean, mode, and standard deviation
data_from_file = pd.read_csv(r"C:\Users\quite\Downloads\Iris.csv")
mean = data_from_file.mean()
mode = data_from_file.mode().iloc[0] # Mode can have multiple values, taking the
std_dev = data_from_file.std()
print("mean:", mean)
print("mode:", mode)
print("std_dev:", std_dev)
mean: Id 75.500000
SepalLengthCm 5.843333
SepalWidthCm 3.054000
PetalLengthCm 3.758667
PetalWidthCm 1.198667
dtype: float64

mode: Id 1
SepalLengthCm 5.0
SepalWidthCm 3.0
PetalLengthCm 1.5
PetalWidthCm 0.2
Species Iris-setosa
Name: 0, dtype: object

std_dev: Id 43.445368
SepalLengthCm 0.828066
SepalWidthCm 0.433594
PetalLengthCm 1.764420
PetalWidthCm 0.763161
dtype: float64
C:\Users\quite\AppData\Local\Temp\ipykernel_7512\1955642370.py:3: FutureWarning: Droppin
g of nuisance columns in DataFrame reductions (with 'numeric_only=None') is
deprecated; in a future version this will raise TypeError. Select only valid columns
before calling the reduction.
mean = data_from_file.mean() C:\Users\quite\AppData\Local\Temp\
ipykernel_7512\1955642370.py:5: FutureWarning: Droppin g of nuisance columns in
DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version
this will raise TypeError. Select only valid columns before calling the reduction.
std_dev = data_from_file.std()

In [7]: # 4. Read and display the first three rows


Out[7]: Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species

0 1 5.1 3.5 1.4 0.2 Iris-setosa

1 2 4.9 3.0 1.4 0.2 Iris-setosa

2 3 4.7 3.2 1.3 0.2 Iris-setosa

In [21]: # 5. Read and display the first n samples

n = 5
first_n_samples = df.head(n)

Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species

0 1 5.1 3.5 1.4 0.2 Iris-setosa
1 2 4.9 3.0 1.4 0.2 Iris-setosa
2 3 4.7 3.2 1.3 0.2 Iris-setosa
3 4 4.6 3.1 1.5 0.2 Iris-setosa
4 5 5.0 3.6 1.4 0.2 Iris-setosa

In [23]: # 6. Display the number of columns and their names

num_columns = len(df.columns)
column_names = df.columns.tolist()
print("number of columns:",num_columns
) print("column names:",column_names)

number of columns: 6
column names: ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',

In [24]: # 7. Display columns using column slicing (n to m)

n = 2 # Start column index
m = 4 # End column index (inclusive)
columns_n_to_m = df.iloc[:, n:m+1]


SepalWidthCm PetalLengthCm PetalWidthCm

0 3.5 1.4 0.2
1 3.0 1.4 0.2
2 3.2 1.3 0.2
3 3.1 1.5 0.2
4 3.6 1.4 0.2
.. ... ... ...
145 3.0 5.2 2.3
146 2.5 5.0 1.9
147 3.0 5.2 2.0
148 3.4 5.4 2.3
149 3.0 5.1 1.8

[150 rows x 3 columns]

In [25]: # 8. Display rows using row slicing (14th to 34th rows)

rows =
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species
13 14 4.3 3.0 1.1 0.1 Iris-setosa
14 15 5.8 4.0 1.2 0.2 Iris-setosa
15 16 5.7 4.4 1.5 0.4 Iris-setosa
16 17 5.4 3.9 1.3 0.4 Iris-setosa
17 18 5.1 3.5 1.4 0.3 Iris-setosa
18 19 5.7 3.8 1.7 0.3 Iris-setosa
19 20 5.1 3.8 1.5 0.3 Iris-setosa
20 21 5.4 3.4 1.7 0.2 Iris-setosa
21 22 5.1 3.7 1.5 0.4 Iris-setosa
22 23 4.6 3.6 1.0 0.2 Iris-setosa
23 24 5.1 3.3 1.7 0.5 Iris-setosa
24 25 4.8 3.4 1.9 0.2 Iris-setosa
25 26 5.0 3.0 1.6 0.2 Iris-setosa
26 27 5.0 3.4 1.6 0.4 Iris-setosa
27 28 5.2 3.5 1.5 0.2 Iris-setosa
28 29 5.2 3.4 1.4 0.2 Iris-setosa
29 30 4.7 3.2 1.6 0.2 Iris-setosa
30 31 4.8 3.1 1.6 0.2 Iris-setosa
31 32 5.4 3.4 1.5 0.4 Iris-setosa
32 33 5.2 4.1 1.5 0.1 Iris-setosa
33 34 5.5 4.2 1.4 0.2 Iris-setosa

In [26]: # 9. Display specific rows and columns using "iloc" and "loc" functions
specific_rows_columns = df.loc[10:15, ['SepalLengthCm', 'PetalLengthCm']]

SepalLengthCm PetalLengthCm
10 5.4 1.5
11 4.8 1.6
12 4.8 1.4
13 4.3 1.1
14 5.8 1.2
15 5.7 1.5

In [27]: # 10. Count unique values in the first column

unique_values_count = df['SepalLengthCm'].unique()

[5.1 4.9 4.7 4.6 5. 5.4 4.4 4.8 4.3 5.8 5.7 5.2 5.5 4.5 5.3 7. 6.4 6.9
6.5 6.3 6.6 5.9 6. 6.1 5.6 6.7 6.2 6.8 7.1 7.6 7.3 7.2 7.7 7.4 7.9]

In [28]: # 11. Calculate mean, median, and mode for a specific column (e.g., 'sepal_length')
column_name = 'SepalLengthCm'
mean= df[column_name].mean()
median= df[column_name].median()
mode= df[column_name].mode().iloc[0]
print("mean:", mean)
print("median:", median)
print("mode:", mode)

mean: 5.843333333333335
median: 5.8
mode: 5.0

In [ ]:

