P#04 ML 46

You might also like

Download as docx, pdf, or txt
Download as docx, pdf, or txt
You are on page 1of 11

In [ ]: Basics Level

In [13]: import numpy as np


import pandas as pd
import matplotlib.pyplot as plt

In [2]: #1. Write a Pandas program to create and display a one-dimensional array-like object
con

ds = pd.Series([2, 4, 6, 8,
10]) print(ds)
0 2
1 4
2 6
3 8
4 10
dtype: int64

In [12]: #2. Write a Pandas program to convert a Panda module Series to Python list and
it’sntype

ds = pd.Series([2, 4, 6, 8,
10]) print("Pandas Series and
type") print(ds)
print(type(ds))
print("Convert Pandas Series to Python
list") print(ds.tolist())
print(type(ds.tolist()))
Pandas Series and type
0 2
1 4
2 6
3 8
4 10
dtype: int64
<class 'pandas.core.series.Series'>
Convert Pandas Series to Python
list [2, 4, 6, 8, 10]
<class 'list'>

In [14]: #3. Write a Pandas program to convert a dictionary to a Pandas series.


Sample #dictionary: d1 = {'a': 100, 'b': 200, 'c':300, 'd':400, 'e':800}

d1 = {'a': 100, 'b': 200, 'c':300, 'd':400, 'e':800}


print("Original dictionary:")
print(d1)
new_series = pd.Series(d1)
print("Converted series:")
print(new_series)

Original dictionary:
{'a': 100, 'b': 200, 'c': 300, 'd': 400, 'e': 800}
Converted series:
a 100
b 200
c 300
d 400
e 800
dtype: int64

In [15]: #4. Write a Pandas program to convert a NumPy array to a Pandas series.

np_array = np.array([10, 20, 30, 40,


50]) print("NumPy array:")
print(np_array)
new_series = pd.Series(np_array)
print("Converted Pandas series:")
print(new_series)

NumPy array:
[10 20 30 40 50]
Converted Pandas series:
0 10
1 20
2 30
3 40
4 50
dtype: int32

In [16]: #5. Write a Pandas program to change the data type of given a column or a Series

s1 = pd.Series(['100', '200', 'python', '300.12',


'400'])
print("Original Data Series:")
print(s1)
print("Change the said data type to
numeric:") s2 = pd.to_numeric(s1,
errors='coerce') print(s2)

Original Data Series:


0 100
1 200
2 python
3 300.12
4 400
dtype: object
Change the said data type to numeric:
0 100.00
1 200.00
2 NaN
3 300.12
4 400.00
dtype: float64

In #6. Write a Pandas program to convert the first column of a Data frame as a Series.
[11]:
d = {'col1': [1, 2, 3, 4, 7, 11],
'col2': [4, 5, 6, 9, 5, 0],
'col3': [7, 5, 8, 12, 1,11]}

df = pd.DataFrame(data=d) print("Original DataFrame") print(df)


s1 = df.ix[:,0]
print("\n1st column as a Series:")

print(s1) print(type(s1))

Original DataFrame
col1 col2 col3
0 1 4 7
1 2 5 5
2 3 6 8
3 4 9 12
4 7 5 1
5 11 0 11

AttributeError Traceback (most recent call last)


Input In [11], in <cell line: 10>()
8 print("Original DataFrame")
9 print(df)
---> 10 s1 = df.ix[:,0]
11 print("\n1st column as a Series:")
13 print(s1)

File ~\anaconda3\lib\site-packages\pandas\core\generic.py:5575, in NDFrame. getattr (s


elf, name)
5568 if (
5569 name not in self._internal_names_set
5570 and name not in self._metadata
5571 and name not in self._accessors
5572 and self._info_axis._can_hold_identifiers_and_holds_name(name)
5573 ):
5574 return self[name]
-> 5575 return object. getattribute (self, name)

AttributeError: 'DataFrame' object has no attribute 'ix'

In [10]: #7. Write a Pandas program to join the two given data frames along rows and assign
all d

student_data1 = pd.DataFrame({
'student_id': ['S1', 'S2', 'S3', 'S4', 'S5'],
'name': ['Danniella Fenton', 'Ryder
Storey', 'Bryce Jensen', 'Ed Bernal',
'Kwame Morin'], 'marks': [200, 210, 190,
222, 199]})

student_data2 = pd.DataFrame({
'student_id': ['S4', 'S5', 'S6', 'S7', 'S8'],
'name': ['Scarlette Fisher', 'Carla Williamson',
'Dante Morse', 'Kaiser William', 'Madeeha
Preston'], 'marks': [201, 200, 198, 219, 201]})

print("Original DataFrames:")
print(student_data1)
print("-----------------------------------------------------------------")

print(student_data2)
print("\nJoin the said two dataframes along rows:")
result_data = pd.concat([student_data1, student_data2])
Original DataFrames:
student_id name marks
0 S1 Danniella Fenton 200
1 S2 Ryder Storey 210
2 S3 Bryce Jensen 190
3 S4 Ed Bernal 222
4 S5 Kwame Morin 199

student_id name marks


0 S4 Scarlette Fisher 201
1 S5 Carla Williamson 200
2 S6 Dante Morse 198
3 S7 Kaiser William 219
4 S8 Madeeha Preston 201

Join the said two dataframes along rows:


student_id name marks
0 S1 Danniella Fenton 200
1 S2 Ryder Storey 210
2 S3 Bryce Jensen 190
3 S4 Ed Bernal 222
4 S5 Kwame Morin 199
0 S4 Scarlette Fisher 201
1 S5 Carla Williamson 200
2 S6 Dante Morse 198
3 S7 Kaiser William 219
4 S8 Madeeha Preston 201

In [9]: #8. Filter a specific rows/columns from the data frame

x = pd.DataFrame({"col1" : np.arange(1,20,2)}, index=[9,8,7,6,0, 1, 2, 3, 4, 5])


x.iloc[0:5]
x.loc[0:5]

Out[9]:
col1

0 9

1 11

2 13

3 15

4 17

5 19

In [32]: #9. Create a custom data frame and apply logical, query and filter methods on the
same

df= pd.DataFrame({ 'name':


['Jane','John','Ashley','Mike','Emily','Jack','Catlin'],
'ctg':['A','A','C','B','B','C','B'],
'val':np.random.random(7).round(2),
'val2':np.random.randint(1,10, size=7)
})

print(df)
name ctg val val2
0 Jane A 0.61 7
1 John A 0.03 7
2 Ashley C 0.43 8
3 Mike B 0.62 8
4 Emily B 0.30 3
5 Jack C 0.63 2
6 Catlin B 0.82 8

In [31]: #11. Calculate the mean, median and mode for the following data frame.
dataMatrix = {"D1":[135, 137, 136, 138, 138],
"D2":[43, 42, 42, 42, 42],
"D3":[72, 73, 72, 72, 73],
"D4":[100, 102, 100, 103, 104] };
dataFrame = pd.DataFrame(data=dataMatrix);
print("DataFrame:");

print("Mean:Computed column-wise:");
meanData = dataFrame.mean();
print(meanData);

print("Mean:Computed row-wise:");
meanData = dataFrame.mean(axis=1);
print(meanData);

print("Median:Computed column-wise:");
medianData = dataFrame.median();

print(medianData);

print("Median:Computed row-wise:");
medianData = dataFrame.median(axis=1);
print(medianData);

print("Mode:Computed column-wise:");
modeData = dataFrame.mode();
print(modeData);

print("Mode:Computed row-wise:");
modeData = dataFrame.mode(axis=1);
print(modeData);

DataFrame:
Mean:Computed column-wise:
D1 136.8
D2 42.2
D3 72.4
D4 101.8
dtype: float64
Mean:Computed row-wise:
0 87.50
1 88.50
2 87.50
3 88.75
4 89.25
dtype: float64
Median:Computed column-wise:
D1 137.0
D2 42.0
D3 72.0
D4 102.0
dtype: float64
Median:Computed row-wise:
0 86.0
1 87.5
2 86.0
3 87.5
4 88.5
dtype: float64
Mode:Computed column-wise:
D1 D2 D3 D4
0 138 42 72 100
Mode:Computed row-wise:
0 1 2 3
0 43 72 100 135
1 42 73 102 137
2 42 72 100 136
3 42 72 103 138
4 42 73 104 138

In [ ]: #Intermediate Level:

In [3]: # 1. Read the CSV from the given URL with and without header.
url =r"C:\Users\quite\Downloads\Iris.csv"
# Read without header (assuming no header in the CSV)
df1 = pd.read_csv(url,
header=None) print(df1)

0 1 2 3 4 \
0 Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm
1 1 5.1 3.5 1.4 0.2
2 2 4.9 3.0 1.4 0.2
3 3 4.7 3.2 1.3 0.2
4 4 4.6 3.1 1.5 0.2
.. ... ... ... ... ...
146 146 6.7 3.0 5.2 2.3
147 147 6.3 2.5 5.0 1.9
148 148 6.5 3.0 5.2 2.0
149 149 6.2 3.4 5.4 2.3
150 150 5.9 3.0 5.1 1.8

5
0 Species
1 Iris-setosa
2 Iris-setosa
3 Iris-setosa
4 Iris-setosa
.. ...
146 Iris-virginica
147 Iris-virginica
148 Iris-virginica
149 Iris-virginica
150 Iris-virginica

[151 rows x 6

columns]

In [4]: import pandas as pd

# 1. Read the CSV from the given URL with and without header.
url = r"C:\Users\quite\Downloads\Iris.csv"
# Read with header (assuming the first row contains column names)
df =
pd.read_csv(url)
print(df)
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm \
0 1 5.1 3.5 1.4 0.2
1 2 4.9 3.0 1.4 0.2
2 3 4.7 3.2 1.3 0.2
3 4 4.6 3.1 1.5 0.2
4 5 5.0 3.6 1.4 0.2
.. ... ... ... ... ...
145 146 6.7 3.0 5.2 2.3
146 147 6.3 2.5 5.0 1.9
147 148 6.5 3.0 5.2 2.0
148 149 6.2 3.4 5.4 2.3
149 150 5.9 3.0 5.1 1.8

Species
0 Iris-setosa
1 Iris-setosa
2 Iris-setosa
3 Iris-setosa
4 Iris-setosa
.. ...
145 Iris-virginica
146 Iris-virginica
147 Iris-virginica
148 Iris-virginica
149 Iris-virginica

[150 rows x 6

columns]
# 2. Change the order of the columns (example, changing the order of the first two
colum df_reorder= df[['SepalWidthCm', 'SepalLengthCm', 'PetalLengthCm', 'PetalWidthCm',
In [5]: 'Speci print(df_reorder)

SepalWidthCm SepalLengthCm PetalLengthCm PetalWidthCm Species


0 3.5 5.1 1.4 0.2 Iris-setosa
1 3.0 4.9 1.4 0.2 Iris-setosa
2 3.2 4.7 1.3 0.2 Iris-setosa
3 3.1 4.6 1.5 0.2 Iris-setosa
4 3.6 5.0 1.4 0.2 Iris-setosa
.. ... ... ... ... ...
145 3.0 6.7 5.2 2.3 Iris-virginica
146 2.5 6.3 5.0 1.9 Iris-virginica
147 3.0 6.5 5.2 2.0 Iris-virginica
148 3.4 6.2 5.4 2.3 Iris-virginica
149 3.0 5.9 5.1 1.8 Iris-virginica

[150 rows x 5 columns]

In [30]: # 3. Read data from "1.csv" file and measure mean, mode, and standard deviation
data_from_file = pd.read_csv(r"C:\Users\quite\Downloads\Iris.csv")
mean = data_from_file.mean()
mode = data_from_file.mode().iloc[0] # Mode can have multiple values, taking the
first
std_dev = data_from_file.std()
print("mean:", mean)
print("----------------------------------------------------------------------------------------------------------------------------------")
print("mode:", mode)
print("----------------------------------------------------------------------------------------------------------------------------------")
print("std_dev:", std_dev)
mean: Id 75.500000
SepalLengthCm 5.843333
SepalWidthCm 3.054000
PetalLengthCm 3.758667
PetalWidthCm 1.198667
dtype: float64

mode: Id 1
SepalLengthCm 5.0
SepalWidthCm 3.0
PetalLengthCm 1.5
PetalWidthCm 0.2
Species Iris-setosa
Name: 0, dtype: object

std_dev: Id 43.445368
SepalLengthCm 0.828066
SepalWidthCm 0.433594
PetalLengthCm 1.764420
PetalWidthCm 0.763161
dtype: float64
C:\Users\quite\AppData\Local\Temp\ipykernel_7512\1955642370.py:3: FutureWarning: Droppin
g of nuisance columns in DataFrame reductions (with 'numeric_only=None') is
deprecated; in a future version this will raise TypeError. Select only valid columns
before calling the reduction.
mean = data_from_file.mean() C:\Users\quite\AppData\Local\Temp\
ipykernel_7512\1955642370.py:5: FutureWarning: Droppin g of nuisance columns in
DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version
this will raise TypeError. Select only valid columns before calling the reduction.
std_dev = data_from_file.std()

In [7]: # 4. Read and display the first three rows


df.head(3)

Out[7]: Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species

0 1 5.1 3.5 1.4 0.2 Iris-setosa

1 2 4.9 3.0 1.4 0.2 Iris-setosa

2 3 4.7 3.2 1.3 0.2 Iris-setosa

In [21]: # 5. Read and display the first n samples


n = 5
first_n_samples = df.head(n)
print(first_n_samples)

Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species


0 1 5.1 3.5 1.4 0.2 Iris-setosa
1 2 4.9 3.0 1.4 0.2 Iris-setosa
2 3 4.7 3.2 1.3 0.2 Iris-setosa
3 4 4.6 3.1 1.5 0.2 Iris-setosa
4 5 5.0 3.6 1.4 0.2 Iris-setosa

In [23]: # 6. Display the number of columns and their names

num_columns = len(df.columns)
column_names = df.columns.tolist()
print("number of columns:",num_columns
) print("column names:",column_names)

number of columns: 6
column names: ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',
'Species']

In [24]: # 7. Display columns using column slicing (n to m)


n = 2 # Start column index
m = 4 # End column index (inclusive)
columns_n_to_m = df.iloc[:, n:m+1]

print(columns_n_to_m)

SepalWidthCm PetalLengthCm PetalWidthCm


0 3.5 1.4 0.2
1 3.0 1.4 0.2
2 3.2 1.3 0.2
3 3.1 1.5 0.2
4 3.6 1.4 0.2
.. ... ... ...
145 3.0 5.2 2.3
146 2.5 5.0 1.9
147 3.0 5.2 2.0
148 3.4 5.4 2.3
149 3.0 5.1 1.8

[150 rows x 3 columns]

In [25]: # 8. Display rows using row slicing (14th to 34th rows)


rows =
df.iloc[13:34]
print(rows)
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species
13 14 4.3 3.0 1.1 0.1 Iris-setosa
14 15 5.8 4.0 1.2 0.2 Iris-setosa
15 16 5.7 4.4 1.5 0.4 Iris-setosa
16 17 5.4 3.9 1.3 0.4 Iris-setosa
17 18 5.1 3.5 1.4 0.3 Iris-setosa
18 19 5.7 3.8 1.7 0.3 Iris-setosa
19 20 5.1 3.8 1.5 0.3 Iris-setosa
20 21 5.4 3.4 1.7 0.2 Iris-setosa
21 22 5.1 3.7 1.5 0.4 Iris-setosa
22 23 4.6 3.6 1.0 0.2 Iris-setosa
23 24 5.1 3.3 1.7 0.5 Iris-setosa
24 25 4.8 3.4 1.9 0.2 Iris-setosa
25 26 5.0 3.0 1.6 0.2 Iris-setosa
26 27 5.0 3.4 1.6 0.4 Iris-setosa
27 28 5.2 3.5 1.5 0.2 Iris-setosa
28 29 5.2 3.4 1.4 0.2 Iris-setosa
29 30 4.7 3.2 1.6 0.2 Iris-setosa
30 31 4.8 3.1 1.6 0.2 Iris-setosa
31 32 5.4 3.4 1.5 0.4 Iris-setosa
32 33 5.2 4.1 1.5 0.1 Iris-setosa
33 34 5.5 4.2 1.4 0.2 Iris-setosa

In [26]: # 9. Display specific rows and columns using "iloc" and "loc" functions
specific_rows_columns = df.loc[10:15, ['SepalLengthCm', 'PetalLengthCm']]
print(specific_rows_columns)

SepalLengthCm PetalLengthCm
10 5.4 1.5
11 4.8 1.6
12 4.8 1.4
13 4.3 1.1
14 5.8 1.2
15 5.7 1.5

In [27]: # 10. Count unique values in the first column


unique_values_count = df['SepalLengthCm'].unique()
print(unique_values_count)

[5.1 4.9 4.7 4.6 5. 5.4 4.4 4.8 4.3 5.8 5.7 5.2 5.5 4.5 5.3 7. 6.4 6.9
6.5 6.3 6.6 5.9 6. 6.1 5.6 6.7 6.2 6.8 7.1 7.6 7.3 7.2 7.7 7.4 7.9]

In [28]: # 11. Calculate mean, median, and mode for a specific column (e.g., 'sepal_length')
column_name = 'SepalLengthCm'
mean= df[column_name].mean()
median= df[column_name].median()
mode= df[column_name].mode().iloc[0]
print("mean:", mean)
print("median:", median)
print("mode:", mode)

mean: 5.843333333333335
median: 5.8
mode: 5.0

In [ ]:

You might also like