Professional Documents
Culture Documents
P#04 ML 46
P#04 ML 46
P#04 ML 46
In [2]: #1. Write a Pandas program to create and display a one-dimensional array-like object
con
ds = pd.Series([2, 4, 6, 8,
10]) print(ds)
0 2
1 4
2 6
3 8
4 10
dtype: int64
In [12]: #2. Write a Pandas program to convert a Panda module Series to Python list and
it’sntype
ds = pd.Series([2, 4, 6, 8,
10]) print("Pandas Series and
type") print(ds)
print(type(ds))
print("Convert Pandas Series to Python
list") print(ds.tolist())
print(type(ds.tolist()))
Pandas Series and type
0 2
1 4
2 6
3 8
4 10
dtype: int64
<class 'pandas.core.series.Series'>
Convert Pandas Series to Python
list [2, 4, 6, 8, 10]
<class 'list'>
Original dictionary:
{'a': 100, 'b': 200, 'c': 300, 'd': 400, 'e': 800}
Converted series:
a 100
b 200
c 300
d 400
e 800
dtype: int64
In [15]: #4. Write a Pandas program to convert a NumPy array to a Pandas series.
NumPy array:
[10 20 30 40 50]
Converted Pandas series:
0 10
1 20
2 30
3 40
4 50
dtype: int32
In [16]: #5. Write a Pandas program to change the data type of given a column or a Series
In #6. Write a Pandas program to convert the first column of a Data frame as a Series.
[11]:
d = {'col1': [1, 2, 3, 4, 7, 11],
'col2': [4, 5, 6, 9, 5, 0],
'col3': [7, 5, 8, 12, 1,11]}
print(s1) print(type(s1))
Original DataFrame
col1 col2 col3
0 1 4 7
1 2 5 5
2 3 6 8
3 4 9 12
4 7 5 1
5 11 0 11
In [10]: #7. Write a Pandas program to join the two given data frames along rows and assign
all d
student_data1 = pd.DataFrame({
'student_id': ['S1', 'S2', 'S3', 'S4', 'S5'],
'name': ['Danniella Fenton', 'Ryder
Storey', 'Bryce Jensen', 'Ed Bernal',
'Kwame Morin'], 'marks': [200, 210, 190,
222, 199]})
student_data2 = pd.DataFrame({
'student_id': ['S4', 'S5', 'S6', 'S7', 'S8'],
'name': ['Scarlette Fisher', 'Carla Williamson',
'Dante Morse', 'Kaiser William', 'Madeeha
Preston'], 'marks': [201, 200, 198, 219, 201]})
print("Original DataFrames:")
print(student_data1)
print("-----------------------------------------------------------------")
print(student_data2)
print("\nJoin the said two dataframes along rows:")
result_data = pd.concat([student_data1, student_data2])
Original DataFrames:
student_id name marks
0 S1 Danniella Fenton 200
1 S2 Ryder Storey 210
2 S3 Bryce Jensen 190
3 S4 Ed Bernal 222
4 S5 Kwame Morin 199
Out[9]:
col1
0 9
1 11
2 13
3 15
4 17
5 19
In [32]: #9. Create a custom data frame and apply logical, query and filter methods on the
same
print(df)
name ctg val val2
0 Jane A 0.61 7
1 John A 0.03 7
2 Ashley C 0.43 8
3 Mike B 0.62 8
4 Emily B 0.30 3
5 Jack C 0.63 2
6 Catlin B 0.82 8
In [31]: #11. Calculate the mean, median and mode for the following data frame.
dataMatrix = {"D1":[135, 137, 136, 138, 138],
"D2":[43, 42, 42, 42, 42],
"D3":[72, 73, 72, 72, 73],
"D4":[100, 102, 100, 103, 104] };
dataFrame = pd.DataFrame(data=dataMatrix);
print("DataFrame:");
print("Mean:Computed column-wise:");
meanData = dataFrame.mean();
print(meanData);
print("Mean:Computed row-wise:");
meanData = dataFrame.mean(axis=1);
print(meanData);
print("Median:Computed column-wise:");
medianData = dataFrame.median();
print(medianData);
print("Median:Computed row-wise:");
medianData = dataFrame.median(axis=1);
print(medianData);
print("Mode:Computed column-wise:");
modeData = dataFrame.mode();
print(modeData);
print("Mode:Computed row-wise:");
modeData = dataFrame.mode(axis=1);
print(modeData);
DataFrame:
Mean:Computed column-wise:
D1 136.8
D2 42.2
D3 72.4
D4 101.8
dtype: float64
Mean:Computed row-wise:
0 87.50
1 88.50
2 87.50
3 88.75
4 89.25
dtype: float64
Median:Computed column-wise:
D1 137.0
D2 42.0
D3 72.0
D4 102.0
dtype: float64
Median:Computed row-wise:
0 86.0
1 87.5
2 86.0
3 87.5
4 88.5
dtype: float64
Mode:Computed column-wise:
D1 D2 D3 D4
0 138 42 72 100
Mode:Computed row-wise:
0 1 2 3
0 43 72 100 135
1 42 73 102 137
2 42 72 100 136
3 42 72 103 138
4 42 73 104 138
In [ ]: #Intermediate Level:
In [3]: # 1. Read the CSV from the given URL with and without header.
url =r"C:\Users\quite\Downloads\Iris.csv"
# Read without header (assuming no header in the CSV)
df1 = pd.read_csv(url,
header=None) print(df1)
0 1 2 3 4 \
0 Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm
1 1 5.1 3.5 1.4 0.2
2 2 4.9 3.0 1.4 0.2
3 3 4.7 3.2 1.3 0.2
4 4 4.6 3.1 1.5 0.2
.. ... ... ... ... ...
146 146 6.7 3.0 5.2 2.3
147 147 6.3 2.5 5.0 1.9
148 148 6.5 3.0 5.2 2.0
149 149 6.2 3.4 5.4 2.3
150 150 5.9 3.0 5.1 1.8
5
0 Species
1 Iris-setosa
2 Iris-setosa
3 Iris-setosa
4 Iris-setosa
.. ...
146 Iris-virginica
147 Iris-virginica
148 Iris-virginica
149 Iris-virginica
150 Iris-virginica
[151 rows x 6
columns]
# 1. Read the CSV from the given URL with and without header.
url = r"C:\Users\quite\Downloads\Iris.csv"
# Read with header (assuming the first row contains column names)
df =
pd.read_csv(url)
print(df)
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm \
0 1 5.1 3.5 1.4 0.2
1 2 4.9 3.0 1.4 0.2
2 3 4.7 3.2 1.3 0.2
3 4 4.6 3.1 1.5 0.2
4 5 5.0 3.6 1.4 0.2
.. ... ... ... ... ...
145 146 6.7 3.0 5.2 2.3
146 147 6.3 2.5 5.0 1.9
147 148 6.5 3.0 5.2 2.0
148 149 6.2 3.4 5.4 2.3
149 150 5.9 3.0 5.1 1.8
Species
0 Iris-setosa
1 Iris-setosa
2 Iris-setosa
3 Iris-setosa
4 Iris-setosa
.. ...
145 Iris-virginica
146 Iris-virginica
147 Iris-virginica
148 Iris-virginica
149 Iris-virginica
[150 rows x 6
columns]
# 2. Change the order of the columns (example, changing the order of the first two
colum df_reorder= df[['SepalWidthCm', 'SepalLengthCm', 'PetalLengthCm', 'PetalWidthCm',
In [5]: 'Speci print(df_reorder)
In [30]: # 3. Read data from "1.csv" file and measure mean, mode, and standard deviation
data_from_file = pd.read_csv(r"C:\Users\quite\Downloads\Iris.csv")
mean = data_from_file.mean()
mode = data_from_file.mode().iloc[0] # Mode can have multiple values, taking the
first
std_dev = data_from_file.std()
print("mean:", mean)
print("----------------------------------------------------------------------------------------------------------------------------------")
print("mode:", mode)
print("----------------------------------------------------------------------------------------------------------------------------------")
print("std_dev:", std_dev)
mean: Id 75.500000
SepalLengthCm 5.843333
SepalWidthCm 3.054000
PetalLengthCm 3.758667
PetalWidthCm 1.198667
dtype: float64
mode: Id 1
SepalLengthCm 5.0
SepalWidthCm 3.0
PetalLengthCm 1.5
PetalWidthCm 0.2
Species Iris-setosa
Name: 0, dtype: object
std_dev: Id 43.445368
SepalLengthCm 0.828066
SepalWidthCm 0.433594
PetalLengthCm 1.764420
PetalWidthCm 0.763161
dtype: float64
C:\Users\quite\AppData\Local\Temp\ipykernel_7512\1955642370.py:3: FutureWarning: Droppin
g of nuisance columns in DataFrame reductions (with 'numeric_only=None') is
deprecated; in a future version this will raise TypeError. Select only valid columns
before calling the reduction.
mean = data_from_file.mean() C:\Users\quite\AppData\Local\Temp\
ipykernel_7512\1955642370.py:5: FutureWarning: Droppin g of nuisance columns in
DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version
this will raise TypeError. Select only valid columns before calling the reduction.
std_dev = data_from_file.std()
num_columns = len(df.columns)
column_names = df.columns.tolist()
print("number of columns:",num_columns
) print("column names:",column_names)
number of columns: 6
column names: ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',
'Species']
print(columns_n_to_m)
In [26]: # 9. Display specific rows and columns using "iloc" and "loc" functions
specific_rows_columns = df.loc[10:15, ['SepalLengthCm', 'PetalLengthCm']]
print(specific_rows_columns)
SepalLengthCm PetalLengthCm
10 5.4 1.5
11 4.8 1.6
12 4.8 1.4
13 4.3 1.1
14 5.8 1.2
15 5.7 1.5
[5.1 4.9 4.7 4.6 5. 5.4 4.4 4.8 4.3 5.8 5.7 5.2 5.5 4.5 5.3 7. 6.4 6.9
6.5 6.3 6.6 5.9 6. 6.1 5.6 6.7 6.2 6.8 7.1 7.6 7.3 7.2 7.7 7.4 7.9]
In [28]: # 11. Calculate mean, median, and mode for a specific column (e.g., 'sepal_length')
column_name = 'SepalLengthCm'
mean= df[column_name].mean()
median= df[column_name].median()
mode= df[column_name].mode().iloc[0]
print("mean:", mean)
print("median:", median)
print("mode:", mode)
mean: 5.843333333333335
median: 5.8
mode: 5.0
In [ ]: