Professional Documents
Culture Documents
Q1 Statistics
Q1 Statistics
Q1 Statistics
import pandas as pd
# a) One-hot encode the discrete columns of autoMPG.csv and save the result to a new file.
autoMPG = pd.read_csv("autoMPG.csv")
# Columns treated as categorical: each one is replaced by one indicator column
# per distinct value it contains.
discreteColumns = ["cylinders", "model year", "origin"]
# Pass the list defined above instead of repeating it as a literal, so the set
# of encoded columns is declared in exactly one place.
autoMPG = pd.get_dummies(autoMPG, columns=discreteColumns)
# index=False keeps the DataFrame's row index out of the written CSV.
autoMPG.to_csv('autoMPGmodified.csv', index=False)
print("Applied one hot encoding and saved in a new file!")
# Print the mean and variance entry for each of the d features, labelling
# features from 1 (the loop index i is 0-based).
for i in range(d):
    print(f"Feature {i + 1} - Mean (x̄): {meanVector[i]}, Variance (σ^2): {varianceVector[i]}")
# c) Normalizing the data and calculating mean and variance again for the normalized data
# Standardize: subtract the mean first, THEN divide by the standard deviation
# (square root of the variance). The parentheses are required; without them
# operator precedence divides only meanVector by the standard deviation and the
# data is merely shifted, not scaled.
normalizedData = (data - meanVector) / varianceVector ** 0.5  # Calculating normalized values
print(f'\nNormalized data:\n {normalizedData}')
n, d = normalizedData.shape  # n rows, d columns
varianceVectorNorm = np.zeros(d)
for j in range(d):
    # Squared deviation of every entry in column j from that column's mean.
    squared_diff = (normalizedData[:, j] - np.mean(normalizedData[:, j])) ** 2
    # Sum of squared deviations divided by (n - 1).
    varianceVectorNorm[j] = np.sum(squared_diff) / (n - 1)  # Calculating variance