Download as docx, pdf, or txt
Download as docx, pdf, or txt
You are on page 1 of 5

National University of Technology

Umar Aurangzeb
34

AI lab OEL 1

Combined code for Questions 1 and 2

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# Question 1: predict AQI from pollutant concentrations with Ridge regression.

# Load the dataset.
# NOTE: the CSV path below is hard-coded; change it to wherever the file lives
# on your machine (for example, your Downloads folder).
df = pd.read_csv('C:\\daily_air - daily_air.csv')

# Define independent features (X) and dependent feature (y)
independent_features = ['CO', 'O3', 'NO2', 'SO2', 'PM10', 'PM2.5']
dependent_feature = 'AQI'

X = df[independent_features]
y = df[dependent_feature]

# Split the data into training and testing sets.
# The split happens BEFORE scaling so the scalers below only ever see training
# rows; fitting them on the full dataset would leak test-set statistics into
# training and make the evaluation optimistic.
X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
    X, y, test_size=0.1, random_state=30
)

# Standardize the independent features (mean/std learned from the training split)
scaler_X = StandardScaler()
X_train = scaler_X.fit_transform(X_train_raw)
X_test = scaler_X.transform(X_test_raw)

# Standardize the dependent variable (target), again using training rows only
scaler_y = StandardScaler()
y_train = scaler_y.fit_transform(y_train_raw.to_numpy().reshape(-1, 1)).flatten()

# Create a Ridge regression model.
# You can adjust the alpha parameter for regularization strength.
ridge_model = Ridge(alpha=1.0)

# Train the Ridge model
ridge_model.fit(X_train, y_train)

# Make predictions on the test set (these are in standardized target units)
y_pred = ridge_model.predict(X_test)

# Inverse transform the scaled predictions to get them back to the original scale
y_pred_original_scale = scaler_y.inverse_transform(y_pred.reshape(-1, 1)).flatten()
y_test_original_scale = y_test_raw.to_numpy()

# Evaluate in original AQI units so the numbers match the axis labels below.
# R2 is unaffected by the target scaling; MSE is now in squared AQI units
# instead of squared standard deviations.
r2 = r2_score(y_test_original_scale, y_pred_original_scale)
mse = mean_squared_error(y_test_original_scale, y_pred_original_scale)

# Plotting the actual vs predicted values
plt.figure(figsize=(10, 6))
sns.scatterplot(x=y_test_original_scale, y=y_pred_original_scale)
plt.xlabel('Actual AQI')
plt.ylabel('Predicted AQI')
plt.title(f'Actual vs Predicted AQI\nR-squared: {r2:.4f}, MSE: {mse:.4f}')
plt.show()

print(f'R-squared (R2): {r2:.4f}')
print(f'Mean Squared Error (MSE): {mse:.4f}')

# Question 2: classify penguin species with a random forest.

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
# Parenthesized so the import list can legally span several lines.
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    classification_report,
)

# Load the dataset (hard-coded path; adjust it for your machine)
df = pd.read_csv('C:\\penguins - penguins.csv')

# Drop rows with missing values for simplicity
# (you may want to handle missing values differently)
df = df.dropna()

# Encode categorical variables.
# Each column gets its own encoder: re-fitting one shared encoder overwrites its
# classes_, so the species names used in the report would silently depend on
# which column happened to be encoded last.
island_encoder = LabelEncoder()
sex_encoder = LabelEncoder()
species_encoder = LabelEncoder()
df['island'] = island_encoder.fit_transform(df['island'])
df['sex'] = sex_encoder.fit_transform(df['sex'])
df['species'] = species_encoder.fit_transform(df['species'])

# Split the data into features and target variable
X = df.drop('species', axis=1)
y = df['species']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build a RandomForestClassifier
# (you can choose a different classifier based on your preference)
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')

print(f'Accuracy: {accuracy:.2f}')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')

# Display the detailed classification report.
# LabelEncoder maps classes to 0..n_classes-1, so passing those codes as
# `labels` keeps them aligned with `target_names` even if a species is
# missing from the test split (otherwise the name/label counts can mismatch).
species_labels = list(range(len(species_encoder.classes_)))
print('\nClassification Report:')
print(
    classification_report(
        y_test,
        y_pred,
        labels=species_labels,
        target_names=species_encoder.classes_,
    )
)

Output

You might also like