Professional Documents
Culture Documents
Import As Import As From Import Import As Matplotlib Import Import
Import As Import As From Import Import As Matplotlib Import Import
import warnings
# Silence every warning for the rest of the session (no category or module
# filter is given, so nothing is exempt).
warnings.filterwarnings('ignore')
# Alternative input file, currently disabled:
#df = pd.read_csv('/Users/sangeetha/consolidated_output.csv')
# Load the match-level dataset into the working frame used by every later cell.
# NOTE(review): absolute, machine-specific path — the notebook will not run
# elsewhere without editing it.
df = pd.read_csv('/Users/sangeetha/outputweo1.csv')
# Names of the columns that are removed from df by the first df.drop call
# below; they are not used as model features.
# NOTE(review): the line that starts with 'P1_pointswon' is cut off after
# 'player2_ in this copy of the notebook (the closing quote and anything that
# followed on that line are missing). Restore it from the original notebook
# before running — the list does not parse as written.
to_drop = ['match_id','match_num',
'duration',
'totalsets',
'P1_setswon',
'P2_setswon','P1_gameswon','P2_gameswon',
'P1_pointswon','P2_pointswon','winner','player1_rating','player2_
'prob',
'weo_prob',
'win_play_2',
'updated_win_match_1','updated_win_match_2',
'updated_total_match_1', 'updated_total_match_2',
'player1_fg','player2_fg',
'current_rating_player1','current_rating_player2',
'updated_rating_player1','updated_rating_player2',
# Disabled alternative ending of the list (would also drop the current WEO ratings):
#'current_weo_rating_player1','current_weo_rating_player2']
'updated_elo_rating_player1','updated_elo_rating_player2']
# Add one ratio feature per player slot: win_match_N divided by total_match_N.
# A 0/0 row produces NaN here rather than raising.
df['win_ratio_player1'] = df['win_match_1'].div(df['total_match_1'])
df['win_ratio_player2'] = df['win_match_2'].div(df['total_match_2'])

# First pass: remove every column named in the to_drop list built earlier.
df.drop(to_drop, axis=1, inplace=True)

# Second pass: the raw counts have been folded into the ratios above, so the
# four source columns are removed as well.
to_drop = ['total_match_1', 'total_match_2', 'win_match_1', 'win_match_2']
df.drop(to_drop, axis=1, inplace=True)

# Show which columns remain.
print(df.columns)
localhost:8888/nbconvert/html/Downloads/2b.ipynb?download=false 1/5
23/07/2023, 23:52 2b
Index(['year', 'player1', 'player2', 'win_play_1',
'current_weo_rating_player1', 'current_weo_rating_player2',
'win_ratio_player1', 'win_ratio_player2'],
dtype='object')
# Prepare the chronological train/test split and fill in missing feature values.
#
# Rows that lack the split key ('year') or the label ('win_play_1') are dropped
# instead of being mean-imputed: an averaged label is not a valid class, and an
# averaged year would assign the row to an arbitrary split.
df.dropna(subset=['year', 'win_play_1'], inplace=True)

# Splitting the data into training and test sets based on the year (2019):
# earlier years train the models, 2019 is held out for evaluation.
# .copy() gives each split its own frame so imputed values can be written back.
train_data = df[df['year'] < 2019].copy()
test_data = df[df['year'] == 2019].copy()

# Handle missing values in the numeric feature columns only. The column means
# are learned from the training rows and then reused for the test rows, so no
# statistic of the held-out 2019 data leaks into preprocessing.
numeric_cols = df.select_dtypes(include=[float, int]).columns
feature_cols = [col for col in numeric_cols if col not in ('year', 'win_play_1')]
imputer = SimpleImputer(strategy='mean')
train_data[feature_cols] = imputer.fit_transform(train_data[feature_cols])
test_data[feature_cols] = imputer.transform(test_data[feature_cols])

# Drop the split key (year) and the target column (win_play_1) from the
# training features; the target becomes y_train.
X_train = train_data.drop(columns=['year', 'win_play_1'])
y_train = train_data['win_play_1']

# Same separation of features and target for the held-out test set.
X_test = test_data.drop(columns=['year', 'win_play_1'])
y_test = test_data['win_play_1']
# Combine player1 and player2 names from both training and test sets so the
# label encoder is fitted on every player name that appears in either split.
# NOTE(review): the tail of this concat call was truncated in the exported
# notebook; the two X_test columns are reconstructed from the description
# above — confirm against the original notebook.
combined_players = pd.concat([X_train['player1'], X_train['player2'],
                              X_test['player1'], X_test['player2']])
label_encoder = LabelEncoder()
label_encoder.fit(combined_players)
##############################################
# Standardise the feature matrices. The scaler's statistics are learned from
# the training features only and then applied unchanged to the test features.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
##########################################
# Fit the classifier on the scaled training data.
# (rf_classifier is created outside the part of the notebook shown here.)
rf_classifier.fit(X_train, y_train)
localhost:8888/nbconvert/html/Downloads/2b.ipynb?download=false 2/5
23/07/2023, 23:52 2b
# Score the SVM predictions against the held-out labels.
# (svm_predictions is produced outside the part of the notebook shown here.)
svm_accuracy = accuracy_score(y_true=y_test, y_pred=svm_predictions)
svm_recall = recall_score(y_true=y_test, y_pred=svm_predictions)
# Indices of the (at most) ten largest entries of feature_importances,
# ordered from most to least important: argsort is ascending, [::-1] reverses
# it, [:10] keeps the head.
top_feature_indices = np.argsort(feature_importances)[::-1][:10]
# Pair a "Feature <index>" label with each importance value.
# NOTE(review): this line is cut off after "zi" in this copy of the notebook
# (presumably a zip(...) over the indices and their importances) — restore it
# from the original notebook before running.
top_features = [(f"Feature {index}", importance) for index, importance in zi
Feature Importance
3 Feature_3 0.223542
2 Feature_2 0.213274
0 Feature_0 0.173855
1 Feature_1 0.162643
5 Feature_5 0.113348
4 Feature_4 0.113338
localhost:8888/nbconvert/html/Downloads/2b.ipynb?download=false 3/5
23/07/2023, 23:52 2b
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Confusion matrix of the SVM predictions on the held-out test labels
# (rows are the true classes, columns the predicted classes). It must be
# computed before the plotting calls that consume it.
cm = confusion_matrix(y_test, svm_predictions)

# Create a heatmap using seaborn: integer counts annotated in each cell,
# no colour bar, square cells.
plt.figure(figsize=(6, 4))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", cbar=False, square=True,
            xticklabels=["Negative", "Positive"],
            yticklabels=["Negative", "Positive"])
plt.show()
localhost:8888/nbconvert/html/Downloads/2b.ipynb?download=false 4/5
23/07/2023, 23:52 2b
In [ ]:
In [ ]:
localhost:8888/nbconvert/html/Downloads/2b.ipynb?download=false 5/5