Correction

You might also like

Download as pdf or txt
Download as pdf or txt
You are on page 1 of 3

import tensorflow as tf

from tensorflow import keras


import pandas as pd
TD2
Loading Data Solution 01 drive
# Mount Google Drive at /content/drive so the CSV stored there can be read
from google.colab import drive
drive.mount('/content/drive')
# Replace this path with the full path to your CSV file
chemin_fichier_csv = '/content/drive/MyDrive/TP2/Modelisation_churn.csv'

# Load the CSV file into a DataFrame


data = pd.read_csv(chemin_fichier_csv)
------------------------
import pandas as pd

# Replace this path with the full path to your CSV file on your local disk
# NOTE(review): the path below still points at the Google Drive mount, not at a local disk - confirm
chemin_fichier_csv = '/content/drive/MyDrive/TP2/Modelisation_churn.csv'

# Load the CSV file into a DataFrame


data = pd.read_csv(chemin_fichier_csv)
-----------------------------------------
# Load data from GitHub
# The URL is built from adjacent string literals so that it stays one
# unbroken string; a literal split across physical lines is a syntax error.
data_url = (
    'https://raw.githubusercontent.com/Apress/'
    'artificial-neural-networks-with-tensorflow-2/main/ch02/Churn_Modelling.csv'
)
data = pd.read_csv(data_url)
Preprocessing Data
# Shuffle the rows so that any ordering introduced during data collection is broken up
from sklearn.utils import shuffle
data=shuffle(data) # rebind data to the shuffled result returned by shuffle()
-------------------
# Examine loaded data (a bare expression is only rendered when it is the last line of a notebook cell)
data

-----------------------------------------
# Check for null values: count the missing entries in each column
data.isnull().sum()
------------------------------------
# Build the features frame: everything except the three identifier-style
# columns and the 'Exited' column, which is kept aside as the label
X = data.drop(columns=['CustomerId', 'Surname', 'RowNumber', 'Exited'])
# Labels vector: the 'Exited' column on its own
y = data['Exited']
--------------------------------
# Check the data type of every feature column to spot the categorical ones
X.dtypes
-------------------------------------------------
# Examine the first few records to see which values the categorical columns take
X.head()
-------------------------------------------

# Encode categorical columns as integers
from sklearn.preprocessing import LabelEncoder
label = LabelEncoder()
X['Geography'] = label.fit_transform(X['Geography'])
X['Gender'] = label.fit_transform(X['Gender'])
# One-hot encode Geography; drop_first removes one dummy column because it is
# implied by the others. The resulting number of feature columns therefore
# depends on how many distinct Geography values the data contains.
X = pd.get_dummies(X, drop_first=True, columns=['Geography'])
X.head()
# Split dataset into training and validation BEFORE scaling, so that the
# scaler's statistics are computed from the training rows only and nothing
# about the held-out rows leaks into training.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)
# Standardize every feature (zero mean, unit variance): fit on the training
# rows, then apply the same transformation to the held-out rows.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Determine number of features
X_train.shape[1]
→10

Defining ANN
# Create a stacked layers sequential network
model = keras.models.Sequential() # Create linear stack of layers
# Declare the input explicitly: one value per feature column of X_train
model.add(keras.Input(shape = (X_train.shape[1],)))
model.add(keras.layers.Dense(128, activation = 'relu'))
model.add(keras.layers.Dense(64, activation = 'relu'))
model.add(keras.layers.Dense(32, activation = 'relu'))
# Sigmoid activation on a single unit: one output value per row
model.add(keras.layers.Dense(1, activation = 'sigmoid'))
# Print model summary
model.summary()

Compiling Model
# Compile model with desired loss function, optimizer and evaluation metrics
model.compile(loss = 'binary_crossentropy', optimizer='adam', metrics=['accuracy'])
#to clear any other logs if present so that graphs won't overlap with previous saved logs
in tensorboard
!rm -rf ./log/
-_____-------_____________
# TensorBoard visualization: write each run's logs to its own
# timestamped sub-directory of "log"
import datetime
import os
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
logdir = os.path.join("log", run_stamp)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir, histogram_freq=1)

Perform Training :
# Perform training: 50 epochs in mini-batches of 32, validating on the
# held-out split after every epoch and logging through the TensorBoard callback
r = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=50,
    validation_data=(X_test, y_test),
    callbacks=[tensorboard_callback],
)

TensorBoard Visualization
# Load the TensorBoard notebook extension in Colab
%load_ext tensorboard
# Point TensorBoard at the "log" directory, the parent of the timestamped logdir given to the callback
%tensorboard --logdir log #command to launch tensorboard on colab

Evaluating Model Performance:


# Evaluate model performance on test data; with the single 'accuracy'
# metric configured at compile time, evaluate() returns [loss, accuracy]
test_scores = model.evaluate(X_test, y_test)
test_loss, test_accuracy = test_scores
print('Test Loss: ', test_loss)
print('Test accuracy: ', test_accuracy * 100)
---------_______________-------------
Plotting Metrics in matplotlib
# Plot metrics in matplotlib
%matplotlib inline
import matplotlib.pyplot as plt #for plotting curves

plt.plot(r.history['val_accuracy'], label='val_acc')
plt.plot(r.history['val_loss'], label='val_loss')
plt.legend()
plt.show()

Predicting on test data


# Predict on test data: the model's sigmoid output layer yields one value per row
y_pred_prob = model.predict(X_test)
print(y_pred_prob)
# Turn the predicted values into 0/1 class labels with a 0.5 cut-off
y_pred = (y_pred_prob > 0.5).astype(int)
print("_________________________________")
print(y_pred)
# Show the first test row (rendered only as the last expression of a notebook cell)
X_test[0]

Confusion Matrix
from sklearn.metrics import confusion_matrix # Create confusion matrix
# Compare the true labels with the thresholded predictions
cf = confusion_matrix(y_test, y_pred)
cf
# Plot confusion matrix
from mlxtend.plotting import plot_confusion_matrix
plot_confusion_matrix(conf_mat = cf, cmap = plt.cm.hsv)

Accuracy Score
# Compute accuracy score
# NOTE(review): the "0.834" that trailed this comment in the original looks like the value printed by the author's run - confirm
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)
# Predicting on unseen data
# Predict on unseen customer data. The model was trained on features that went
# through the fitted scaler, so a raw row must be transformed by that same
# scaler first; passing the raw values straight to model.predict (as the first
# attempt in the original did) feeds the network inputs on the wrong scale.
# NOTE(review): the row holds 10 values - confirm that this equals
# X_train.shape[1] and that the order matches the training columns.
new_data = [[615, 1, 22, 5, 20000, 5, 1, 1, 60000, 0]]
new_data_scaled = scaler.transform(new_data)

# Prediction on the scaled new data
customer = model.predict(new_data_scaled)
customer
# The sigmoid output is a value between 0 and 1, so apply the same 0.5
# cut-off used for the test predictions instead of testing equality with 1.
if customer[0][0] > 0.5:
    print ("Customer is likely to leave")
else:
    print ("Customer will stay")

# Output captured in the original document, in the order it appeared (it was
# interleaved with the code, so which cell produced which line is uncertain):
#   1/1 [==============================] - 0s 28ms/step
#   array([[0.]], dtype=float32)
#   1/1 [==============================] - 0s 28ms/step
#   /usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X
#   does not have valid feature names, but StandardScaler was fitted with
#   feature names
#     warnings.warn(
#   array([[1.]], dtype=float32)
#   Customer will stay

You might also like