Professional Documents
Culture Documents
Correction
Correction
Correction
# Replace this path with the full path to your CSV file on your local disk.
# NOTE(review): the value below points into a mounted Google Drive folder
# (/content/drive/...), i.e. a Colab-style location — adjust as needed.
chemin_fichier_csv = '/content/drive/MyDrive/TP2/Modelisation_churn.csv'
-----------------------------------------
# Check for null values: count the missing entries along each column
# (presumably `data` is the DataFrame loaded from the CSV — it is not
# defined in the visible part of the file).
data.isnull().sum()
------------------------------------
# Columns excluded from the feature matrix: the three columns the notebook
# treats as irrelevant, plus 'Exited', which is the label being predicted.
non_feature_columns = ['CustomerId', 'Surname', 'RowNumber', 'Exited']
X = data.drop(columns=non_feature_columns)
# Label vector: the 'Exited' column on its own
y = data['Exited']
--------------------------------
# Inspect the data type of every feature column to spot the categorical
# (non-numeric) ones.
X.dtypes
-------------------------------------------------
# Look at the first few records to see which values the categorical
# columns actually take.
X.head()
-------------------------------------------
Defining ANN
# Create a stacked-layers sequential network
model = keras.models.Sequential()  # linear stack of layers
# Three fully connected hidden layers with ReLU activations (128 -> 64 -> 32
# units); the first layer's input size is the number of columns in X_train.
model.add(keras.layers.Dense(128, activation='relu', input_dim=X_train.shape[1]))
model.add(keras.layers.Dense(64, activation='relu'))
model.add(keras.layers.Dense(32, activation='relu'))
# Sigmoid activation for a single output
model.add(keras.layers.Dense(1, activation='sigmoid'))

# Print model summary
model.summary()
Compiling Model
# Configure the model for training: binary cross-entropy as the loss, the
# Adam optimizer, and accuracy as the reported evaluation metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# Clear any existing logs so that the TensorBoard graphs don't overlap with previously saved logs
!rm -rf ./log/
-_____-------_____________
# TensorBoard visualization: write each run to its own timestamped
# sub-directory of ./log so that separate runs stay distinguishable.
import datetime
import os

run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
logdir = os.path.join("log", run_stamp)
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)
Perform Training :
# Train for 50 epochs in batches of 32, using (X_test, y_test) as the
# validation data and logging through the TensorBoard callback. The returned
# history object is kept in `r` for plotting afterwards.
r = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=50,
    validation_data=(X_test, y_test),
    callbacks=[tensorboard_callback],
)
TensorBoard Visualization
# Load the TensorBoard notebook extension in Colab, then launch TensorBoard on
# the "log" directory. These two lines are IPython/Colab magics, not plain
# Python, so they only work inside a notebook.
%load_ext tensorboard
%tensorboard --logdir log #command to launch tensorboard on colab
# Plot the per-epoch validation accuracy and validation loss recorded in the
# training history, both on the same axes.
for history_key, curve_label in (('val_accuracy', 'val_acc'), ('val_loss', 'val_loss')):
    plt.plot(r.history[history_key], label=curve_label)
plt.legend()
plt.show()
Confusion Matrix
# Build the confusion matrix from the true test labels and the predictions.
# NOTE(review): y_pred is not defined in the visible part of the file —
# confirm it holds class labels (not raw sigmoid probabilities) before this
# cell runs.
from sklearn.metrics import confusion_matrix # Create confusion matrix
cf = confusion_matrix(y_test, y_pred)
cf
# Plot confusion matrix
from mlxtend.plotting import plot_confusion_matrix
plot_confusion_matrix(conf_mat = cf, cmap = plt.cm.hsv)
Accuracy Score
# Compute accuracy score on the test set.
# (The "0.834" that trailed this comment in the source appears to be the value
# printed by the original run — TODO confirm.)
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)
# Predicting on unseen data

# Predict on unseen customer data. This first call feeds the raw feature
# values straight into the model, without passing them through `scaler`.
# Output recorded from the original run:
#   1/1 [==============================] - 0s 28ms/step
#   array([[0.]], dtype=float32)
customer = model.predict([[615, 1, 22, 5, 20000, 5, 1, 1, 60000, 0]])
customer

# Same record, this time transformed with the fitted scaler before prediction.
new_data = [[615, 1, 22, 5, 20000, 5, 1, 1, 60000, 0]]
new_data_scaled = scaler.transform(new_data)

# Prediction on the normalized new data.
# Output recorded from the original run:
#   1/1 [==============================] - 0s 28ms/step
#   /usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning:
#   X does not have valid feature names, but StandardScaler was fitted with
#   feature names
#     warnings.warn(
#   array([[1.]], dtype=float32)
customer = model.predict(new_data_scaled)
customer

# The output layer is a sigmoid, so the prediction is a probability in [0, 1].
# Threshold it at 0.5 instead of testing for exact equality with 1, which only
# matches when the sigmoid saturates completely.
if customer[0][0] > 0.5:
    print("Customer is likely to leave")
else:
    print("Customer will stay")