3 Confussion Matrix Hasil Modelling OK

3-Confussion-Matrix-hasil-modelling-OK
November 15, 2022
1 TOPIC MODELLING BIGRAM-TRIGRAM

[128]: #Sumber acuan : https://medium.com/@listari.tari/topic-modeling-menggunakan-latent-dirchlect-allocation-part-2-topic-modeling-with-gensim-c9f
[24]: import pandas as pd

from pprint import pprint
import seaborn as sns
[195]: #04-topik-per-judul-confus-mtr.csv
#03-topik-per-judul-confus-mtr.csv
#03-4cluster.csv
#05-topik-per-judul_b4.csv
#03-topik-per-judul_b-confus-mtr.csv
#03-topik-per-judul.csv
# File-name prefix selecting which dataset to load
# (presumably the month, "bulan" - confirm).
bln = '10'
# Alternative input tried earlier: 'dataset/'+bln+'-topik-per-judul-str.csv'
inputfile = 'dataset/{}-topik-per-judul.csv'.format(bln)
# Comma-separated file, decoded as Latin-1.
data = pd.read_csv(inputfile, encoding='latin-1', sep=',')
[196]: data.head()
[196]: Unnamed: 0 Document_No Dominant_Topic Topic_Perc_Contrib \

0 0 0 0 0.6966
1 1 1 1 0.7970
2 2 2 1 0.5937
3 3 3 0 0.7539
4 4 4 0 0.7891
Keywords \
0 uu_cipta, warga, omnibus_law, protokol_kesehat…
1 gempa_m, demo_omnibus, hari_ini, libur_panjang…
2 gempa_m, demo_omnibus, hari_ini, libur_panjang…
Text \
0 ['jokowi', 'tinjau', 'progres', 'wisata', 'pre…
1
1 ['update', 'covid-19', 'jatim:', '314', 'kasus…
2 ['azerbaijan', 'vs', 'armenia', 'perang,', 'ke…
3 ['perkumpulan', 'warga', 'minang', 'surabaya',…
4 ['azerbaijan', 'vs', 'armenia', 'perang,', 'ri…
Asal
0 jokowi tinjau progres wisata premium labuan ba…
1 update covid-19 jatim: 314 kasus positif baru,…
2 azerbaijan vs armenia perang, kemlu ri: semua …
3 perkumpulan warga minang surabaya dukung machf…
4 azerbaijan vs armenia perang, ri serukan genca…
2 COBA DIBIKIN CONFUSION MATRIX

[197]: from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
import numpy as np
from sklearn.metrics import confusion_matrix
#lbltopic = data['Dominant_Topic'] #textpros = data['Text']
[198]: # This converts the list of words into space-separated strings

#text_list
#df['isinya'] = df['isinya'].apply(lambda x: ' '.join(x))

#textpros = data['Text'].apply(lambda x: ' '.join(x))
# Build the term-count matrix from the 'Text' column with default CountVectorizer settings.
# NOTE(review): the vocabulary here and the IDF weights below are fitted on the
# whole dataset, before the train/test split that follows, so the held-out
# documents influence the features - confirm this is acceptable.
count_vect = CountVectorizer()
countsv = count_vect.fit_transform(data['Text'])
[199]: transformer = TfidfTransformer().fit(countsv)

# Replace the raw counts with their TF-IDF weighted version (countsv is overwritten).
countsv = transformer.transform(countsv)
[200]: X_train, X_test, y_train, y_test = train_test_split(countsv,␣

,→data['Dominant_Topic'], test_size=0.1, random_state=69)
[201]: print(countsv[:5])
(0, 13062) 0.2789470811683493

(0, 12416) 0.3302658628965039
(0, 9983) 0.3856380533145348
(0, 9933) 0.3952840996405798
(0, 6599) 0.3952840996405798
(0, 5356) 0.19697505432178475
(0, 4652) 0.33814725661240264
(0, 1289) 0.3602292195566924
2
(0, 1045) 0.26265835012301936
(1, 12789) 0.4865069079637143
(1, 12787) 0.13427113605737787
(1, 11823) 0.12535024990481136
(1, 10996) 0.1707753898017361
(1, 10993) 0.12355543101275517
(1, 9858) 0.09629203648599599
(1, 5643) 0.4047431962748549
(1, 5632) 0.08249875186455764
(1, 5243) 0.11305965105757178
(1, 2636) 0.08407602342699545
(1, 1437) 0.3285681803271481
(1, 625) 0.41761471252258714
(1, 277) 0.2002009357807956
(1, 269) 0.20880735626129357
(1, 132) 0.34547486946410433
(2, 13073) 0.31993340458739944
(2, 12914) 0.32661014029531127
(2, 11019) 0.3464965733082987
(2, 10419) 0.24891541956174817
(2, 9330) 0.32460798562589627
(2, 5935) 0.3870418329130274
(2, 1225) 0.34071978906273015
(2, 1099) 0.33434120611855406
(2, 874) 0.35482943783014903
(3, 12984) 0.21849274041960373
(3, 11664) 0.26269513938718436
(3, 9431) 0.49306188197186135
(3, 7944) 0.3938875695892951
(3, 7793) 0.49306188197186135
(3, 6997) 0.3693175439087316
(3, 3858) 0.32479113519810243
(4, 12914) 0.3146165664353413
(4, 11141) 0.3323189370063989
(4, 11049) 0.3417996125269308
(4, 10419) 0.23977490277711946
(4, 9330) 0.3126879336409208
(4, 4326) 0.39926813999704075
(4, 4325) 0.38422281169893363
(4, 1225) 0.32820808948113606
(4, 1099) 0.3220637368814121
[202]: model = MultinomialNB().fit(X_train, y_train)
[203]: predicted = model.predict(X_test)

skorcfm = np.mean(predicted == y_test)
print(skorcfm)
3
0.8265602322206096
fjmltop = open('dataset/'+bln+'333-skor-cnf-matrix.txt','w')
fjmltop.write(str(skorcfm))
fjmltop.close()
[204]: print(confusion_matrix(y_test, predicted))  # rows: true label, columns: predicted label
[[372 33 48]
[ 52 375 42]
[ 36 28 392]]
[205]: %matplotlib inline

import matplotlib.pyplot as plt
from sklearn.utils.multiclass import unique_labels
[206]: def plot_confusion_matrix(y_true, y_pred, classes,

normalize=False,
title=None,
cmap=plt.cm.Blues):
"""
This function prints and plots the confusion matrix.
Normalization can be applied by setting `normalize=True`.
"""
if not title:
if normalize:
title = 'Normalized confusion matrix'
else:
title = 'Confusion matrix, without normalization'
# Compute confusion matrix

cm = confusion_matrix(y_true, y_pred)
# Only use the labels that appear in the data
classes = classes[unique_labels(y_true, y_pred)]
if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print("Normalized confusion matrix")
else:
print('Confusion matrix, without normalization')
print(cm)
fig, ax = plt.subplots()
im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
ax.figure.colorbar(im, ax=ax)
# We want to show all ticks...
ax.set(xticks=np.arange(cm.shape[1]),
yticks=np.arange(cm.shape[0]),
# ... and label them with the respective list entries
4
xticklabels=classes, yticklabels=classes,
title=title,
ylabel='True label',
xlabel='Predicted label')
# Rotate the tick labels and set their alignment.

plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
rotation_mode="anchor")
# Loop over data dimensions and create text annotations.

fmt = '.2f' if normalize else 'd'
thresh = cm.max() / 2.
for i in range(cm.shape[0]):
for j in range(cm.shape[1]):
ax.text(j, i, format(cm[i, j], fmt),
ha="center", va="center",
color="white" if cm[i, j] > thresh else "black")
fig.tight_layout()
return ax
[207]: np.set_printoptions(precision=2)
# Plot non-normalized confusion matrix

#https://stackoverflow.com/questions/47755723/
,→creating-a-numpy-structure-scalar-instead-of-array
dff = np.array(('0','1','2'),dtype='U10') #--> Labeling ini diambil dari -->␣

,→print(df['sentimen']) diatas
#dff = data['Dominant_Topic']
plot_confusion_matrix(y_test, predicted, classes=dff,

title='Confusion matrix, without normalization')
# Plot normalized confusion matrix

#plot_confusion_matrix(y_test, y_pred, classes=class_names, normalize=True,
# title='Normalized confusion matrix')
plt.show()
Confusion matrix, without normalization

[[372 33 48]
[ 52 375 42]
[ 36 28 392]]
5
[208]: plot_confusion_matrix(y_test, predicted, classes=dff, normalize=True,
title='Normalized confusion matrix')
# normalize=True makes plot_confusion_matrix divide every row by its row total.
plt.show()
Normalized confusion matrix

[[0.82 0.07 0.11]
[0.11 0.8 0.09]
[0.08 0.06 0.86]]
6
[166]: #confusion_matrix(y_test, predicted)
[167]: #Sumber : https://towardsdatascience.com/understanding-the-confusion-matrix-and-how-to-implement-it-in-python-319202e0fe4d#ccfe
[179]: # Accuracy
# Fraction of test documents whose predicted label equals the true label.
# NOTE(review): the recorded output below (0.8418...) differs from the score
# printed by the np.mean(predicted == y_test) cell (0.8265...), and the
# execution counts are out of order ([179] vs [203]) - the outputs presumably
# come from different runs; re-run the notebook top to bottom to confirm.
from sklearn.metrics import accuracy_score
#accuracy_score(y_true, y_pred)
print("Accuracy Score : ",accuracy_score(y_test, predicted))
Accuracy Score : 0.8418568056648308
[169]: # Recall
from sklearn.metrics import recall_score
#recall_score(y_true, y_pred, average=None)
# average=None yields one recall value per class instead of a single averaged score.
print(recall_score(y_test, predicted, average=None))
[0.83 0.86 0.84]
[170]: # Precision
from sklearn.metrics import precision_score
#precision_score(y_true, y_pred, average=None)
# average=None yields one precision value per class instead of a single averaged score.
print(precision_score(y_test, predicted, average=None))
[0.84 0.84 0.85]
7
[171]: #Sumber : https://medium.com/@ksnugroho/confusion-matrix-untuk-evaluasi-model-pada-unsupervised-machine-learning-bc4b1ae9ae3f
[178]: from sklearn.metrics import classification_report

#print (classification_report(y_test, y_pred))
# Text table of per-class precision, recall, f1-score and support, plus overall accuracy and averages.
print ("Classification Report : \n",classification_report(y_test, predicted))
Classification Report :
precision recall f1-score support
0 0.84 0.83 0.83 436

1 0.84 0.86 0.85 438
2 0.85 0.84 0.84 397
accuracy 0.84 1271

macro avg 0.84 0.84 0.84 1271
weighted avg 0.84 0.84 0.84 1271
[ ]:
[ ]:

3 Confussion Matrix Hasil Modelling OK

Uploaded by

Document Information

Original Description:

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

3 Confussion Matrix Hasil Modelling OK

Uploaded by

Copyright:

Available Formats

3-Confussion-Matrix-hasil-modelling-OK

November 15, 2022

1 TOPIC MODELLING BIGRAM-TRIGRAM

[24]: import pandas as pd

[196]: Unnamed: 0 Document_No Dominant_Topic Topic_Perc_Contrib \

2 COBA DIBIKIN CONFUSSION MATRIX

#lbltopic = data[‘Dominant_Topic’] #textpros = data[‘Text’]

[198]: # This converts the list of words into space-separated strings

#df['isinya'] = df['isinya'].apply(lambda x: ' '.join(x))

[199]: transformer = TfidfTransformer().fit(countsv)

[200]: X_train, X_test, y_train, y_test = train_test_split(countsv,␣

(0, 13062) 0.2789470811683493

[202]: model = MultinomialNB().fit(X_train, y_train)

[203]: predicted = model.predict(X_test)

[204]: print(confusion_matrix(y_test, predicted))

[205]: %matplotlib inline

[206]: def plot_confusion_matrix(y_true, y_pred, classes,

# Compute confusion matrix

# Rotate the tick labels and set their alignment.

# Loop over data dimensions and create text annotations.

# Plot non-normalized confusion matrix

dff = np.array(('0','1','2'),dtype='U10') #--> Labeling ini diambil dari -->␣

plot_confusion_matrix(y_test, predicted, classes=dff,

# Plot normalized confusion matrix

Confusion matrix, without normalization

Normalized confusion matrix

[167]: #Sumber : https://towardsdatascience.com/

Accuracy Score : 0.8418568056648308

[0.83 0.86 0.84]

[0.84 0.84 0.85]

[178]: from sklearn.metrics import classification_report

0 0.84 0.83 0.83 436

accuracy 0.84 1271

You might also like