Download as pdf or txt
Download as pdf or txt
You are on page 1of 2

5/23/22, 8:08 PM Exp-9 - Jupyter Notebook

In [12]:

import numpy as np
import pandas as pd
from collections import Counter

In [13]:

data = pd.read_csv('./data/spam.csv')
data = data[['v1', 'v2']]
data.head()

Out[13]:

v1 v2

0 ham Go until jurong point, crazy.. Available only ...

1 ham Ok lar... Joking wif u oni...

2 spam Free entry in 2 a wkly comp to win FA Cup fina...

3 ham U dun say so early hor... U c already then say...

4 ham Nah I don't think he goes to usf, he lives aro...

In [14]:

data = data.sample(frac=1)
train, test = data[:4000], data[4000:]
X_train, X_test, y_train, y_test = train['v2'], test['v2'], train['v1'], test['v1']

In [15]:

X_train.shape, X_test.shape, y_train.shape, y_test.shape

Out[15]:

((4000,), (1572,), (4000,), (1572,))

In [16]:

vocab_spam = Counter()
for i in train[train['v1'] == 'spam']['v2']:
vocab_spam += Counter(i.split(' '))

vocab_ham = Counter()
for i in train[train['v1'] == 'ham']['v2']:
vocab_ham += Counter(i.split(' '))

In [17]:

test_sentence = 'to a a spam msg'

localhost:8888/notebooks/Exp-9.ipynb 1/2
5/23/22, 8:08 PM Exp-9 - Jupyter Notebook

In [18]:

intial_spam_guess = train[train['v1'] == 'spam'].shape[0]/train.shape[0]


intial_ham_guess = train[train['v1'] == 'ham'].shape[0]/train.shape[0]

In [22]:

d = {'TP': 0, 'FP': 0, 'TN': 0, 'FN': 0}

for (sentence, label) in zip(X_test, y_test):


spam_score = intial_spam_guess
ham_score = intial_ham_guess
for word in sentence.split(' '):
spam_score *= vocab_spam.get(word, 1)/sum(vocab_spam.values())
ham_score *= vocab_ham.get(word, 1)/sum(vocab_ham.values())

if spam_score > ham_score:


pred = 'spam'
else:
pred = 'ham'

if label == pred:
if label == 'spam':
d['TP'] += 1
else:
d['TN'] += 1
else:
if label == 'spam':
d['FP'] += 1
else:
d['FN'] += 1

In [29]:

print(f"Accuracy: {(d['TP'] + d['TN'])/sum(d.values())}")


print(f"Precision: {d['TP']/(d['TP'] + d['FP'])}")
print(f"Recall: {d['TP']/(d['TP'] + d['FN'])}")

Accuracy: 0.8810432569974554

Precision: 0.9808612440191388

Recall: 0.5283505154639175

In [ ]:

localhost:8888/notebooks/Exp-9.ipynb 2/2

You might also like