Professional Documents
Culture Documents
Email Classification
Email Classification
Email Classification
CLASSIFICATION
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
# Feature matrix: every row, positional columns 1 through 3000 of df.
# NOTE(review): presumably column 0 is an identifier and these columns are
# per-word features -- confirm against the dataset.
X = df.iloc[:, 1:3001]
X  # bare expression: echoes the value in a notebook cell, no effect in a script

# Target vector: the last column of df, taken as an array via .values.
Y = df.iloc[:, -1].values
Y  # bare expression, same notebook-style echo as above

from sklearn.model_selection import train_test_split

# random_state is pinned so the same train/test partition is produced on
# every run; the split proportions are left at the library defaults.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=1)
from sklearn.feature_extraction.text import CountVectorizer

# Exploratory bag-of-words step: learn a vocabulary with English stop words
# removed.
# NOTE(review): X_train is a slice of df, not a sequence of raw email texts.
# If df is a pandas DataFrame, iterating it yields the column labels, so the
# vocabulary here would be built from the column names only -- confirm that
# this is intended. The fitted vectorizer is not used by any other statement
# visible in this section.
vect = CountVectorizer(stop_words='english')
vect.fit(X_train)
vect.vocabulary_  # bare expression: notebook-style echo of the learned mapping
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# get_feature_names_out() is the replacement (requires scikit-learn >= 1.0).
vect.get_feature_names_out()
# Multinomial Naive Bayes classifier trained on the training split.
# alpha is the additive smoothing constant. It defaults to 1.0 -- the "1" in
# the Laplace smoothing formula for P(word | class) -- and should be > 0 so
# that a word never seen with a class does not get zero probability.
mnb = MultinomialNB(alpha=1.9)
mnb.fit(X_train, Y_train)

# Predict labels for the held-out split and report the fraction that match.
Y_pred1 = mnb.predict(X_test)
# accuracy_score expects (y_true, y_pred), in that order.
print("Accuracy Score for Naive Bayes : ", accuracy_score(Y_test, Y_pred1))
- NOTES-