Professional Documents
Culture Documents
Project On Twitter Account
Project On Twitter Account
Import Libraries
In [130]:
import tweepy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from textblob import TextBlob
from tweepy import API
from tweepy import Cursor
from tweepy import OAuthHandler
import json
import datetime
import re
import seaborn as sns
import nltk
import warnings
warnings.filterwarnings('always') # "error", "ignore", "always", "default", "module" or "o
df = pd.read_csv('API_SET1.csv')
In [132]:
TwitterApiKey=df["Twitter_Api_Key"][0]
TwitterApiSecret=df["Twitter_Api_Secret_Key"][0]
TwitterApiAccessToken=df["Twitter_Api_access_token"][0]
TwitterApiSecretToken=df["Twitter_Api_secret_access_token"][0]
In [133]:
auth=tweepy.OAuthHandler(TwitterApiKey,TwitterApiSecret)
auth.set_access_token(TwitterApiAccessToken,TwitterApiSecretToken)
twitterApi=tweepy.API(auth,wait_on_rate_limit=True)
C:\Users\aadit\anaconda3\lib\site-packages\tweepy\auth.py:120: DeprecationWa
rning: OAuthHandler is deprecated; use OAuth1UserHandler instead.
warnings.warn(
twitterAccount = "@CobraTateKING"
In [135]:
tweets=tweepy.Cursor(twitterApi.user_timeline,
screen_name=twitterAccount,
count=None,
since_id=None,
max_id=None,trim_user=True,exclude_replies=True,contributor_details=Fal
include_entities=False).items(50);
In [137]:
df.head(10)
Out[137]:
Tweets
Text Cleaning
In [138]:
def cleanupTweet(txt):
txt = re.sub(r'@[A-Za-z0-9_]+','',txt)
txt = re.sub(r'#','',txt)
txt = re.sub(r'RT :','',txt)
txt = re.sub(r'https?:\/\/[A-Za-z0-9\.\/]+','',txt)
return txt
In [139]:
df.head(10)
Out[139]:
Tweets
In [140]:
df['Tweet'] = df['Tweets'].apply(cleanupTweet)
In [141]:
df.head()
Out[141]:
Tweets Tweet
0 Life’s simple. You make choices and you don’t ... Life’s simple. You make choices and you don’t ...
1 Life is Beautiful outside the Matrix.\n\nhttps... Life is Beautiful outside the Matrix.\n\n
2 Brotherhood is our most precious possession. h... Brotherhood is our most precious possession.
3 As soon as you are granted life,\n\nYou are gu... As soon as you are granted life,\n\nYou are gu...
4 Rules are for the Poor. https://t.co/Ln50iVJL7Z Rules are for the Poor.
Text Subjectivity
In [142]:
def getTextSubjectivity(txt):
return TextBlob(txt).sentiment.subjectivity
Text Polarity
In [143]:
def getTextPolarity(txt):
return TextBlob(txt).sentiment.polarity
Cleaned Tweets
In [144]:
df['Subjectivity'] = df['Tweets'].apply(getTextSubjectivity)
In [145]:
df['Polarity'] = df['Tweets'].apply(getTextPolarity)
In [146]:
df.head()
Out[146]:
Life’s simple. You make choices and Life’s simple. You make choices and
0 0.178571 0.000
you don’t ... you don’t ...
In [148]:
Out[148]:
Life’s simple. You make Life’s simple. You make {'neg': 0.0, 'neu': 1.0,
0 0.178571
choices and you don’t ... choices and you don’t ... 'pos': 0.0, 'compound...
Life is Beautiful outside the Life is Beautiful outside the {'neg': 0.0, 'neu': 0.562,
1 0.525000
Matrix.\n\nhttps... Matrix.\n\n 'pos': 0.438, 'comp...
Brotherhood is our most Brotherhood is our most {'neg': 0.0, 'neu': 0.556,
2 0.750000
precious possession. h... precious possession. 'pos': 0.444, 'comp...
In [149]:
Out[149]:
In [150]:
Out[150]:
{'neg': 0.0,
Life is Beautiful
Life is Beautiful outside 'neu': 0.562,
1 outside the 0.525000 0.5994 pos
the Matrix.\n\nhttps... 'pos': 0.438,
Matrix.\n\n
'comp...
{'neg': 0.437,
Rules are for the Poor. Rules are for the 'neu': 0.563,
4 0.600000 -0.4767 neg
https://t.co/Ln50iVJL7Z Poor. 'pos': 0.0,
'comp...
In [151]:
Out[151]:
Life’s
{'neg': 0.0,
Life’s simple. You simple. You
'neu': 1.0,
0 make choices and you make 0.178571 0.0000 pos 0
'pos': 0.0,
don’t ... choices and
'compound...
you don’t ...
As soon as
{'neg': 0.144,
As soon as you are you are
'neu': 0.704,
3 granted life,\n\nYou granted 0.000000 -0.2023 neg 0
'pos': 0.152,
are gu... life,\n\nYou
'co...
are gu...
{'neg': 0.437,
Rules are for the Poor. Rules are 'neu': 0.563,
4 0.600000 -0.4767 neg 0
https://t.co/Ln50iVJL7Z for the Poor. 'pos': 0.0,
'comp...
In [152]:
Out[152]:
Life’s
{'neg': 0.0,
Life’s simple. You simple. You
'neu': 1.0,
0 make choices and you make 0.178571 0.0000 pos 0
'pos': 0.0,
don’t ... choices and
'compound...
you don’t ...
As soon as
{'neg': 0.144,
As soon as you are you are
'neu': 0.704,
3 granted life,\n\nYou granted 0.000000 -0.2023 neg 0
'pos': 0.152,
are gu... life,\n\nYou
'co...
are gu...
{'neg': 0.437,
Rules are for the Poor. Rules are 'neu': 0.563,
4 0.600000 -0.4767 neg 0
https://t.co/Ln50iVJL7Z for the Poor. 'pos': 0.0,
'comp...
In [153]:
Out[153]:
Life’s
{'neg': 0.0,
Life’s simple. You simple. You
'neu': 1.0,
0 make choices and you make 0.178571 0.0000 pos 0
'pos': 0.0,
don’t ... choices and
'compound...
you don’t ...
As soon as
{'neg': 0.144,
As soon as you are you are
'neu': 0.704,
3 granted life,\n\nYou granted 0.000000 -0.2023 neg 0
'pos': 0.152,
are gu... life,\n\nYou
'co...
are gu...
{'neg': 0.437,
Rules are for the Poor. Rules are 'neu': 0.563,
4 0.600000 -0.4767 neg 0
https://t.co/Ln50iVJL7Z for the Poor. 'pos': 0.0,
'comp...
In [154]:
df.isnull().sum()
Out[154]:
Tweets 0
Tweet 0
Subjectivity 0
Polarity 0
Compound 0
comp_score 0
neg_score 0
pos_score 0
neu_score 0
dtype: int64
Correlation
In [155]:
sns.set(rc={'figure.figsize':(10,7)})
sns.heatmap(df.corr(),annot=True,cmap='cubehelix')
Out[155]:
<AxesSubplot:>
In [156]:
df.corr()
Out[156]:
Visualization
localhost:8888/notebooks/Andrew Tate Twitter Account NLP.ipynb 10/20
10/18/22, 7:00 AM Andrew Tate Twitter Account NLP - Jupyter Notebook
Compound score
In [157]:
sns.set(rc={'figure.figsize':(10,7)})
sns.set_style('whitegrid')
sns.countplot(x='comp_score',data=df,palette='viridis')
Out[157]:
<AxesSubplot:xlabel='comp_score', ylabel='count'>
Compound value
In [158]:
sns.set(rc={'figure.figsize':(20,10)})
sns.set_style('whitegrid')
sns.countplot(x='Compound',data=df,palette='viridis')
Out[158]:
<AxesSubplot:xlabel='Compound', ylabel='count'>
Negative score
In [159]:
sns.set(rc={'figure.figsize':(12,7)})
sns.set_style('whitegrid')
sns.countplot(x='neg_score',data=df,palette='viridis')
Out[159]:
<AxesSubplot:xlabel='neg_score', ylabel='count'>
Positive score
In [160]:
sns.set(rc={'figure.figsize':(15,8)})
sns.set_style('whitegrid')
sns.countplot(x='pos_score',data=df,palette='viridis')
Out[160]:
<AxesSubplot:xlabel='pos_score', ylabel='count'>
Neutral score
localhost:8888/notebooks/Andrew Tate Twitter Account NLP.ipynb 12/20
10/18/22, 7:00 AM Andrew Tate Twitter Account NLP - Jupyter Notebook
In [161]:
sns.set(rc={'figure.figsize':(25,10)})
sns.set_style('whitegrid')
sns.countplot(x='neu_score',data=df,palette='viridis')
Out[161]:
<AxesSubplot:xlabel='neu_score', ylabel='count'>
Subjectivity score
In [162]:
sns.set(rc={'figure.figsize':(50,18)})
sns.set_style('whitegrid')
sns.countplot(x='Subjectivity',data=df,palette='Paired')
Out[162]:
<AxesSubplot:xlabel='Subjectivity', ylabel='count'>
In [163]:
sns.set(rc={'figure.figsize':(14,7)})
sns.set_style('whitegrid')
sns.countplot(x='neg_score',hue='comp_score',data=df,palette='Paired_r')
Out[163]:
<AxesSubplot:xlabel='neg_score', ylabel='count'>
In [164]:
sns.set(rc={'figure.figsize':(20,10)})
sns.set_style('whitegrid')
sns.countplot(x='pos_score',hue='comp_score',data=df,palette='Paired')
Out[164]:
<AxesSubplot:xlabel='pos_score', ylabel='count'>
sns.set(rc={'figure.figsize':(30,15)})
sns.set_style('whitegrid')
sns.countplot(x='neu_score',hue='comp_score',data=df,palette='Paired_r')
Out[165]:
<AxesSubplot:xlabel='neu_score', ylabel='count'>
In [166]:
sns.set(rc={'figure.figsize':(50,20)})
sns.set_style('whitegrid')
sns.countplot(x='Subjectivity',hue='comp_score',data=df,palette='Paired')
Out[166]:
<AxesSubplot:xlabel='Subjectivity', ylabel='count'>
sns.set(rc={'figure.figsize':(14,7)})
sns.set_style('whitegrid')
sns.lineplot( x='pos_score',
y='neg_score',
data=df,color='blue')
Out[167]:
<AxesSubplot:xlabel='pos_score', ylabel='neg_score'>
In [168]:
df.head()
Out[168]:
Life’s
{'neg': 0.0,
Life’s simple. You simple. You
'neu': 1.0,
0 make choices and you make 0.178571 0.0000 pos 0
'pos': 0.0,
don’t ... choices and
'compound...
you don’t ...
As soon as
{'neg': 0.144,
As soon as you are you are
'neu': 0.704,
3 granted life,\n\nYou granted 0.000000 -0.2023 neg 0
'pos': 0.152,
are gu... life,\n\nYou
'co...
are gu...
{'neg': 0.437,
Rules are for the Poor. Rules are 'neu': 0.563,
4 0.600000 -0.4767 neg 0
https://t.co/Ln50iVJL7Z for the Poor. 'pos': 0.0,
'comp...
Machine Learning
In [169]:
X = df['Tweet']
y = df['comp_score']
In [170]:
In [171]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.30,random_state=100)
CountVectorizer
In [172]:
Out[172]:
(Documents,Features)
In [173]:
X_train_counts.shape
Out[173]:
(35, 230)
In [174]:
count_vect.get_feature_names()
Out[174]:
['about',
'act',
'activities',
'activity',
'aft',
'air',
'also',
'always',
'am',
'amazed',
'an',
'and',
'andrew',
'anything',
'are',
'as',
'at',
'average',
In [175]:
Out[175]:
(35, 230)
Tf-IDF Vectorizer
In [176]:
Out[176]:
(35, 230)
Training a Classifier
In [177]:
Out[177]:
LinearSVC()
Pipeline
In [178]:
Out[178]:
Results
In [179]:
predictions = text_clf.predict(X_test)
In [180]:
In [181]:
print(metrics.confusion_matrix(y_test,predictions))
[[0 6]
[0 9]]
Accuracy 75%
In [182]:
Out[182]:
0.7499999999999999
In [183]:
print(metrics.classification_report(y_test,predictions,zero_division=1))
accuracy 0.60 15
In [ ]: