Download as pdf or txt
Download as pdf or txt
You are on page 1of 9

import java.io.IOException;
import java.net.URLEncoder;
import java.util.*;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

class​ Main {
​public​ ​static​ ​void​ main(String[] args) ​throws​ IOException {

//Array of filter Words

String[] stopWords = {
​"a"​, "
​ as"​, ​"able"​, ​"about"​, ​"above"​, ​"according"​, ​"accordingly"​,
"across"​, " ​ actually"​, ​"after"​, ​"afterwards"​, ​"again"​, ​"against"​, ​"aint"​,
"all"​, ​"allow"​, ​"allows"​, ​"almost"​, ​"alone"​, ​"along"​, "
​ already"​, ​"also"​,
"although"​, ​"always"​, ​"am"​, ​"among"​, ​"amongst"​, ​"an"​, "​ and"​, ​"another"​,
"any"​, ​"anybody"​, ​"anyhow"​, ​"anyone"​, ​"anything"​, ​"anyway"​, "
​ anyways"​,
"anywhere"​, ​"apart"​, ​"appear"​, ​"appreciate"​, ​"appropriate"​, " ​ are"​,
"arent"​, ​"around"​, ​"aside"​, ​"ask"​, ​"asking"​, ​"associated"​, "
​ at"​,
"available"​, ​"away"​, ​"awfully"​, ​"be"​, ​"became"​, ​"because"​, "​ become"​,
​ becoming"​, ​"been"​, ​"before"​, "
"becomes"​, " ​ beforehand"​, ​"behind"​, ​"being"​,
"believe"​, "​ below"​, ​"beside"​, ​"besides"​, "​ best"​, ​"better"​, ​"between"​,
"beyond"​, ​"both"​, "
​ brief"​, ​"but"​, ​"by"​, ​"cmon"​, ​"cs"​, ​"came"​, ​"can"​,
"cant"​, ​"cannot"​, "​ cant"​, ​"cause"​, ​"causes"​, ​"certain"​, ​"certainly"​,
"changes"​, ​"clearly"​, ​"co"​, ​"com"​, ​"come"​, ​"comes"​, ​"concerning"​,
"consequently"​, ​"consider"​, ​"considering"​, ​"contain"​, ​"containing"​,
"contains"​, ​"corresponding"​, ​"could"​, ​"couldnt"​, ​"course"​, ​"currently"​,
"definitely"​, ​"described"​, " ​ despite"​, ​"did"​, ​"didnt"​, ​"different"​, ​"do"​,
"does"​, ​"doesnt"​, ​"doing"​, "​ dont"​, ​"done"​, ​"down"​, ​"downwards"​, ​"during"​,
"each"​, ​"edu"​, ​"eg"​, ​"eight"​, ​"either"​, ​"else"​, ​"elsewhere"​, ​"enough"​,
"entirely"​, ​"especially"​, ​"et"​, ​"etc"​, ​"even"​, ​"ever"​, ​"every"​,
"everybody"​, ​"everyone"​, ​"everything"​, ​"everywhere"​, ​"ex"​, " ​ exactly"​,
"example"​, ​"except"​, ​"far"​, ​"few"​, ​"ff"​, ​"fifth"​, ​"first"​, "​ five"​,
"followed"​, ​"following"​, ​"follows"​, ​"for"​, ​"former"​, ​"formerly"​, ​"forth"​,
"four"​, ​"from"​, ​"further"​, ​"furthermore"​, ​"get"​, ​"gets"​, ​"getting"​,
"given"​, ​"gives"​, ​"go"​, ​"goes"​, ​"going"​, ​"gone"​, ​"got"​, ​"gotten"​,
"greetings"​, ​"had"​, ​"hadnt"​, ​"happens"​, ​"hardly"​, ​"has"​, ​"hasnt"​, ​"have"​,
"havent"​, ​"having"​, ​"he"​, ​"hes"​, ​"hello"​, "
​ help"​, ​"hence"​, ​"her"​, ​"here"​,
"heres"​, ​"hereafter"​, ​"hereby"​, ​"herein"​, " ​ hereupon"​, ​"hers"​, ​"herself"​,
"hi"​, ​"him"​, ​"himself"​, ​"his"​, ​"hither"​, ​"hopefully"​, " ​ how"​, ​"howbeit"​,
"however"​, ​"i"​, ​"id"​, ​"ill"​, ​"im"​, ​"ive"​, ​"ie"​, ​"if"​, "​ ignored"​,
​ in"​, ​"inasmuch"​, ​"inc"​, ​"indeed"​, ​"indicate"​, ​"indicated"​,
"immediate"​, "
"indicates"​, "​ inner"​, ​"insofar"​, ​"instead"​, ​"into"​, ​"inward"​, ​"is"​,
"isnt"​, ​"it"​, ​"itd"​, ​"itll"​, ​"its"​, ​"its"​, "
​ itself"​, ​"just"​, "​ keep"​,
"keeps"​, ​"kept"​, ​"know"​, ​"knows"​, ​"known"​, " ​ last"​, ​"lately"​, "​ later"​,
"latter"​, ​"latterly"​, ​"least"​, ​"less"​, ​"lest"​, ​"let"​, ​"lets"​, ​"like"​,
"liked"​, ​"likely"​, ​"little"​, ​"look"​, ​"looking"​, ​"looks"​, ​"ltd"​, ​"mainly"​,
​ may"​, ​"maybe"​, ​"me"​, ​"mean"​, "
"many"​, " ​ meanwhile"​, ​"merely"​, ​"might"​,
"more"​, "​ moreover"​, ​"most"​, ​"mostly"​, " ​ much"​, ​"must"​, ​"my"​, ​"myself"​,
"name"​, ​"namely"​, ​"nd"​, ​"near"​, ​"nearly"​, ​"necessary"​, ​"need"​, ​"needs"​,
"neither"​, ​"never"​, ​"nevertheless"​, ​"new"​, ​"next"​, ​"nine"​, ​"no"​, ​"nobody"​,
​ none"​, ​"noone"​, ​"nor"​, "
"non"​, " ​ normally"​, ​"not"​ , ​"nothing"​, ​"novel"​,
"now"​, "​ nowhere"​, ​"obviously"​, " ​ of"​, ​"off"​, ​"often"​, ​"oh"​, ​"ok"​, ​"okay"​,
"old"​, ​"on"​, ​"once"​, ​"one"​, ​"ones"​, ​"only"​, ​"onto"​, ​"or"​, ​"other"​,
"others"​, ​"otherwise"​, ​"ought"​, ​"our"​, ​"ours"​, ​"ourselves"​, ​"out"​,
​ over"​, ​"overall"​, ​"own"​, ​"particular"​, "
"outside"​, " ​ particularly"​, ​"per"​,
"perhaps"​, "​ placed"​, ​"please"​, ​"plus"​, ​"possible"​, "​ presumably"​,
"probably"​, ​"provides"​, ​"que"​, ​"quite"​, ​"qv"​, ​"rather"​, ​"rd"​, ​"re"​,
"really"​, ​"reasonably"​, ​"regarding"​, ​"regardless"​, ​"regards"​,
"relatively"​, ​"respectively"​, ​"right"​, ​"said"​, ​"same"​, ​"saw"​, ​"say"​,
"saying"​, ​"says"​, ​"second"​, "
​ secondly"​, ​"see"​, ​"seeing"​, ​"seem"​, ​"seemed"​,
"seeming"​, ​"seems"​, ​"seen"​, "​ self"​, ​"selves"​, ​"sensible"​, ​"sent"​,
"serious"​, ​"seriously"​, ​"seven"​, ​"several"​, ​"shall"​, ​"she"​, ​"should"​,
"shouldnt"​, ​"since"​, ​"six"​, ​"so"​, ​"some"​, ​"somebody"​, ​"somehow"​,
"someone"​, ​"something"​, ​"sometime"​, ​"sometimes"​, ​"somewhat"​, ​"somewhere"​,
"soon"​, ​"sorry"​, ​"specified"​, ​"specify"​, ​"specifying"​, ​"still"​, ​"sub"​,
​ sup"​, ​"sure"​, ​"ts"​, ​"take"​, ​"taken"​, ​"tell"​, ​"tends"​, ​"th"​,
"such"​, "
"than"​, "​ thank"​, ​"thanks"​, ​"thanx"​, ​"that"​, ​"thats"​, ​"thats"​, ​"the"​,
"their"​, ​"theirs"​, ​"them"​, ​"themselves"​, ​"then"​, ​"thence"​, "
​ there"​,
"theres"​, ​"thereafter"​, ​"thereby"​, ​"therefore"​, ​"therein"​, " ​ theres"​,
"thereupon"​, ​"these"​, ​"they"​, ​"theyd"​, ​"theyll"​, ​"theyre"​, ​"theyve"​,
"think"​, ​"third"​, ​"this"​, ​"thorough"​, ​"thoroughly"​, ​"those"​, ​"though"​,
"three"​, ​"through"​, ​"throughout"​, ​"thru"​, ​"thus"​, ​"to"​, "
​ together"​, ​"too"​,
"took"​, ​"toward"​, ​"towards"​, ​"tried"​, ​"tries"​, ​"truly"​, "​ try"​, ​"trying"​,
​ two"​, ​"un"​, ​"under"​, "
"twice"​, " ​ unfortunately"​, ​"unless"​, ​"unlikely"​,
"until"​, "​ unto"​, ​"up"​, ​"upon"​, "​ us"​, ​"use"​, ​"used"​, ​"useful"​, ​"uses"​,
"using"​, ​"usually"​, ​"value"​, ​"various"​, ​"very"​, ​"via"​, ​"viz"​, ​"vs"​,
​ wants"​, ​"was"​, ​"wasnt"​, ​"way"​, ​"we"​, ​"wed"​, "
"want"​, " ​ well"​, "​ were"​,
"weve"​, "​ welcome"​, ​"well"​, ​"went"​, ​"were"​, ​"werent"​, " ​ what"​, "​ whats"​,
"whatever"​, ​"when"​, ​"whence"​, ​"whenever"​, ​"where"​, ​"wheres"​, ​"whereafter"​,
"whereas"​, ​"whereby"​, ​"wherein"​, ​"whereupon"​, ​"wherever"​, ​"whether"​,
​ while"​, ​"whither"​, ​"who"​, ​"whos"​, ​"whoever"​, ​"whole"​, ​"whom"​,
"which"​, "
"whose"​, "​ why"​, ​"will"​, ​"willing"​, ​"wish"​, ​"with"​, ​"within"​, ​"without"​,
​ wonder"​, ​"would"​, ​"would"​, ​"wouldnt"​, ​"yes"​, ​"yet"​, ​"you"​,
"wont"​, "
"youd"​, "​ youll"​, ​"youre"​, ​"youve"​, ​"your"​, ​"yours"​, ​"yourself"​,
"yourselves"​, ​"zero"​, ​"x"​, ​"s"​};

String[] stopWordsExtra = {
​"​\'​ll"​,​"​\'​s"​, ​"​\'​m"​,​"n​\'​t"​, ​"​\'​re"​, ​"​\'​ve"​, ​"​\'​d"​, ​"’ll"​,​"n’t"​, ​"’s"​,
"’m"​, ​"’re"​, ​"’ve"​, " ​ ’d"​, ​"s​\'​"​, ​"s’"​, ​"​\'​"​, ​"​\"​"​, ​"“"​, ​"”"​, ​"’"​, ​"©"​,
"℗"​, ​"®"​, ​"™"​, ​"•"​, ​"·"​, ​"–"​, ​"◉"​};

String[] positiveWords = {
​ able"​, "
" ​ accepting"​, ​"active"​, ​"addition"​, ​"admirable"​, ​"adorable"​,
"affirming"​, " ​ ageless"​, ​"agreeable"​, ​"abundant"​, ​"accomplished"​,
"accurate"​, ​"adaptable"​, ​"agile"​, ​"alert"​, ​"ambitious"​, ​"appreciative"​,
"attentive"​, ​"aware"​, ​"authentic"​, ​"attactive"​, ​"affectionate"​, ​"amazing"​,
"awesome"​, ​"amusing"​, ​"beautiful"​, ​"beloved"​, ​"benficial"​, ​"benevolent"​,
"best"​, ​"better"​, ​"blessed"​, ​"blissful"​, ​"blooming"​, ​"blossoming"​, ​"bold"​,
"brilliant"​, ​"brave"​, ​"caring"​, ​"cute"​, ​"creative"​, ​"calm"​, ​"capable"​,
"certain"​, ​"challenging"​, ​"charming"​, ​"choice"​, ​"clean"​, ​"comfortable"​,
"charitable"​, ​"careful"​, ​"cool"​, ​"charitable"​, ​"cheerful"​, ​"clear"​,
"committed"​, ​"competent"​, ​"concentration"​, ​"compassionate"​, ​"confident"​,
"consistent"​, ​"convincing"​, ​"courageous"​, ​"courteous"​, ​"cooperative"​,
"curious"​, ​"considerate"​, ​"desirable"​, ​"decent"​, ​"delicate"​, ​"delicious"​,
"dreamy"​, ​"dynamic"​, ​"daring"​, ​"dude!"​, ​"delightful"​, ​"dependable"​,
"desirable"​, ​"devoted"​, ​"determined"​, ​"diligent"​, ​"disciplined"​,
"diverse"​, ​"drive"​, ​"dazzling"​, ​"divine"​, ​"excellent"​, ​"educated"​,
​ empathetic"​, ​"easy"​, "
"efficient"​, " ​ enabling"​, ​"energetic"​, ​"engaging"​,
"enjoyable"​, "​ eager"​, ​"effective"​, "​ elated"​, ​"elegant"​, ​"encouraging"​,
"enthusiastic"​, ​"exciting"​, ​"experienced"​, ​"expert"​, "
​ explorer"​,
"expressive"​, ​"enlightened"​, ​"exalted"​, ​"empowered"​, "​ exhilirating"​,
"engrossing"​, ​"ecstatic"​, ​"entrancing"​, ​"enlivened"​, ​"fantastic"​,
"fabulous"​, ​"fair"​, ​"faithful"​, ​"famous"​, ​"favorite"​, ​"flexible"​,
"focused"​, ​"flourishing"​, ​"forgiving"​, ​"free"​, ​"fun"​, ​"frugal"​,
"friendly"​, ​"fascinating"​, ​"fulfilled"​, ​"foody"​, ​"feisty"​, ​"festive"​,
"good"​, ​"glowing"​, ​"generous"​, ​"genius"​, ​"genuine"​, ​"giving"​, ​"grace"​,
"gratitude"​, ​"growing"​, ​"grounded"​, ​"glorious"​, ​"groovy"​, ​"giddy"​, ​"glad"​,
"hopeful"​, ​"hot"​, ​"happy"​, ​"harmonious"​, ​"healthy"​, ​"helpful"​, ​"honest"​,
"humorous"​, ​"human"​, ​"hero"​, ​"holy"​, ​"honesty"​, ​"honorable"​, ​"hospitable"​,
"humble"​, ​"halo"​, ​"imaginative"​, ​"inspiring"​, ​"ideal"​, ​"incredible"​,
"interesting"​, ​"innovative"​, ​"improving"​, ​"imaginative"​, ​"independent"​,
"ingenius"​, ​"insightful"​, ​"inspiring"​, ​"integrity"​, ​"intelligent"​,
"involved"​, ​"inclusive"​, ​"intriguing"​, ​"intuitive"​, ​"joyful"​, ​"jokey"​,
"jolly"​, ​"jovial"​, ​"just"​, ​"jazzy"​, ​"jaunty"​, ​"jubilant"​, ​"junior"​,
"jumpy"​, ​"juvenile"​, ​"kind"​, ​"killer"​, ​"keen"​, ​"knowledgable"​, ​"kudos"​,
"kitschy"​, ​"kindred"​, ​"kool"​, ​"loving"​, ​"learner"​, ​"laugh"​, ​"leader"​,
​ lucky"​, ​"light"​, ​"loyal"​, ​"louable"​, ​"luxurious"​, ​"lively"​,
"logical"​, "
"likable"​, "​ magnificent"​, ​"meaningful"​, ​"majestic"​, ​"marvelous"​,
"motivating"​, ​"miraculous"​, ​"magic"​, ​"masterful"​, ​"mindful"​, ​"modest"​,
"merciful"​, ​"mellow"​, ​"nice"​, ​"noble"​, ​"neat"​, ​"new"​, ​"nurturing"​,
"noisy"​, ​"normal"​, ​"noteworthy"​, ​"novel"​, ​"nutty"​, "
​ outstanding"​,
"optimistic"​, ​"original"​, ​"obedient"​, ​"organized"​, " ​ perfect"​, ​"positive"​,
"peaceful"​, ​"paradisiacal"​, "
​ passionate"​, ​"powerful"​, "​ prepared"​,
"perceptive"​, ​"persistent"​, "​ pleasing"​, ​"prosperous"​, "​ playful"​,
"present"​, ​"quality"​, ​"quiet"​, ​"quaint"​, ​"qualified"​, ​"quick"​,
"respectful"​, ​"radiant"​, ​"ready"​, ​"rockin"​, ​"relaxing"​, ​"remarkable"​,
"rational"​, ​"respectful"​, ​"responsible"​, ​"resourceful"​, ​"romantic"​,
"righteous"​, ​"resilient"​, ​"rad"​, ​"soulmate"​, ​"special"​, ​"selfless"​,
"secure"​, ​"safe"​, ​"sincere"​, ​"stylish"​, ​"sympathetic"​, ​"strong"​,
"sparkly"​, ​"sunshiney"​, ​"spontaneous"​, ​"sweet"​, ​"supportive"​, ​"true"​,
"teachable"​, ​"trusting"​, ​"thankful"​, ​"timely"​, ​"tranquil"​, ​"tender"​,
"thrilling"​, ​"ticklish"​, ​"unique"​, ​"uplifting"​, ​"ultimate"​,
"unconditional"​, ​"upgrade"​, ​"useful"​, ​"unifying"​, ​"understanding"​,
"valuable"​, ​"virtuous"​, ​"valid"​, ​"viable"​, ​"victorious"​, ​"vibrant"​,
"worthy"​, ​"wild"​, ​"wacky"​, ​"wonderful"​, ​"winner"​, ​"welcome"​, ​"witty"​,
"wholesome"​, ​"yahoo"​, ​"yodeler"​, ​"yolo"​, ​"zesty"​, ​"zealous"​, ​"zany"​,
"zippy"​, ​"zoomy"​, ​"zingy"​, ​"zamazing"​ };

String[] negativeWords = {
​ abysmal"​, ​"adverse"​, ​"alarming"​, ​"angry"​, ​"annoy"​, ​"anxious"​,
"
"apathy"​, ​"appalling"​, ​"atrocious"​, ​"awful"​,​"bad"​, ​"banal"​, ​"barbed"​,
"belligerent"​, ​"bemoan"​, ​"beneath"​, " ​ boring"​, ​"broken"​, ​"callous"​,
"cant"​, ​"clumsy"​, ​"coarse"​, ​"cold"​, "​ coldhearted"​, ​"collapse"​, ​"confused"​,
"contradictory"​, ​"contrary"​, ​"corrosive"​, ​"corrupt"​, ​"crazy"​, "​ creepy"​,
"criminal"​, ​"cruel"​, ​"cry"​, ​"cutting"​, ​"damage"​, ​"damaging"​, "​ dastardly"​,
"dead"​, ​"decaying"​, ​"deformed"​, ​"deny"​, ​"deplorable"​, ​"depressed"​,
"deprived"​, ​"despicable"​, ​"detrimental"​, ​"dirty"​, ​"disease"​, ​"disgusting"​,
"disheveled"​, ​"dishonest"​, ​"dishonorable"​, ​"dismal"​, ​"distress"​, ​"dont"​,
"dreadful"​, ​"dreary"​, ​"enraged"​, ​"eroding"​, ​"evil"​, ​"fail"​, ​"faulty"​,
"fear"​, ​"feeble"​, ​"fight"​, ​"filthy"​, ​"foul"​, ​"frighten"​, ​"frightful"​,
"gawky"​, ​"ghastly"​, ​"grave"​, ​"greed"​, ​"grim"​, "
​ grimace"​, ​"gross"​,
"grotesque"​, ​"gruesome"​, ​"guilty"​, ​"haggard"​, " ​ hard"​, ​"hardhearted"​,
​ hate"​, "
"harmful"​, " ​ hideous"​, "​ homely"​, ​"horrendous"​, ​"horrible"​,
"hostile"​, "​ hurt"​, "​ hurtful"​, " ​ icky"​, ​"ignorant"​, ​"ignore"​, ​"ill"​,
"immature"​, ​"imperfect"​, ​"impossible"​, ​"inane"​, ​"inelegant"​, ​"infernal"​,
"injure"​, ​"injurious"​, ​"insane"​, ​"insidious"​, ​"insipid"​, ​"jealous"​,
​ lose"​, ​"lousy"​, ​"lumpy"​, ​"malicious"​, ​"mean"​, ​"menacing"​,
"junky"​, "
"messy"​, "​ misshapen"​, ​"missing"​, ​"misunderstood"​, ​"moan"​, ​"moldy"​,
"monstrous"​, ​"naive"​, ​"nasty"​, "
​ naughty"​, ​"negate"​, ​"negative"​, ​"never"​,
"no"​, ​"nobody"​, ​"nondescript"​, "​ nonsense"​, ​"not"​, ​"noxious"​,
"objectionable"​, ​"odious"​, ​"offensive"​, ​"old"​, ​"oppressive"​, ​"pain"​,
"perturb"​, ​"pessimistic"​, ​"petty"​, ​"plain"​, ​"poisonous"​, ​"poor"​,
"prejudice"​,​"questionable"​, ​"quirky"​, ​"quit"​, ​"reject"​, ​"renege"​,
"repellant"​, ​"reptilian"​, ​"repugnant"​, ​"repulsive"​, ​"revenge"​,
"revolting"​, ​"rocky"​, ​"rotten"​, ​"rude"​, ​"ruthless"​, ​"sad"​, ​"savage"​,
"scare"​, ​"scary"​, ​"scream"​, ​"severe"​, ​"shocking"​, ​"shoddy"​, ​"sick"​,
"sickening"​, ​"sinister"​, ​"slimy"​, ​"smelly"​, ​"sobbing"​, ​"sorry"​,
"spiteful"​, ​"sticky"​, ​"stinky"​, ​"stormy"​, ​"stressful"​, ​"stuck"​, ​"stupid"​,
"substandard"​, ​"suspect"​, ​"suspicious"​, ​"tense"​, ​"terrible"​, ​"terrifying"​,
"threatening"​, ​"ugly"​, "
​ undermine"​, ​"unfair"​, ​"unfavorable"​, ​"unhappy"​,
"unhealthy"​, ​"unjust"​, "​ unlucky"​, ​"unpleasant"​, ​"unsatisfactory"​,
"unsightly"​, ​"untoward"​, ​"unwanted"​, ​"unwelcome"​, " ​ unwholesome"​,
"unwieldy"​, ​"unwise"​, ​"upset"​, ​"vice"​, ​"vicious"​, "​ vile"​, ​"villainous"​,
"vindictive"​, ​"wary"​, ​"weary"​, ​"wicked"​, ​"woeful"​, ​"worthless"​,
"wound"​,​"yell"​, ​"yucky"​,​"zero"​};

//Array for storage

ArrayList<String> component = ​new​ ArrayList<String>();


ArrayList<Integer> componentHz = ​new​ ArrayList<Integer>();
ArrayList<String> URLs = ​new​ ArrayList<String>();
HashSet<String> links = ​new​ HashSet<String>();

//Variables

String text1, nextWord, unprocessedText;


Document doc;

//implementation

​//scanner for input

Scanner scanner = ​new​ Scanner(System.in);

System.out.println(​"​\n​Please enter product name: "​);


String searchTerm = scanner.nextLine();
System.out.println(​"Please enter number of results: "​);
​int​ num = scanner.nextInt();
System.out.println(​""​);

scanner.close();

​//Search for URLs

String GOOGLE_SEARCH_URL = ​"https://www.google.com/search?q="​ +


searchTerm + ​"+review&as_eq=youtube+facebook&num="​ + num;
​if​ (!links.contains(GOOGLE_SEARCH_URL)){
​try​ {
Document document = Jsoup.connect(GOOGLE_SEARCH_URL).get();
Elements linksOnPage = document.select(​"a[href]"​);

​for​ (Element page : linksOnPage){


​if​ (!(page.attr(​"abs:href"​).contains(​"https://www.google.com/"​)
|| page.attr(​"abs:href"​).contains(​"https://support.google.com/"​)
||
page.attr(​"abs:href"​).contains(​"https://policies.google.com/"​)
|| URLs.contains(​"abs:href"​))){

URLs.add(page.attr(​"abs:href"​));
}
}
}

​catch​ (IOException e){


System.err.println(​"For ​\'​"​ + GOOGLE_SEARCH_URL + ​"​\'​: "​ +
e.getMessage());
}
​//System.out.println(URLs);
}

​//start

​for​ (String currentURL : URLs){


doc = Jsoup.connect(currentURL).get();

unprocessedText = doc.text();

​//System.out.println("\ntitle is: " + unprocessedText);

String title =
doc.title().toLowerCase().replaceAll(​"​\\​p{Punct}"​,​""​).replaceAll(​"​\\​w*​\\​d​\
\​w* *"​, ​""​);
String[] titleWord = title.split(​" "​);

​//text clean

text1 = ​" "​ + unprocessedText


.replaceAll(​"​\n​"​, ​" "​)
.toLowerCase();

text1 = text1.trim()
.replaceAll(​"​\\​p{Punct}"​,​""​)
.replaceAll(​"​\\​w*​\\​d​\\​w* *"​, ​""​)
.replaceAll(​"[^​\\​x00-​\\​x7F]"​, ​""​);

​for​ (String v: titleWord){


text1 = text1.replace(​" "​+v+ ​" "​, ​" "​);
}
​for​ (String u: stopWordsExtra){
text1 = text1.replace(u, ​""​);
}
​for​ (String w: stopWords){
text1 = text1.replace(​" "​ + w + ​" "​, ​" "​);
}

text1 = text1.replaceAll(​"​\\​s+"​, ​" "​) + ​" "​;


​//System.out.println("\n" + text1);

​//Text count

​while​ (text1.contains(​" "​)){

nextWord = text1.substring(​0​,text1.indexOf(​" "​));

​if​ (!(nextWord.length() <= ​1​)){

​if​ (component.contains(nextWord)){
​ nt​ i = component.indexOf(nextWord);
i
componentHz.set(i, componentHz.get(i) + ​1​);
}
​else​{
component.add(nextWord);
componentHz.add(​1​);
}
}

text1 = text1.replaceFirst(nextWord + ​" "​, ​""​);


}
}
​//System.out.println(component);
​//System.out.println(componentHz);

​ /analysis
/
​int​ positiveCount = ​0​;
​for​ (String y: positiveWords){
​if​ (component.contains(y)){
positiveCount += componentHz.get(component.indexOf(y));
}
}

​int​ negativeCount = ​0​;


​for​ (String z: negativeWords){
​if​ (component.contains(z)){
negativeCount += componentHz.get(component.indexOf(z));
}
}

​for​ (​int​ i = ​0​; i<​20​;i++){

System.out.println(component.get(componentHz.indexOf(Collections.max(compo
nentHz))) + ​" "​ + Collections.max(componentHz) );
component.remove(componentHz.indexOf(Collections.max(componentHz)));
componentHz.remove(componentHz.indexOf(Collections.max(componentHz)));
}

System.out.println(​"​\n​Positive Words: "​ + positiveCount);


System.out.println(​"Negative Words: "​ + negativeCount);
​ /}
/
System.out.print(component);

}
}

You might also like