Artificial Intelligence Lab: Finding the Most Common Word and Letter in Any Language
Submitted By: Muhammad Noman
Roll No: 42385 | Batch: 5th | Semester: 8th | Due Date: 24/04/2019 | Submitted To: Engr. Muhammad Dawood
Department of Software Engineering
# Faculty of Engineering University of Science and Technology, Main Campus
# Township Bannu
#
# Lab Assignment: A sorted list of the top 10 words and letters in at least
# two languages of your choice together with their relative frequency
# (frequency of occurrence divided by total number). Also submit your code as
# a single Python file.
#
# Solution:
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 21 14:42:38 2019

@author: NomanHuma

Fetch a Wikipedia article and report its most common words and letters,
each with its count and relative frequency (count divided by the total
number of words / letters in the article).
"""
import re
from collections import Counter

# Wikipedia language edition to query (Urdu).
# All language codes available at:
# https://meta.wikimedia.org/wiki/List_of_Wikipedias
LANGUAGE = "ur"

# The assignment asks for the top 10 words and letters.
TOP_N = 10


def getWikiPage(s):
    """Return the Wikipedia page associated with the title ``s``.

    If there are multiple pages associated with the title, the first option
    listed by the disambiguation error is fetched instead, after printing
    the list of candidates and a warning.

    Input:
        s (Wikipedia page title)
    Return:
        Wikipedia Python API page object
    Raises:
        Any exception raised by ``wiki.page`` other than the first
        ``DisambiguationError`` propagates to the caller.
    """
    # Imported lazily so the text-analysis helpers below can be used (and
    # tested) without the third-party ``wikipedia`` package installed.
    import wikipedia as wiki

    try:
        p = wiki.page(s)
    except wiki.exceptions.DisambiguationError as disambiguation:
        # This exception is raised if there are multiple pages for the title.
        print(disambiguation)  # We display the titles of all the pages
        print("Warning: Picking", disambiguation.options[0])  # But we pick one
        s = disambiguation.options[0]  # Like this!
        p = wiki.page(s)
    return p


def _with_relative_frequency(ranked, total):
    """Attach ``count / total`` to every ``(item, count)`` pair in ``ranked``."""
    # ``ranked`` is empty whenever ``total`` is 0, so no division by zero.
    return [(item, count, count / total) for item, count in ranked]


def most_common_words(text, n=TOP_N):
    """Return up to ``n`` most frequent whitespace-separated words of ``text``.

    Each entry is ``(word, count, relative_frequency)``. Words with equal
    counts keep the order in which they first appear in ``text``.
    Tokens are split on whitespace only, so punctuation attached to a word
    is counted as part of it.
    """
    words = text.split()
    word_counts = Counter(words)
    return _with_relative_frequency(word_counts.most_common(n), len(words))


def most_common_letters(text, n=TOP_N):
    """Return up to ``n`` most frequent letters of ``text``.

    Each entry is ``(letter, count, relative_frequency)``, ordered by
    descending count and then by the letter itself. Only characters for
    which ``str.isalpha()`` is true are counted, so digits, whitespace,
    punctuation and underscores are ignored.
    """
    letter_counts = Counter(ch for ch in text if ch.isalpha())
    total = sum(letter_counts.values())
    ranked = sorted(letter_counts.items(), key=lambda kv: (-kv[1], kv[0]))
    return _with_relative_frequency(ranked[:n], total)


def main():
    """Prompt for a keyword, fetch its article and print the statistics."""
    import wikipedia as wiki

    wiki.set_lang(LANGUAGE)  # Setting the language to Urdu

    # Use wiki.random() to pick a random title from Wikipedia.
    s = input('Enter Keyword here:\n')
    print("The Keyword you have entered to extract an article is:", s)

    try:
        p = getWikiPage(s)
    except Exception as e:
        # Just in case there are any unexpected errors; without a page there
        # is nothing to analyse, so stop here.
        print('Failed to access the page associated with \'', s,
              '\'. The error is:', e)
        return

    # If everything goes well, print stuff!
    print('*'*10, "Title", p.title, '*'*10+'\n')
    print('*'*10, "id", p.pageid, '*'*10+'\n')
    # print('*'*10, "Summary", '*'*10+'\n', p.summary)
    print('*'*10, "Content", '*'*10+'\n', p.content)
    # print('*'*10, "Links", '*'*10+'\n', '\n'.join(p.links))  # just for fun!

    print("The Most Common Words used in this Article:\n")
    for word, count, share in most_common_words(p.content):
        print("Word : ", word, " Used ", count,
              " times (relative frequency: {:.4f})".format(share))

    print("\n \nThe Most Common Letters used in this Article:\n")
    for letter, count, share in most_common_letters(p.content):
        print("Letter : ", letter, " Used ", count,
              " times (relative frequency: {:.4f})".format(share))


if __name__ == "__main__":
    main()

# Output of the Program:
# Here is the result for two different languages i.e. Urdu and English
# Keyword is: پ روگ رام وٹ ریک مپ شاع ری ارد و
# Keyword:
# Keyword: Pakistan