Professional Documents
Culture Documents
J.K. Institute of Applied Physics and Technology: Natural Language Processing Assignment
J.K. Institute of Applied Physics and Technology: Natural Language Processing Assignment
INSTITUTE OF APPLIED
PHYSICS AND TECHNOLOGY
# NOTE(review): fragment of a recursive Levenshtein edit-distance solution.
# The extraction dropped the enclosing `def editDistance(str1, str2, m, n):`
# header and the final insert/delete/replace recursion — recover them from
# the original assignment listing before running.
# Base cases: if one string is exhausted, the distance is the length of the
# other (all insertions).
if m==0:
return n
if n==0:
return m
# Matching last characters cost nothing; recurse on the two prefixes.
if str1[m-1]==str2[n-1]:
return editDistance(str1,str2,m-1,n-1)
# Driver strings (the classic edit-distance example pair, distance 3).
str1 = "sunday"
str2 = "saturday"
# NOTE(review): fragment of a Soundex implementation.  The `def soundex(query):`
# header, the digit-substitution loops that used `to_replace`, the
# duplicate-digit collapse, and the final zero-padding/truncation to 4
# characters were all lost in extraction — the control flow below is
# incomplete and will not run as-is.
query = query.lower()
first_letter = letters[0]
letters = letters[1:]
if len(letters) == 0:
# Soundex groups: consonants inside the same tuple map to the same digit.
to_replace = {('b', 'f', 'p', 'v'): 1, ('c', 'g', 'j', 'k', 'q', 's', 'x', 'z'): 2,
if first_letter in group]
if char in group]
# Keep the original first letter of the query in front of the code.
if first_letter == letters[0]:
letters[0] = query[0]
else:
letters.insert(0, query[0])
first_letter = letters[0]
letters = letters[1:]
letters.append(0)
letters.insert(0, first_letter)
return string
# Driver calls: Soundex demo, then NLTK sentence/word tokenization, then
# Porter stemming of a word list.
print (soundex("King"))
print(sent_tokenize(text))
print(word_tokenize(text))`
# NOTE(review): the stray backtick above is an extraction artifact; `text`
# and `plurals` are defined in lines lost from this extract.
stemmer = PorterStemmer()
# NOTE(review): only the last element of the `plurals = [...]` literal
# survived extraction.
'plotted']
singles = [stemmer.stem(plural) for plural in plurals]
print(' '.join(singles))
6. Use any tagger to tag a text file. Now write a program to find the most
likely tag in the tagged text.
import nltk
8. Write a program to check whether a given grammar is a Context-Free Grammar (CFG)
or not.
# NOTE(review): fragment of the "is this grammar regular?" exercise.  The
# loop over productions and the bodies of several branches were lost in
# extraction, and keywords are fused (`iflen`, `ifrightRG`, `ifleftRG`).
termials = ['a']  # NOTE(review): typo for `terminals` in the original
nonterminals = ['S']
regular = True
leftRG = False   # set when a left-regular production is recognised
rightRG = False  # set when a right-regular production is recognised
if not(leftRG or rightRG):
iflen(rightSide) > 1:
# A nonterminal at the start of the right side suggests a left-regular rule.
if (nonterminal in rightSide[0]):
leftRG = True
rightRG = True
else:
# A lone symbol on the right must be terminal for the grammar to stay regular.
regular = regular and not (nonterminal in rightSide)
ifrightRG:
ifleftRG:
print(regular)
import copy
import re
# Remove large rules (more than 2 states in the right part, eg. A->BCD)
def large(rules,let,voc):
# NOTE(review): CNF-conversion step that splits rules whose right side has
# more than two symbols (A->BCD becomes A->BX, X->CD) using fresh names
# from the `let` pool.  The `for key in new_dict:` and inner `for j ...`
# loop headers were lost in extraction, and several keywords are fused
# (`fori`, `iflen`, `returnrules`), so the body below is incomplete.
# Make a hard copy of the dictionary (as its size is changing over the # process)
new_dict = copy.deepcopy(rules)
values = new_dict[key]
fori in range(len(values)):
iflen(values[i]) > 2:
if j==0:
# First split: keep the head symbol, append the fresh nonterminal.
rules[key][i] = rules[key][i][0] + let[0]
else:
# Register the fresh nonterminal in the vocabulary and consume its name.
voc.append(let[0])
new_key = copy.deepcopy(let[0])
let.remove(let[0])
# The new state produces the last two symbols of the long right side.
rules.setdefault(new_key, []).append(values[i][-2:])
returnrules,let,voc
def empty(rules,voc):
# NOTE(review): removes empty (epsilon) productions.  The `for key in
# new_dict:` loop headers and the epsilon test itself were lost in
# extraction (fused keywords `fori`, `iflen`, `returnrules` remain), so the
# body below is a fragment.
e_list = []
new_dict = copy.deepcopy(rules)
values = new_dict[key]
fori in range(len(values)):
# Remember which nonterminals could derive epsilon.
e_list.append(key)
# remove empty state
rules[key].remove(values[i])
# Drop a nonterminal entirely once it has no productions left.
iflen(rules[key]) == 0:
voc.remove(key)
rules.pop(key, None)
new_dict = copy.deepcopy(rules)
values = new_dict[key]
fori in range(len(values)):
# and C in vocabulary
iflen(values[i]) == 2:
# check for rule in the form A->BC, excluding the case that
rules.setdefault(key, []).append(values[i][1])
# check for rule in the form A->CB, excluding the case that
if values[i][0]!=values[i][1]:
rules.setdefault(key, []).append(values[i][0])
returnrules,voc
def short(rules,voc):
# NOTE(review): computes D(A), the unit-production closure used to delete
# short rules, then delegates to short1().  A `for letter ...` header and
# the fixed-point loop around L105 were lost in extraction.
# D(A) = {A})
D = dict(zip(voc, voc))
# just transform value from string to list, to be able to insert more values
for key in D:
D[key] = list(D[key])
# for every letter A of the vocabulary, if B->C, B in D(A) and C not in D(A)
# add C in D(A)
if key in D[letter]:
values = rules[key]
fori in range(len(values)):
D.setdefault(letter, []).append(values[i])
rules,D = short1(rules,D)
returnrules,D
def short1(rules,D):
# NOTE(review): deletes unit rules and re-expands A->BC using the closures
# in D.  The `for key in new_dict:` / `for key in rules:` loop headers were
# lost in extraction (fused keywords `fori`, `iflen`, `returnrules` remain).
# remove short rules (with length in right side = 1)
new_dict = copy.deepcopy(rules)
values = new_dict[key]
fori in range(len(values)):
iflen(values[i]) == 1:
rules[key].remove(values[i])
# replace each rule A->BC with A->B'C', where B' in D(B) and C' in D(C)
values = rules[key]
fori in range(len(values)):
for j in D[values[i][0]]:
for k in D[values[i][1]]:
rules.setdefault(key, []).append(j + k)
returnrules,D
def final_rules(rules, D, S):
    """Copy productions reachable through the unit closure into the start symbol.

    For every nonterminal ``let`` in ``D[S]`` (the unit-production closure of
    the start symbol ``S``), append each of ``let``'s right-hand sides to
    ``S``'s rule list, skipping values already present.  ``rules`` is mutated
    in place and also returned.

    Args:
        rules: dict mapping nonterminal -> list of right-hand-side strings.
        D:     dict mapping nonterminal -> list of nonterminals in its closure.
        S:     the start symbol; must be a key of both ``rules`` and ``D``.

    Returns:
        The (mutated) ``rules`` dict.
    """
    # NOTE: extraction had fused `deffinal_rules`/`returnrules` and dropped
    # the indentation; logic is unchanged, only the formatting is repaired.
    for let in D[S]:
        for rhs in rules[let]:
            if rhs not in rules[S]:
                rules.setdefault(S, []).append(rhs)
    return rules
# Print rules
defprint_rules(rules):
# NOTE(review): pretty-printer for the rules dict.  The `for key in rules:`
# header and the actual print statement were lost in extraction (note the
# fused `defprint_rules`/`fori` keywords); only the skeleton remains.
values = rules[key]
fori in range(len(values)):
# Returns 1 unconditionally in the original listing.
return 1
def main():
# NOTE(review): interactive driver for the CNF-conversion passes above.
# Extraction dropped many lines (the `let = [...]` letters-pool literal,
# the except bodies, the start-symbol prompt that produced the stray
# `character!')` fragment, and the per-rule `fr`/`to` input parsing) and
# fused several keywords (`exceptValueError`, `fori`).  Recover the missing
# lines from the original assignment listing before running.
rules = {}
voc = []
# This list's going to be our "letters pool" for naming new states
let.remove('e')
# Loop until the user supplies a valid integer rule count.
while True:
userInput = input('Give number of rules')
try:
N = int(userInput)
else: break
exceptValueError:
# Initial state
while True:
character!')
else:break
print('+------------------------------------------------------+')
print('|or A BCD, if more than one states in the right part |')
print('+------------------------------------------------------+')
# Read N productions, collecting every new symbol into the vocabulary and
# removing used names from the letters pool.
fori in range(N):
voc.append(l)
if l in let:
let.remove(l)
for l in to:
voc.append(l)
if l in let:
let.remove(l)
rules.setdefault(fr, []).append(to)
# Successive normal-form passes, printing the rules after each step.
rules,let,voc = large(rules,let,voc)
print_rules(rules)
#print voc
rules,voc = empty(rules,voc)
print_rules(rules)
#print voc
print('\nRules after short rules removal')
rules,D = short(rules,voc)
print_rules(rules)
print('\nFinal rules')
rules = final_rules(rules,D,S)
print_rules(rules)
if __name__ == '__main__':
main()
#include<iostream.h>
#include<conio.h>
#include<string.h>
// NOTE(review): Turbo-C++-era LL(1) predictive-parse-table builder.  The
// opening `{` after the class head was lost in extraction, as were spaces
// inside some tokens (`intnt`, `intscannt`).
class parse
// nt/t: counts of nonterminals/terminals; m: the parse table (indices into
// p); i..j: scratch loop/index variables shared by the member functions.
intnt,t,m[20][20],i,s,n,p1,q,k,j;
// p: productions as strings; n1/t1: nonterminal/terminal symbol arrays;
// f/fl: user-entered FIRST and FOLLOW strings per nonterminal.
char p[30][30],n1[20],t1[20],ch,b,c,f[30][30],fl[30][30];
public:
int scant(char);   // index of a terminal in t1[], or -1
intscannt(char);   // index of a nonterminal in n1[], or -1
void process();    // build and print the parse table m[][]
void input();      // read the n productions into p[][]
};
int parse::scannt(char a)
int c=-1,i;
for(i=0;i<nt;i++)
if(n1[i]==a)
returni;
return c;
int parse::scant(char b)
int c1=-1,j;
for(j=0;j<t;j++)
if(t1[j]==b)
return j;
return c1;
}
void parse::input()
cin>>n;
for(i=0;i<n;i++)
cin>>p[i];
nt=0;
t=0;
// Build and print the LL(1) parse table m[nonterminal][terminal].
// NOTE(review): every `{`/`}` inside this function was lost in extraction,
// so the nesting below must be reconstructed from the original listing
// before this will compile; code lines are kept verbatim.
void parse::process()
// Collect the distinct LHS nonterminals into n1[].
for(i=0;i<n;i++)
if(scannt(p[i][0])==-1)
n1[nt++]=p[i][0];
// Collect the distinct terminals from the right sides (RHS starts at
// index 3; 'e' denotes epsilon and is skipped).
for(i=0;i<n;i++)
for(j=3;j<strlen(p[i]);j++)
if(p[i][j]!='e')
if(scannt(p[i][j])==-1)
if((scant(p[i][j]))==-1)
t1[t++]=p[i][j];
t1[t++]='$';
// Initialise the whole table to -1 ("error" entries).
for(i=0;i<nt;i++)
for(j=0;j<t;j++)
m[i][j]=-1;
// FIRST and FOLLOW sets are typed in by the user, not computed here.
for(i=0;i<nt;i++)
cout<<"Enter first["<<n1[i]<<"]:";
cin>>f[i];
for(i=0;i<nt;i++)
cout<<"Enter follow["<<n1[i]<<"]:";
cin>>fl[i];
// Fill the table: production i is placed at M[LHS, x] for x in FIRST of
// the RHS head symbol; epsilon productions use FOLLOW(LHS) instead.
for(i=0;i<n;i++)
p1=scannt(p[i][0]);
if((q=scant(p[i][3]))!=-1)
m[p1][q]=i;
if((q=scannt(p[i][3]))!=-1)
for(j=0;j<strlen(f[q]);j++)
m[p1][scant(f[q][j])]=i;
if(p[i][3]=='e')
for(j=0;j<strlen(fl[p1]);j++)
m[p1][scant(fl[p1][j])]=i;
// Print the table: terminal headers, then one row per nonterminal.
for(i=0;i<t;i++)
cout<<"\t"<<t1[i];
cout<<endl;
for(j=0;j<nt;j++)
cout<<n1[j];
for(i=0;i<t;i++)
cout<<"\t"<<" ";
if(m[j][i]!=-1)
cout<<p[m[j][i]];
cout<<endl;
}
void main()
clrscr();
parse p;
p.input();
p.process();
getch();
class State(object):
    """One Earley chart state: a dotted rule plus its span and provenance.

    Attributes:
        label:      LHS nonterminal of the rule.
        rules:      list of RHS tags of the rule.
        dot_idx:    dot position within ``rules`` (0..len(rules)).
        start_idx:  chart index where the state's span starts.
        end_idx:    chart index where the state's span ends.
        idx:        unique state id, printed as e.g. "S3".
        made_from:  states this one was built from (printed verbatim).
        producer:   name of the operation that created the state.

    NOTE(review): the extracted listing had fused keywords (``returnself``,
    ``fori``, ``ifi``) and lost all indentation; this is the same class with
    the formatting repaired.
    """

    def __init__(self, label, rules, dot_idx, start_idx, end_idx, idx, made_from, producer):
        self.label = label
        self.rules = rules
        self.dot_idx = dot_idx
        self.start_idx = start_idx
        self.end_idx = end_idx
        self.idx = idx
        self.made_from = made_from
        self.producer = producer

    def next(self):
        """Returns the tag after the dot"""
        return self.rules[self.dot_idx]

    def complete(self):
        # The state is complete once the dot has passed every RHS tag.
        return len(self.rules) == self.dot_idx

    def __str__(self):
        # Render the RHS with a LaTeX-style bullet marking the dot position.
        rule_string = ''
        for i, rule in enumerate(self.rules):
            if i == self.dot_idx:
                rule_string += '\\bullet '
            rule_string += rule + ' '
        if self.dot_idx == len(self.rules):
            rule_string += '\\bullet'
        return 'S%d %s -> %s [%d, %d] %s %s' % (self.idx, self.label, rule_string, self.start_idx,
                                                self.end_idx, self.made_from, self.producer)
classEarley:
# NOTE(review): Earley-parser driver.  The core loop (predictor / scanner /
# completer and the `enqueue` helper that `parse` calls), plus the body of
# `__str__`, were lost in extraction, and several keywords are fused
# (`classEarley`, `defget_new_id`, `returnself`, `returnlen`).
def __init__(self, words, grammar, terminals):
# One chart column per gap between words: len(words) + 1 in total.
self.chart = [[] for _ in range(len(words) + 1)]
self.current_id = 0
self.words = words
self.grammar = grammar
self.terminals = terminals
defget_new_id(self):
# Hand out consecutive unique ids for newly created chart states.
self.current_id += 1
returnself.current_id - 1
defis_terminal(self, tag):
# A tag counts as terminal (part of speech) if listed in self.terminals.
return tag in self.terminals
defis_complete(self, state):
returnlen(state.rules) == state.dot_idx
def parse(self):
# Seed column 0 with the dummy gamma -> .S state; the rest of the
# standard Earley loop is missing from this extract.
self.enqueue(State('gamma', ['S'], 0, 0, 0, self.get_new_id(), [], 'dummy start state'), 0)
def __str__(self):
res = ''
# NOTE(review): chart-formatting loop missing from this extract.
return res
def test():
# NOTE(review): demo grammar (the classic "book that flight" example);
# the tail of this function — building the input word list and running the
# Earley parser over it — is missing from this extract.
grammar = {
'S': [['NP', 'VP'], ['Aux', 'NP', 'VP'], ['VP']],
'NP': [['Det', 'Nominal'], ['Proper-Noun']],
'Nominal': [['Noun'], ['Noun', 'Nominal']],
'VP': [['Verb'], ['Verb', 'NP']],
# Lexical entries: preterminal -> word lists.
'Det': ['that', 'this', 'a'],
'Noun': ['book', 'flight', 'meal', 'money'],
'Verb': ['book', 'include', 'prever'],
'Aux': ['does'],
'Prep': ['from', 'to', 'on'],
'Proper-Noun': ['Houston', 'TWA']
}
# Preterminal categories treated as terminals by the parser.
terminals = ['Det', 'Noun', 'Verb', 'Aux', 'Prep', 'Proper-Noun']
12. Write a program that takes a word as input and outputs its various senses and parts of speech. Use
the English WordNet and its APIs.
importnltk
# NOTE(review): fragment of the WordNet sense-listing exercise.  The
# stopwords/tokenizer/lesk imports, the definition of `sentence`, and the
# `for sense in sent:` loop header are missing, and `importnltk` lost its
# space in extraction.
stop_words=set(stopwords.words('english'))
tokens = set(word_tokenize(sentence))
# Keep only content words (drop stopwords) before looking up senses.
sent=tokens.difference(stop_words)
# Lesk disambiguates each remaining word against the sentence context.
print(sense+'---- '+lesk(sentence,sense).definition())
print()
print('*****************************************************************')
print(nltk.pos_tag(tokens))