Download as txt, pdf, or txt
Download as txt, pdf, or txt
You are on page 1 of 2

#!/bin/python3

import math
import os
import random
import re
import sys
import zipfile
os.environ['NLTK_DATA'] = os.getcwd()+"/nltk_data"
import nltk
from nltk.corpus import brown,stopwords

# Complete the 'calculateCFD' function below.


#
# The function accepts following parameters:
# 1. STRING_ARRAY cfdconditions
# 2. STRING_ARRAY cfdevents
#

def calculateCFD(cfdconditions, cfdevents):
    """Tabulate two conditional frequency distributions over the Brown corpus.

    The first table counts lower-cased, non-stopword tokens per genre and is
    tabulated for the requested conditions and samples.  The second table
    counts, per genre, tokens ending in 'ing' or 'ed' (non-stopword tokens
    are collapsed to the bare suffix) and is tabulated for the requested
    conditions with the fixed samples ['ed', 'ing'].

    Args:
        cfdconditions: Genre names passed to ``tabulate`` as ``conditions``.
        cfdevents: Words passed to the first ``tabulate`` as ``samples``.
    """
    english_stopwords = set(stopwords.words('english'))
    genres = brown.categories()

    # Table 1: (genre, token) pairs for every token that is not a stopword.
    content_pairs = []
    for genre in genres:
        for token in brown.words(categories=genre):
            lowered = token.lower()
            if lowered not in english_stopwords:
                content_pairs.append((genre, lowered))
    content_cfd = nltk.ConditionalFreqDist(content_pairs)
    content_cfd.tabulate(conditions=cfdconditions, samples=cfdevents)

    def suffix_label(lowered):
        # Stopwords (e.g. ones that happen to end in 'ing') keep their full
        # form, so they never contribute to the 'ing' / 'ed' counts.
        if lowered in english_stopwords:
            return lowered
        if lowered.endswith('ing'):
            return 'ing'
        if lowered.endswith('ed'):
            return 'ed'
        return lowered

    # Table 2: [genre, label] pairs for every token ending in 'ing' or 'ed'.
    suffix_pairs = []
    for genre in genres:
        for token in brown.words(categories=genre):
            lowered = token.lower()
            if lowered.endswith(('ing', 'ed')):
                suffix_pairs.append([genre, suffix_label(lowered)])

    # NOTE(review): this dumps the complete pair list to stdout; it looks like
    # leftover debug output -- confirm before removing.
    print(suffix_pairs)
    suffix_cfd = nltk.ConditionalFreqDist(suffix_pairs)
    print(suffix_cfd.conditions())
    suffix_cfd.tabulate(conditions=cfdconditions, samples=['ed', 'ing'])

if __name__ == '__main__':
    # Input format: a count line followed by that many condition lines,
    # then a count line followed by that many event lines.
    condition_total = int(input().strip())
    cfdconditions = [input() for _ in range(condition_total)]

    event_total = int(input().strip())
    cfdevents = [input() for _ in range(event_total)]

    # Unpack the bundled corpus archive only when the data directory is
    # not already present in the working directory.
    working_dir = os.getcwd()
    if not os.path.exists(working_dir + "/nltk_data"):
        with zipfile.ZipFile("nltk_data.zip", 'r') as archive:
            archive.extractall(working_dir)

    calculateCFD(cfdconditions, cfdevents)

You might also like