Professional Documents
Culture Documents
Sahil NLP
Sahil NLP
Sahil NLP
Roll No:-
# Word tokenization demo: split one sentence into tokens and print them.
import nltk  # needed here: nltk.download() is called before any later import

nltk.download('punkt')
from nltk.tokenize import word_tokenize

sentence = "Tokenize this sentence."
tokens = word_tokenize(sentence)
print(tokens)
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
# Sample tokens: drop the English stop words, then POS-tag the full list.
tokens = "the quick brown fox jumps over the lazy dog".split()
stop_words = set(stopwords.words('english'))

# Membership is tested on the lower-cased token.
newfiltered_sentence = [tok for tok in tokens if tok.lower() not in stop_words]
print(newfiltered_sentence)

# Tagging is done on the unfiltered token list.
nltk.download('averaged_perceptron_tagger')
tagged_words = nltk.pos_tag(tokens)
print(tagged_words)
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
# Tokenize a fixed sample sentence, POS-tag the tokens and print the result.
sentence = "This is a sample sentence"
tokens = nltk.word_tokenize(sentence)
# `tagged_words` is read again by the nltk.ne_chunk calls further down.
tagged_words = nltk.pos_tag(tokens)
print(tagged_words)
Q5:- To carry out chunking of words based on part-of-speech tagging.
# Named-entity chunking of already-tagged tokens.
nltk.download('maxent_ne_chunker')
nltk.download('words')
# `tagged_words` is not assigned in this snippet; it comes from the
# nltk.pos_tag call earlier in the file.
ne_chunks = nltk.ne_chunk(tagged_words)
print(ne_chunks)
import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag, RegexpParser
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
Name:- Vipin Rawat Course:- MCA Section:- 4E
Roll No:-
def tokenize_and_chunk_input():
    """Prompt for a sentence and return its regexp-chunked parse.

    The line read from standard input is word-tokenized, POS-tagged and
    parsed with a four-rule chunk grammar (NP, PP, VP, CLAUSE).

    Returns:
        Whatever ``RegexpParser.parse`` returns for the tagged tokens.
    """
    tagged = pos_tag(word_tokenize(input("Enter a sentence: ")))
    # Joined with newlines (empty first and last entries) so the grammar text
    # is identical to a multi-line literal with leading/trailing newline.
    chunk_rules = "\n".join((
        r"",
        r"NP: {<DT|JJ|NN.*>+}",
        r"PP: {<IN><NP>}",
        r"VP: {<VB.*><NP|PP|CLAUSE>+$}",
        r"CLAUSE: {<NP><VP>}",
        r"",
    ))
    return RegexpParser(chunk_rules).parse(tagged)


if __name__ == "__main__":
    # Script entry point: announce, run one prompt/parse cycle, show the result.
    print("Tokenizing and chunking input from the user using NLTK...")
    chunked_input = tokenize_and_chunk_input()
    print("Chunked input:", chunked_input)
# Named-entity chunking of already-tagged tokens.
nltk.download('maxent_ne_chunker')
nltk.download('words')
# NOTE(review): the chunking function just above keeps its tags in a local
# `tagged_tokens`, so `tagged_words` here is whatever an earlier snippet
# assigned at module level — confirm this is the intended input.
ne_chunks = nltk.ne_chunk(tagged_words)
print(ne_chunks)
Q7:- Write a program to take input from the user and carry out all the basic operations of NLP.
import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag, RegexpParser
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
def tokenize_and_chunk_input():
    """Read one sentence from the user and chunk it with a regexp grammar.

    Steps: prompt on standard input, word-tokenize the reply, POS-tag the
    tokens, then parse the tagged tokens with ``RegexpParser``.

    Returns:
        The parse result produced by ``RegexpParser.parse``.
    """
    user_input = input("Enter a sentence: ")
    tokens = word_tokenize(user_input)
    tagged_tokens = pos_tag(tokens)

    # Chunk grammar: one rule per line, applied in the order written.
    grammar = r"""
        NP: {<DT|JJ|NN.*>+}
        PP: {<IN><NP>}
        VP: {<VB.*><NP|PP|CLAUSE>+$}
        CLAUSE: {<NP><VP>}
    """
    chunk_parser = RegexpParser(grammar)
    chunks = chunk_parser.parse(tagged_tokens)
    return chunks


# Page header carried over from the source document (it was embedded in the
# function body, where it is not valid Python):
# Name:- Vipin Rawat Course:- MCA Section:- 4E
# Roll No:-

if __name__ == "__main__":
    print("Tokenizing and chunking input from the user using NLTK...")
    chunked_input = tokenize_and_chunk_input()
    print("Chunked input:", chunked_input)
Q8:- To calculate the TF-IDF (term frequency–inverse document frequency) for a given set of sentences.
Q9:- Write a program to perform count vectorization and transform each document into a vector of word
counts.
Name:- Vipin Rawat Course:- MCA Section:- 4E
Roll No:-
Q10:- Write a Python program to find similar words using a word-to-word model.
Q11:- Write a Python program to find similar words in two given paragraphs.
import nltk
from nltk.corpus import gutenberg
nltk.download('gutenberg')
nltk.download('punkt')
# Find the words that two short paragraphs have in common.
paragraph1 = "Hello and welcome! How can I assist you today?"
paragraph2 = "Greetings! I'm here to help. What can I do for you?"

# Lower-case before tokenizing so the comparison ignores case.
words_paragraph1 = set(nltk.word_tokenize(paragraph1.lower()))
words_paragraph2 = set(nltk.word_tokenize(paragraph2.lower()))

# Page header carried over from the source document:
# Name:- Vipin Rawat Course:- MCA Section:- 4E
# Roll No:-

# Keep only alphabetic tokens: punctuation such as "!" and "?" appears in
# both paragraphs but is not a word.
similar_words = {
    token
    for token in words_paragraph1.intersection(words_paragraph2)
    if token.isalpha()
}
print("Similar words between the two paragraphs:")
print(similar_words)
Q12:-Write a python program to tokenize the given sentence using two methods sentence by
sentence.
import nltk
nltk.download('punkt')
def tokenize_sentences(text):
    """Return the result of ``nltk.sent_tokenize`` applied to *text*."""
    return nltk.sent_tokenize(text)
# Sample text, written as adjacent literals: a bare line break inside a
# "..." string is a syntax error.
text = (
    "Tokenization is the process of breaking text into sentences. "
    "It's an important step in natural language processing."
)
sentences = tokenize_sentences(text)
# Print one sentence per line.
for sentence in sentences:
    print(sentence)
Q13:- Write a Python program to take a sample of at least 5 lines and tokenize it either by word or
by sentence.
import nltk
nltk.download('punkt')
def tokenize_sentences(text):
    """Split *text* with ``nltk.sent_tokenize`` and return the result."""
    return nltk.sent_tokenize(text)
# Sample text, written as adjacent literals: a bare line break inside a
# "..." string is a syntax error. The trailing fragments are kept verbatim.
text = (
    "Tokenization is the process of breaking text into sentences. "
    "It's an important step in natural language processing. "
    "Gjh. yhfh. Jhyfhu "
)
sentences = tokenize_sentences(text)
# Print one sentence per line.
for sentence in sentences:
    print(sentence)
nltk.sent_tokenize(text)
Name:- Vipin Rawat Course:- MCA Section:- 4E
Roll No:-
nltk.word_tokenize(text)
def tokenize_sentences(text):
    """Hand *text* to ``nltk.sent_tokenize`` and return what it produces."""
    return nltk.sent_tokenize(text)
def tokenize_words(sentence):
    """Return the result of ``nltk.word_tokenize`` applied to *sentence*."""
    return nltk.word_tokenize(sentence)
Q14:- Write a Python program to download sample text from Gutenberg and tokenize it by noun
as well as verb. The sample text
import nltk
from nltk.corpus import gutenberg
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
nltk.download('gutenberg')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
# Work on the first 1000 characters of the corpus file only.
sample_text = gutenberg.raw('shakespeare-hamlet.txt')[:1000]
tokens = word_tokenize(sample_text)
tagged_tokens = pos_tag(tokens)

# Select by tag prefix: 'NN…' for nouns, 'VB…' for verbs.
nouns = [token for token, tag in tagged_tokens if tag.startswith('NN')]
verbs = [token for token, tag in tagged_tokens if tag.startswith('VB')]

# Heading and list each go on their own line.
print("Nouns:", nouns, sep="\n")
print("\nVerbs:", verbs, sep="\n")
Q15:- Write a Python program to remove stop words from a given text using the built-in stop-word list
from NLTK.
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
def remove_stopwords(text):
    """Return *text* with English stop words removed.

    The text is split on whitespace; a piece is dropped when its lower-cased
    form is in the set built from ``stopwords.words('english')``. The
    remaining pieces are re-joined with single spaces.
    """
    english_stops = set(stopwords.words('english'))
    kept = filter(lambda piece: piece.lower() not in english_stops, text.split())
    return ' '.join(kept)
# Run the NLTK-based stop-word filter on a sample sentence.
# NOTE(review): the result is stored in `filtered_text` but this snippet
# never prints it.
text = "This is a sample sentence with some stop words that need to be removed."
filtered_text = remove_stopwords(text)
import spacy
nlp = spacy.load("en_core_web_sm")
text = "This is a sample sentence with some stop words that need to be removed."
def remove_stopwords_spacy(text):
    """Return *text* without the tokens the spaCy pipeline flags as stop words.

    *text* is run through the module-level ``nlp`` pipeline; the ``.text`` of
    every token whose ``is_stop`` is false is kept, and the kept pieces are
    joined with single spaces.
    """
    kept = []
    for token in nlp(text):
        if token.is_stop:
            continue
        kept.append(token.text)
    return ' '.join(kept)
filtered_text_spacy = remove_stopwords_spacy(text)
print("Filtered text (spaCy):", filtered_text_spacy)
Q20:- Write a Python program to demonstrate stemming using regular-expression stemming.
import re
# Suffix rules, tried in order; only the first rule that matches is applied.
# Compiled once here instead of on every call.
_SUFFIX_RULES = (
    (re.compile(r's$'), ''),
    (re.compile(r'ed$'), ''),
    (re.compile(r'ing$'), ''),
)


def regex_stemmer(word, min_length=0):
    """Strip one common English suffix ('s', 'ed' or 'ing') from *word*.

    Args:
        word: The word to stem.
        min_length: Words shorter than this are returned unchanged. The
            default of 0 stems every word, as before. This mirrors the
            ``min`` option of NLTK's ``RegexpStemmer``.

    Returns:
        *word* with the first matching suffix removed, or *word* itself when
        no rule matches or the word is shorter than *min_length*.
    """
    if len(word) < min_length:
        return word
    for pattern, replacement in _SUFFIX_RULES:
        # subn reports whether anything was replaced, so the word is scanned
        # once per rule instead of once to search and again to substitute.
        stemmed, hits = pattern.subn(replacement, word)
        if hits:
            return stemmed
    return word
# Demo: stem a handful of words and show each next to its stem.
words = ["running", "flies", "swimming", "happier", "cats", "dogs"]
stemmed_words = list(map(regex_stemmer, words))

for original, stemmed in zip(words, stemmed_words):
    print(f"Original: {original}, Stemmed: {stemmed}")
import nltk
nltk.download('punkt')
nltk.download('hmm_treebank_pos_tagger')
nltk.download('averaged_perceptron_tagger')
# NOTE(review): `default_pos_tags` is first assigned further down the file
# (from pos_tag(words)), and `words` at this point still holds the stemming
# demo list. This looks like a misplaced copy of the HMM section below —
# confirm before running the file top to bottom.
hmm_tagger = nltk.tag.HiddenMarkovModelTagger.train([default_pos_tags])
hmm_pos_tags = hmm_tagger.tag(words)
print("\nStochastic POS tagging (Hidden Markov Model):")
print(hmm_pos_tags)
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.corpus import wordnet
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')
# POS tagging demo on a single sample sentence.
text = "The quick brown fox jumps over the lazy dog."
words = word_tokenize(text)
default_pos_tags = pos_tag(words)
print("Default POS tagging:")
print(default_pos_tags)

# The "rule-based" section prints the default tagger's output again.
print("\nRule-based POS tagging (same as default tagger):")
print(default_pos_tags)

# The HMM tagger is trained on exactly one tagged sentence (the default
# tagger's output above) and then applied to the same words.
nltk.download('hmm_treebank_pos_tagger')
hmm_tagger = nltk.tag.HiddenMarkovModelTagger.train([default_pos_tags])
hmm_pos_tags = hmm_tagger.tag(words)
# Printed once; the original emitted this heading and result twice.
print("\nStochastic POS tagging (Hidden Markov Model):")
print(hmm_pos_tags)
def get_wordnet_pos(tag):
    """Map a POS tag string to the corresponding WordNet POS constant.

    Args:
        tag: Tag string; only its leading letter is inspected.

    Returns:
        ``wordnet.ADJ`` for tags starting with 'J', ``wordnet.VERB`` for 'V',
        ``wordnet.NOUN`` for 'N', ``wordnet.ADV`` for 'R', otherwise ``None``.
    """
    if tag.startswith('J'):
        return wordnet.ADJ
    if tag.startswith('V'):
        return wordnet.VERB
    if tag.startswith('N'):
        return wordnet.NOUN
    if tag.startswith('R'):
        return wordnet.ADV
    return None


# Page header carried over from the source document (it was embedded in the
# function body, where it is not valid Python):
# Name:- Vipin Rawat Course:- MCA Section:- 4E
# Roll No:-
wordnet_tags = pos_tag(words)
wordnet_pos_tags = [(word, get_wordnet_pos(tag)) for word, tag in wordnet_tags]
print("\nDictionary-based POS tagging (WordNet):")
print(wordnet_pos_tags)
Q22:- Write a Python program to develop a chatbot that helps to diagnose simple flu symptoms.
import random
# Opening lines; one is picked at random when the chat starts.
greetings = ["Hello!", "Hi there!", "Welcome!", "Greetings!"]

# Questions the bot asks when the user's message is not itself a question.
common_questions = [
    "What is your name?",
    "How can I help you today?",
    "What symptoms are you experiencing?",
    "Do you have any allergies?",
    "Are you currently taking any medications?",
]

# Replies used when the user's message ends with "?".
responses = [
    # Adjacent literals: the original string was split across two physical
    # lines, which is a syntax error inside "...".
    "I'm sorry, I'm just a chatbot and cannot provide medical advice. "
    "It's best to consult with a healthcare professional.",
    "Please consult with a doctor for proper diagnosis and treatment.",
    "It's important to seek medical attention for your condition.",
    "I recommend reaching out to a healthcare professional to discuss your concerns.",
]
def get_random_greeting():
    """Return one entry of ``greetings`` chosen by ``random.choice``."""
    greeting = random.choice(greetings)
    return greeting
def respond(user_input):
    """Pick a reply for *user_input*.

    A message ending in "?" gets a random entry from ``responses``; any other
    message gets a random entry from ``common_questions``.
    """
    pool = responses if user_input.endswith("?") else common_questions
    return random.choice(pool)
def chat():
    """Run the console chat loop until the user types "exit".

    Prints a random greeting, then repeatedly reads a line after the ">"
    prompt and prints the reply from ``respond``. The loop ends when the
    line is "exit" (case-insensitive, surrounding whitespace ignored) or
    when standard input is exhausted.
    """
    print(get_random_greeting())
    while True:
        try:
            user_input = input(">")
        except EOFError:
            # stdin closed (e.g. piped input ran out): stop instead of crashing.
            break
        # Strip so that "exit " or " Exit" also ends the session.
        if user_input.strip().lower() == "exit":
            break
        print(respond(user_input))


# Page header carried over from the source document (it was embedded in the
# loop body, where it is not valid Python):
# Name:- Vipin Rawat Course:- MCA Section:- 4E
# Roll No:-
chat()
Q22:- Write a Python program to create an interactive health chatbot with possible diagnoses and
home treatment for the probable symptoms.
class HealthChatbot:
    """Console chatbot that asks three yes/no questions and prints a suggestion."""

    def __init__(self):
        # Answers in question order: fever, cough, difficulty breathing.
        self.symptoms = []

    @staticmethod
    def _is_yes(answer):
        """Return True when *answer* is 'yes', ignoring case and outer spaces."""
        return answer.strip().lower() == 'yes'

    def greet_user(self):
        """Print the opening message."""
        print("Hello! I am your health chatbot. Let's check your symptoms.")

    def ask_symptoms(self):
        """Ask the three symptom questions and store the answers.

        Answers are stored stripped and lower-cased in ``self.symptoms``.
        """
        print("Please answer the following questions with 'yes' or 'no'.")
        # Empty the list first: otherwise a second call would append after the
        # old answers and diagnose() would keep reading the stale first three.
        self.symptoms.clear()
        for question in (
            "Do you have a fever? ",
            "Do you have a cough? ",
            "Do you have difficulty breathing? ",
        ):
            self.symptoms.append(input(question).strip().lower())

    def diagnose(self):
        """Print a suggestion based on the first three stored answers.

        Raises:
            IndexError: If fewer than three answers are stored.
        """
        fever = self._is_yes(self.symptoms[0])
        cough = self._is_yes(self.symptoms[1])
        difficulty_breathing = self._is_yes(self.symptoms[2])

        # Most specific combination is checked first.
        if fever and cough and difficulty_breathing:
            print("Based on your symptoms, you may have pneumonia. Please consult a doctor immediately.")
        elif fever and cough:
            print("Based on your symptoms, you may have a common cold or flu. "
                  "Get plenty of rest and fluids.")
        elif difficulty_breathing:
            print("Based on your symptoms, you may have a respiratory issue. "
                  "Seek medical attention promptly.")
        else:
            print("Based on your symptoms, you seem to be generally healthy. "
                  "However, if you feel unwell, consult a doctor.")

    def start(self):
        """Run one full session: greet, ask the questions, print the result."""
        self.greet_user()
        self.ask_symptoms()
        self.diagnose()


# Page header carried over from the source document (it was embedded in the
# class body, where it is not valid Python):
# Name:- Vipin Rawat Course:- MCA Section:- 4E
# Roll No:-

if __name__ == "__main__":
    chatbot = HealthChatbot()
    chatbot.start()
Q23:- Write a Python program using SVM and TF-IDF to analyse the given corpus of words.
Q24:- Write a Python program to calculate a bag of words after carrying out the following preprocessing of the
text: convert all the text to lowercase and replace all punctuation with a
space.
import string
from collections import Counter


def create_bag_of_words(text):
    """Count word occurrences in *text* after simple normalization.

    Preprocessing: the text is lower-cased and every ASCII punctuation
    character is replaced with a space; the result is split on whitespace.

    Args:
        text: The input string.

    Returns:
        A ``collections.Counter`` mapping each word to its count (empty for
        empty or punctuation-only input).
    """
    # One translate() pass swaps each punctuation character for a space.
    punctuation_to_space = str.maketrans(
        string.punctuation, ' ' * len(string.punctuation)
    )
    cleaned = text.lower().translate(punctuation_to_space)
    return Counter(cleaned.split())


# Demo: the function was called here without being defined anywhere above.
text = "HOW how is the boss is a .Simple Example is sometimes better than better."
bow = create_bag_of_words(text)
print("Bag of Words:")
for word, count in bow.items():
    print(f"{word}: {count}")