DOCUMENTATION
BACHELOR OF TECHNOLOGY
IN
COMPUTER SCIENCE AND ENGINEERING
Submitted by
122010304057 Kalyan
Associate Professor
GITAM
(Deemed to be University)
VISAKHAPATNAM
OCTOBER 202
DEPARTMENT OF COMPUTER SCIENCE AND ENGINEERING
GITAM SCHOOL OF TECHNOLOGY
GITAM (Deemed to be University)
CERTIFICATE
This is to certify that the project report entitled “Lab Documentation” is a bona fide
record of the work carried out by Kalyan (122010304057), submitted in partial
fulfilment of the requirements for the award of the degree of Bachelor of Technology in
Computer Science and Engineering.
Professor
Task 1
Write a program to classify attendance information using a 75 percent threshold.
no_of_classes = [2, 3, 5, 2, 6, 5, 3, 4]
no_attended = [2, 2, 3, 1, 5, 4, 3, 2]
# Classify each class against the 75 percent threshold
for i in range(len(no_of_classes)):
    percent = (no_attended[i] / no_of_classes[i]) * 100
    if percent >= 75:
        print(f"Class {i + 1}: {percent:.2f}% - above threshold")
    else:
        print(f"Class {i + 1}: {percent:.2f}% - below threshold")
percent_attendance = (sum(no_attended) / sum(no_of_classes)) * 100
print("\nFinal Attendance:")
if percent_attendance >= 75:
    print(f"{percent_attendance:.2f}% - meets the 75% requirement")
else:
    print(f"{percent_attendance:.2f}% - below the 75% requirement")
Output:
Task 2
Implement precision and recall using the digital library of GITAM-KRC.
y_true = [1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1]
y_pred = [1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1]
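A minimal sketch of the evaluation step, deriving precision and recall from the labels above (the counter names tp, fp, and fn are placeholders, not from the original); scikit-learn's precision_score and recall_score would give the same values as a cross-check.
# Count true positives, false positives, and false negatives
tp = sum(t == 1 and p == 1 for t, p in zip(y_true, y_pred))
fp = sum(t == 0 and p == 1 for t, p in zip(y_true, y_pred))
fn = sum(t == 1 and p == 0 for t, p in zip(y_true, y_pred))
precision = tp / (tp + fp)   # fraction of predicted positives that are correct
recall = tp / (tp + fn)      # fraction of actual positives that are found
print(f"Precision: {precision:.3f}")
print(f"Recall: {recall:.3f}")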
Output:
Task 3
Implement any one of the indexing algorithms.
class Index_algo:
    def __init__(self):
        self.index = {}
        self.documents = []

    # Split a document into words and store it for searching
    def add_document(self, content):
        words = content.split()
        self.documents.append(words)

    # Print the id, length, and word position of every document containing the query
    def search(self, query):
        for doc_id, sub_doc in enumerate(self.documents):
            if query in sub_doc:
                print(doc_id, len(sub_doc), sub_doc.index(query))

index = Index_algo()
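A short usage example for the class above (the sample sentences are placeholders, not from the original report):
index.add_document("information retrieval systems index documents")
index.add_document("an inverted index maps words to documents")
index.search("index")   # prints document id, document length, and word position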
Task 4
Generate n-grams from the words of a text.
def generate_ngrams(text, n):
    words = text.split()
    words.insert(0, "#")      # boundary marker at the start of the text
    print(words)
    ngrams = []
    if (len(words) % 2 != 0):
        words.append("#")     # pad so the word list has even length
    # Slide a window of n words across the text
    for i in range(len(words) - n + 1):
        ngram = " ".join(words[i:i + n])
        ngrams.append(ngram)
    return ngrams

text = "information retrieval systems"   # placeholder sample text
ngrams = generate_ngrams(text, 2)
for ngram in ngrams:
    print(ngram)
Output:
Task 5
Implement the k-means clustering algorithm for information extraction.
import numpy as np

text_data = [
    "Social media has been the biggest platform for people to express themselves.",
    "Some people still prefer to lead a private life.",
    "Many problems throughout the world are expressed on the online platform.",
    "The amount of data the social media companies need to handle is more.",
    "Many companies are worth billions as they succeed by the number of people using their platform, giving them lots of data to generate ads.",
]
k = 3
np.random.seed(0)

# Similarity between two texts: number of distinct words they share
def similarity(text1, text2):
    words1 = set(text1.lower().split())
    words2 = set(text2.lower().split())
    common_words = len(words1.intersection(words2))
    return common_words

# Start with k randomly chosen texts as the initial centroids
centroids = list(np.random.choice(text_data, k, replace=False))
max_iterations = 100
for _ in range(max_iterations):
    # Assign every text to the centroid it shares the most words with
    cluster_assignments = [max(range(k), key=lambda c: similarity(text, centroids[c]))
                           for text in text_data]
    clusters = {}
    for i, cluster_id in enumerate(cluster_assignments):
        clusters.setdefault(cluster_id, []).append(text_data[i])
    # Recompute each centroid as the text most similar to its own cluster
    new_centroids = [max(clusters.get(c, [centroids[c]]),
                         key=lambda t: sum(similarity(t, o) for o in clusters.get(c, [])))
                     for c in range(k)]
    if new_centroids == centroids:
        break
    centroids = new_centroids

for cluster_id, cluster_texts in clusters.items():
    print(f"Cluster {cluster_id}:")
    for text in cluster_texts:
        print(f"- {text}")
    print()
Output:
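As a cross-check (not part of the original report), the same texts can be clustered with standard k-means on TF-IDF vectors, assuming scikit-learn is available:
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer

# Vectorize the texts and cluster them with library k-means
X = TfidfVectorizer().fit_transform(text_data)
labels = KMeans(n_clusters=k, n_init=10, random_state=0).fit_predict(X)
print(labels)   # cluster id for each text in text_data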
Task 6
Implement the SDD search algorithm.
import numpy as np

# Sample term-weight vectors: first two documents relevant, last two not (placeholder data)
documents = np.array([[1.0, 0.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
query_vector = np.array([1.0, 0.0, 0.0])
relevant_centroid = documents[:2].mean(axis=0)
non_relevant_centroid = documents[2:].mean(axis=0)
# Move the query toward the relevant centroid and away from the non-relevant one
updated_query_vector = query_vector + relevant_centroid - non_relevant_centroid
print(relevant_centroid)
print(non_relevant_centroid)
print(updated_query_vector)
Output:
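A hypothetical follow-up (an assumption, not shown in the original) that re-scores the documents against the updated query by dot product:
scores = documents @ updated_query_vector
for doc_id in np.argsort(scores)[::-1]:
    print(f"Document {doc_id}: {scores[doc_id]:.3f}")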
Task 7
Implement the Knuth-Morris-Pratt (KMP) pattern-matching algorithm.
def compute_lps(pattern):
    # lps[i]: length of the longest proper prefix of pattern[:i+1] that is also a suffix
    m = len(pattern)
    lps = [0] * m
    j = 0
    i = 1
    while i < m:
        if pattern[i] == pattern[j]:
            j += 1
            lps[i] = j
            i += 1
        elif j != 0:
            j = lps[j - 1]
        else:
            lps[i] = 0
            i += 1
    return lps

def kmp_search(text, pattern):
    n = len(text)
    m = len(pattern)
    lps = compute_lps(pattern)
    flag = False
    i = 0
    j = 0
    while i < n:
        if pattern[j] == text[i]:
            i += 1
            j += 1
            if j == m:
                print(f"Pattern found at index {i - j}")
                j = lps[j - 1]
                flag = True
        elif j != 0:
            # Mismatch after a partial match: fall back within the pattern
            j = lps[j - 1]
        else:
            i += 1
    if flag == False:
        print("Pattern not found")

# Example usage (sample strings; the original text was truncated in the source):
text = "btech is an under graduate degree"
pattern = "under graduate"
kmp_search(text, pattern)
Output:
Task 8
Implement relevance feedback for YouTube search.
import random

# Placeholder catalogue of video titles (sample data, not from the original)
search_results = ["Python tutorial", "Information retrieval lecture",
                  "K-means demo", "PageRank explained", "KMP algorithm"]
relevance_scores = [random.uniform(0, 1) for _ in search_results]

while 1:
    # The query only drives the demo loop; the result list is a fixed sample
    user_query = input("\nEnter a search query ('end' to quit): ")
    if user_query == "end":
        exit()
    print("Search Results:")
    for i, result in enumerate(search_results, start=1):
        print(f"{i}. {result}")
    selected_video_index = int(input("Enter the number of the video you watched (0 to exit): "))
    if selected_video_index == 0:
        exit()
    user_feedback = input("Was the video relevant? (y/n): ")
    # Positive feedback nudges the score up, negative feedback down
    if user_feedback.lower() == "y":
        relevance_increase = 0.1
    else:
        relevance_increase = -0.1
    relevance_scores[selected_video_index - 1] += relevance_increase
    # Re-rank the results by their updated relevance scores
    pairs = sorted(zip(relevance_scores, search_results), reverse=True)
    relevance_scores = [score for score, _ in pairs]
    sorted_results = [result for _, result in pairs]
    print("Updated Ranking:")
    for i, result in enumerate(sorted_results, start=1):
        print(f"{i}. {result}")
    search_results = sorted_results
Output:
Task 9
Implement the PageRank algorithm for information retrieval.
import numpy as np

# Adjacency matrix: web_graph[j, i] = 1 means page j links to page i
web_graph = np.array([
    [0, 1, 1, 1],
    [1, 0, 0, 1],
    [0, 1, 0, 1],
    [1, 0, 1, 0]
])
num_pages = len(web_graph)
pagerank = np.ones(num_pages) / num_pages
damping_factor = 0.65
tolerance = 1e-6

while True:
    new_pagerank = np.zeros(num_pages)
    for i in range(num_pages):
        for j in range(num_pages):
            if web_graph[j, i] == 1:
                # Page j shares its rank equally among its outgoing links
                new_pagerank[i] += pagerank[j] / np.sum(web_graph[j, :])
    new_pagerank = (1 - damping_factor) / num_pages + damping_factor * new_pagerank
    # Stop once the ranks change by less than the tolerance
    if np.sum(np.abs(new_pagerank - pagerank)) < tolerance:
        pagerank = new_pagerank
        break
    pagerank = new_pagerank

print(pagerank)
sorted_pages = np.argsort(pagerank)[::-1]
for i in sorted_pages:
    print(f"Page {i}: {pagerank[i]:.4f}")
Output:
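For a quick cross-check (not in the original report), the same iteration can be written in matrix form, reusing web_graph and damping_factor from above:
# Row-normalize so each page splits its rank evenly over its out-links
M = web_graph / web_graph.sum(axis=1, keepdims=True)
pr = np.ones(num_pages) / num_pages
for _ in range(100):
    pr = (1 - damping_factor) / num_pages + damping_factor * (M.T @ pr)
print(pr)   # should agree with the converged pagerank vector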
Task 10
Implement one of the similarity measurement algorithms.
import math

# Placeholder document collection and query (sample data)
documents = ["information retrieval systems rank documents",
             "cosine similarity measures the angle between vectors",
             "documents are ranked by similarity to the query"]
query = "similarity between documents"

def preprocess(doc):
    return doc.lower().split()

# TF-IDF weight of each query term within one document
def compute_tfidf(document, query_terms):
    document_terms = preprocess(document)
    tf = {term: document_terms.count(term) / len(document_terms) for term in query_terms}
    idf = {term: math.log(len(documents) / (1 + sum(term in preprocess(d) for d in documents))) + 1
           for term in query_terms}
    return {term: tf[term] * idf[term] for term in query_terms}

# Cosine of the angle between the document and query weight vectors
def cosine_similarity(doc_tfidf, query_tfidf):
    dot_product = sum(doc_tfidf[term] * query_tfidf[term] for term in query_tfidf)
    doc_magnitude = math.sqrt(sum(v * v for v in doc_tfidf.values()))
    query_magnitude = math.sqrt(sum(v * v for v in query_tfidf.values()))
    return dot_product / (doc_magnitude * query_magnitude) if doc_magnitude and query_magnitude else 0.0

query_terms = preprocess(query)
query_tfidf = compute_tfidf(query, query_terms)
document_similarities = [cosine_similarity(compute_tfidf(doc, query_terms), query_tfidf)
                         for doc in documents]
ranked_documents = sorted(enumerate(document_similarities), key=lambda x: x[1], reverse=True)
for doc_id, score in ranked_documents:
    print(f"Document {doc_id}: {score:.4f}")