# Professional Documents
# Culture Documents
# tp2 Indexation
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
import re
class TextAnalyzer:
    """Compute raw term counts and TF-IDF weights for a list of texts.

    Both vectorizers share the same custom tokenizer (`tokenize`), so the
    count and TF-IDF tables are built over the same vocabulary.
    """

    # Runs of word characters. Compiled once at class level because the
    # tokenizer is invoked for every document that gets vectorized.
    _TOKEN_RE = re.compile(r"\w+")

    def __init__(self, text_list):
        """Store the corpus and create the two (not yet fitted) vectorizers.

        Args:
            text_list: Iterable of raw document strings to analyze.
        """
        self.text_list = text_list
        self.vectorizer = CountVectorizer(tokenizer=self.tokenize)
        self.tfidf_vectorizer = TfidfVectorizer(tokenizer=self.tokenize)

    def tokenize(self, text):
        """Split *text* into lowercase word tokens.

        A token is any maximal run of word characters (letters, digits,
        underscore), so punctuation and whitespace act as separators and
        single-character words are kept.

        NOTE(review): the constructor referenced ``self.tokenize`` but no
        such method existed in the class; this is a minimal regex tokenizer
        filling that gap -- confirm it matches the intended tokenization.

        Args:
            text: One raw document string.

        Returns:
            List of tokens in order of appearance; empty for empty input.
        """
        return self._TOKEN_RE.findall(text.lower())

    def analyze_count(self):
        """Fit the count vectorizer on the corpus.

        Returns:
            pandas.DataFrame with one row per document and one column per
            vocabulary term, holding raw term counts.
        """
        return self._fit_to_frame(self.vectorizer)

    def analyze_tfidf(self):
        """Fit the TF-IDF vectorizer on the corpus.

        Returns:
            pandas.DataFrame with one row per document and one column per
            vocabulary term, holding TF-IDF weights.
        """
        return self._fit_to_frame(self.tfidf_vectorizer)

    def _fit_to_frame(self, vectorizer):
        """Fit *vectorizer* on the stored texts and tabulate the result."""
        matrix = vectorizer.fit_transform(self.text_list)
        feature_names = vectorizer.get_feature_names_out()
        # fit_transform yields a sparse matrix; densify it so each term
        # becomes a labelled column (fine for small teaching corpora, but
        # memory-hungry for large vocabularies).
        return pd.DataFrame(matrix.toarray(), columns=feature_names)
# Driver: build one analyzer over the corpus `textes` (defined outside this
# excerpt), then print the result of each analysis under its own heading.
analyzer = TextAnalyzer(textes)

count_results = analyzer.analyze_count()
# sep="\n" puts the heading and the result on separate lines, exactly as
# two consecutive print() calls would.
print("CountVectorizer results:", count_results, sep="\n")

tfidf_results = analyzer.analyze_tfidf()
# The leading "\n" in the heading leaves a blank line between the sections.
print("\nTfidfVectorizer results:", tfidf_results, sep="\n")