# Professional Documents
# Culture Documents
# CODIGO#
# CODIGO#
# CODIGO#
import os
import numpy as np
import math
import scipy
import nltk
def ejecutarPrograma(opcion):
    '''Read the ``.txt`` files of a fixed folder, compare them by TF-IDF and plot.

    Every file ending in ``.txt`` inside the hard-coded ``path`` is read and
    turned into a TF-IDF matrix using the Spanish stop-word list.  The cosine
    similarity, the distance (``1 - cosine``) and the angle in degrees between
    the document at index 0 and the document at index 99 are printed, then
    the full pairwise distance matrix is computed and one of four plotting
    branches runs.

    Args:
        opcion: branch selector.  ``1``, ``2`` and ``3`` each select their own
            plotting branch; any other value runs the ``ward`` linkage branch.

    Returns:
        None.  Results are printed and drawn through ``plt``.

    NOTE(review): ``stopwords``, ``TfidfVectorizer``, ``cosine_similarity``,
    ``plt`` and ``ward`` are used here but are not imported in the visible
    part of the file; they must be provided at module level.
    '''
    path = '/Users/PC_POWER/Downloads/Mineria_Datos'
    documents = []
    titles = []
    for doc in os.listdir(path):
        if not doc.endswith('.txt'):
            continue
        titles.append(doc)
        # The context manager closes the file even if read() raises.
        with open(os.path.join(path, doc), 'r') as f:
            documents.append(f.read())

    # Build the stop-word list.
    sw = stopwords.words('spanish')
    # The list has to be passed by keyword: the vectorizer does not take the
    # stop words as its first positional argument.
    tfidf_vectorizer = TfidfVectorizer(stop_words=sw)
    tfidf_matrix = tfidf_vectorizer.fit_transform(documents)
    print(tfidf_matrix.shape)

    print('Obteniendo similitud de coseno entre 2 documentos (si son iguales el valor es 1)')
    # Row 0 against row 99, each kept as a one-row slice of the matrix.
    cosine = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[99:100])
    print(cosine)
    dist = 1 - cosine
    print(dist)
    # math.acos needs a plain float inside [-1, 1]; take the single entry of
    # the 1x1 result and clamp it so rounding error cannot cause a domain error.
    similarity = max(-1.0, min(1.0, float(cosine[0][0])))
    angle_in_radians = math.acos(similarity)
    print(math.degrees(angle_in_radians))

    # Pairwise distance between every pair of documents.
    dist = 1 - cosine_similarity(tfidf_matrix)

    if opcion == 1:
        print('Inicio')
        r = 1
        d = 2 * r * (1 - cosine)
        plt.ylim([-1.1, 1.1])
        fig = plt.gcf()
        # NOTE(review): circle1 and circle2 have no definition in the visible
        # source; unless they exist elsewhere this branch raises NameError.
        fig.gca().add_artist(circle1)
        fig.gca().add_artist(circle2)
        print('Fin')
    elif opcion == 2:
        print('Inicio')
        # color-blind-friendly palette
        # NOTE(review): x, y, color and name have no definition in the
        # visible source; confirm where they come from.
        plt.scatter(x, y, c=color)
        plt.text(x, y, name)
        plt.show()
        print('Fin')
    elif opcion == 3:
        print('Inicio')
        # NOTE(review): mds, x, y, z and s have no definition in the visible
        # source; confirm where they come from.
        pos = mds.fit_transform(dist)
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        ax.text(x, y, z, s)
        plt.show()
        print('Fin')
    else:
        print('Inicio')
        linkage_matrix = ward(dist)
        plt.tight_layout()
        plt.show()
        print('Fin')
print ('Opciones:')
# input() returns text on Python 3, while ejecutarPrograma compares its
# argument against the integers 1, 2 and 3, so the entry is converted first.
entrada = input()
try:
    opcion = int(entrada)
except ValueError:
    # Not a number: pass it through unchanged so the default branch runs.
    opcion = entrada
ejecutarPrograma(opcion)