# NOTE: the following text is document-viewer page residue, not part of the
# program: "Professional Documents", "Culture Documents", "Message", "Message".
import random
import math
import pandas as pd
import numpy as np
best_k = 0

# --- Best-k search for the "80%" configuration -----------------------------
# Evaluates every k in k_values and remembers the one with the highest score.
print("para el 80%")
filename = 'color_iris.csv'
split = 0.2
# Load the data once, with the fraction the evaluation loop has always used
# (1 - split = 0.8). The previous pre-loop load passed `split` and its result
# was overwritten before ever being read; the loop then reloaded the file for
# every k, so each k could be scored on a different partition.
# NOTE(review): presumably the second argument of load_dataset is the training
# fraction and the partition is random — confirm against its definition.
# NOTE(review): assumes evaluate_algorithm does not modify training_set or
# test_set in place — confirm.
training_set, test_set = load_dataset(filename, 1 - split)
kbest_80 = 0
# A k is only recorded if its score reaches at least this value.
kaccuracy80 = 0.01
for i in k_values:
    print("Para k= ", i)
    correct_predictions, wrong_predictions = evaluate_algorithm(
        training_set, test_set, i)
    # Presumably an accuracy-like score (higher is better) — confirm against
    # report_results.
    valor = report_results(correct_predictions, wrong_predictions)
    # ">=" means that on a tie the k appearing later in k_values wins.
    if valor >= kaccuracy80:
        kbest_80 = i
        kaccuracy80 = valor
print("El mejor k para 80% fue de: ", kbest_80)
# --- Best-k search for the "50%" configuration -----------------------------
# Evaluates every k in k_values and remembers the one with the highest score.
print("para el 50%")
filename = 'color_iris.csv'
split = 0.5
# Load the data once, with the fraction the evaluation loop has always used
# (1 - split = 0.5). The previous pre-loop load was overwritten before ever
# being read, and the loop reloaded the file for every k, so each k could be
# scored on a different partition.
# NOTE(review): presumably the second argument of load_dataset is the training
# fraction and the partition is random — confirm against its definition.
# NOTE(review): assumes evaluate_algorithm does not modify training_set or
# test_set in place — confirm.
training_set, test_set = load_dataset(filename, 1 - split)
kbest_50 = 0
# A k is only recorded if its score reaches at least this value.
kaccuracy50 = 0.01
for i in k_values:
    print("Para k= ", i)
    correct_predictions, wrong_predictions = evaluate_algorithm(
        training_set, test_set, i)
    # Presumably an accuracy-like score (higher is better) — confirm against
    # report_results.
    valor = report_results(correct_predictions, wrong_predictions)
    # ">=" means that on a tie the k appearing later in k_values wins.
    if valor >= kaccuracy50:
        kbest_50 = i
        kaccuracy50 = valor
print("El mejor k para 50% fue de: ", kbest_50)
# --- Best-k search for the "75%" configuration -----------------------------
# Evaluates every k in k_values and remembers the one with the highest score.
print("para el 75%")
filename = 'color_iris.csv'
split = 0.25
# Load the data once, with the fraction the evaluation loop has always used
# (1 - split = 0.75). The previous pre-loop load passed `split` and its result
# was overwritten before ever being read; the loop then reloaded the file for
# every k, so each k could be scored on a different partition.
# NOTE(review): presumably the second argument of load_dataset is the training
# fraction and the partition is random — confirm against its definition.
# NOTE(review): assumes evaluate_algorithm does not modify training_set or
# test_set in place — confirm.
training_set, test_set = load_dataset(filename, 1 - split)
kbest_75 = 0
# A k is only recorded if its score reaches at least this value.
kaccuracy75 = 0.01
for i in k_values:
    print("Para k= ", i)
    correct_predictions, wrong_predictions = evaluate_algorithm(
        training_set, test_set, i)
    # Presumably an accuracy-like score (higher is better) — confirm against
    # report_results.
    valor = report_results(correct_predictions, wrong_predictions)
    # ">=" means that on a tie the k appearing later in k_values wins.
    if valor >= kaccuracy75:
        kbest_75 = i
        kaccuracy75 = valor
print("El mejor k para 75% fue de: ", kbest_75)
# --- Distance of every row to the mean of its species' sample ---------------
# For each row of Iris.csv, measure euclidean_distance between the row's two
# selected columns and the same two entries of the matching sample's mean,
# collecting the results in one list per species.
data = pd.read_csv("Iris.csv")

# Positions (not labels) of the two values compared. Explicit .iloc is used
# below because plain `series[[0, 3]]` on a string-labelled Series relies on
# the deprecated positional fallback of Series.__getitem__.
# NOTE(review): if Iris.csv starts with an "Id" column, position 0 is that id
# rather than a measurement — confirm these are the intended columns.
feature_positions = [0, 3]

# The sample means do not depend on the row, so compute them once instead of
# once per row.
# NOTE(review): assumes each *_sample frame has string column labels, as a
# frame read from a CSV with a header does — confirm where they are built.
setosa_center = setosa_sample.mean(numeric_only=True).iloc[feature_positions]
versicolor_center = versicolor_sample.mean(
    numeric_only=True).iloc[feature_positions]
virginica_center = virginica_sample.mean(
    numeric_only=True).iloc[feature_positions]

setosa_distance = []
versicolor_distance = []
virginica_distance = []
for i in range(len(data)):
    row = data.iloc[i]
    features = row.iloc[feature_positions]
    species = row['Species']
    if species == 'Iris-setosa':
        distance = euclidean_distance(features, setosa_center)
        setosa_distance.append(distance)
    elif species == 'Iris-versicolor':
        distance = euclidean_distance(features, versicolor_center)
        versicolor_distance.append(distance)
    else:
        # Any other species label is treated as virginica, as before.
        distance = euclidean_distance(features, virginica_center)
        virginica_distance.append(distance)