Documento Sin Título

You might also like

Download as pdf or txt
Download as pdf or txt
You are on page 1 of 1

# Mount Google Drive inside the Colab runtime so files under
# /content/drive can be read. The import statement had been split across
# lines ("from google." / "colab import drive"), which is a syntax error;
# it is rejoined here.
from google.colab import drive

# force_remount=True is passed so the mount is redone even when a previous
# mount already exists in this session.
drive.mount("/content/drive", force_remount=True)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import StandardScaler
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold

# Load the dataset from the CSV file on the mounted Drive and echo it so
# the raw contents can be inspected.
dataset = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/Colab/LineFault.csv',
)
print(dataset)

# Data cleaning: turn the literal '?' placeholder into NaN in the listed
# columns so that dropna() below treats those cells as missing.
labels = ['Ia', 'Ib', 'Ic', 'Place', 'Va', 'Vb', 'Vc']
for lbl in labels:
    # Assign the result back to the column. Calling
    # dataset[lbl].replace(..., inplace=True) acts on an intermediate
    # Series and does not update `dataset` under pandas copy-on-write.
    # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
    dataset[lbl] = dataset[lbl].replace('?', np.nan)

# Drop every row that has a missing value in any column.
dataset_cleaned = dataset.dropna()

# Standardise the feature columns with scikit-learn's StandardScaler.
# NOTE(review): despite its name, X_resampled holds the *scaled* features;
# no resampling has happened at this point.
scaler = StandardScaler()
X = dataset_cleaned.loc[:, ['Ia', 'Ib', 'Ic', 'Place', 'Va', 'Vb', 'Vc']]
X_resampled = scaler.fit_transform(X)

# Show the standardised feature values.
print(X_resampled)

# Check whether the dataset is balanced: count the rows per Fault_Type
# value and draw one bar per class.
y = dataset_cleaned['Fault_Type']
counter = Counter(y)
plt.bar(list(counter), list(counter.values()))
plt.ylabel('Count')
plt.xlabel('Fault_Type')
plt.show()

# Balance the classes by random resampling. The sampler used here is
# RandomOverSampler, i.e. this is oversampling, not undersampling.
# random_state=42 fixes the seed so the result is reproducible.
ros = RandomOverSampler(random_state=42)
X_resampled2, y_resampled = ros.fit_resample(X=X_resampled, y=y)

# Plot the class distribution of the resampled target.
# unique() lists labels in order of appearance, while value_counts() sorts
# by frequency, so passing the two to plt.bar() independently can put a
# count under the wrong label. Reindexing the counts by unique() keeps the
# bars in order of appearance and ties each height to its own label.
class_counts = y_resampled.value_counts().reindex(y_resampled.unique())
plt.bar(class_counts.index, class_counts.values)
plt.xlabel('Fault_Type')
plt.ylabel('Count')
plt.title('Distribución de clases en el dataset balanceado')
plt.show()

You might also like