Codigo

library(tidyverse)
library(ggthemes)
library(ggpubr)
# Modelling
# ===============================================
#
# h2o needs a Java runtime; on Debian/Ubuntu: sudo apt install default-jre
library(h2o)
# Start a local H2O cluster.
h2o.init(
  ip = "localhost",
  nthreads = -1,       # -1: use every available core
  max_mem_size = "6g"  # upper bound on the memory available to the cluster
)
# Simulated data
# ==============================================================================
# The URL has to be one unbroken string: a line break inside the literal
# becomes part of the URL and the download fails. It is assembled with
# paste0() only to keep the source lines short.
datos <- read_csv(
  paste0(
    "https://raw.githubusercontent.com/JoaquinAmatRodrigo/",
    "Estadistica-machine-learning-python/master/data/blobs.csv"
  )
)
# Encode the response as a factor (the models below are fitted with a
# multinomial distribution).
datos <- datos %>% mutate(y = as.factor(y))
# Scatter plot of the two predictors with points filled by class.
# The legend, every text element and the axis ticks are hidden.
ggplot(datos, aes(x_1, x_2, fill = y)) +
  geom_point(size = 2, shape = 21) +
  theme_fivethirtyeight() +
  theme(
    axis.ticks = element_blank(),
    text = element_blank(),
    legend.position = "none"
  )
# Upload the data to the cluster and split it into three frames:
# ratios 0.6 and 0.2 for the first two, the remainder for the third.
datos_h2o <- as.h2o(datos)

particiones <- h2o.splitFrame(datos_h2o, ratios = c(0.6, 0.2), seed = 123)

# Give each partition a fixed key inside the cluster.
datos_train <- h2o.assign(particiones[[1]], key = "datos_train")
datos_validation <- h2o.assign(particiones[[2]], key = "datos_validacion")
datos_test <- h2o.assign(particiones[[3]], key = "datos_test")
# Models
# ==============================================================================
# Model 1: one hidden layer with a single neuron.
modelo_1 <- h2o.deeplearning(
  model_id = "modelo_1",
  training_frame = datos_train,
  x = c("x_1", "x_2"),
  y = "y",
  distribution = "multinomial",
  standardize = TRUE,
  hidden = 1,
  activation = "Rectifier",
  adaptive_rate = FALSE,
  epochs = 1000,
  stopping_rounds = 0,  # 0 disables early stopping
  seed = 123
)

# Model 2: one hidden layer with 10 neurons.
# NOTE(review): only x, y, standardize, hidden, epochs and seed survived in
# this fragment; the call header and the remaining arguments are restored
# from modelo_1 -- confirm against the original script.
modelo_2 <- h2o.deeplearning(
  x = c("x_1", "x_2"),
  y = "y",
  distribution = "multinomial",
  training_frame = datos_train,
  standardize = TRUE,
  activation = "Rectifier",
  adaptive_rate = FALSE,
  hidden = 10,
  stopping_rounds = 0,  # 0 disables early stopping
  epochs = 1000,
  seed = 123,
  model_id = "modelo_2"
)

# Model 3: two hidden layers with 10 neurons each.
# NOTE(review): only x, y, standardize, hidden, epochs and seed survived in
# this fragment; the call header and the remaining arguments are restored
# from modelo_1 -- confirm against the original script.
modelo_3 <- h2o.deeplearning(
  x = c("x_1", "x_2"),
  y = "y",
  distribution = "multinomial",
  training_frame = datos_train,
  standardize = TRUE,
  activation = "Rectifier",
  adaptive_rate = FALSE,
  hidden = c(10, 10),
  stopping_rounds = 0,  # 0 disables early stopping
  epochs = 1000,
  seed = 123,
  model_id = "modelo_3"
)

# Model 4: three hidden layers with 50 neurons each.
# NOTE(review): only x, y, standardize, hidden, epochs and seed survived in
# this fragment; the call header and the remaining arguments are restored
# from modelo_1 -- confirm against the original script.
modelo_4 <- h2o.deeplearning(
  x = c("x_1", "x_2"),
  y = "y",
  distribution = "multinomial",
  training_frame = datos_train,
  standardize = TRUE,
  activation = "Rectifier",
  adaptive_rate = FALSE,
  hidden = c(50, 50, 50),
  stopping_rounds = 0,  # 0 disables early stopping
  epochs = 1000,
  seed = 123,
  model_id = "modelo_4"
)
# Predictions of each model
# ==============================================================================
# 75 x 75 grid of points spanning the observed range of both predictors.
# `length.out` is spelled out in full instead of relying on partial matching
# of `length`.
grid_predicciones <- expand.grid(
  x_1 = seq(from = min(datos$x_1), to = max(datos$x_1), length.out = 75),
  x_2 = seq(from = min(datos$x_2), to = max(datos$x_2), length.out = 75)
)
# Copy of the grid inside the cluster, used as `newdata` for the predictions.
grid_predicciones_h2o <- as.h2o(grid_predicciones)
# Predict the class of every grid point with each of the four models.
# NOTE(review): only the `object = modelo_N` lines of the calls for models
# 2-4 survived; the call headers and `newdata` are restored from the
# predicciones_1 call -- confirm against the original script.
predicciones_1 <- h2o.predict(
  object = modelo_1,
  newdata = grid_predicciones_h2o
)
predicciones_2 <- h2o.predict(
  object = modelo_2,
  newdata = grid_predicciones_h2o
)
predicciones_3 <- h2o.predict(
  object = modelo_3,
  newdata = grid_predicciones_h2o
)
predicciones_4 <- h2o.predict(
  object = modelo_4,
  newdata = grid_predicciones_h2o
)

# Bring the predicted labels back into the local grid, one column per model.
# The plots map `color` to the columns modelo_1 ... modelo_4.
grid_predicciones$modelo_1 <- as.vector(predicciones_1$predict)
grid_predicciones$modelo_2 <- as.vector(predicciones_2$predict)
grid_predicciones$modelo_3 <- as.vector(predicciones_3$predict)
grid_predicciones$modelo_4 <- as.vector(predicciones_4$predict)

# Plot of predictions
# ==============================================================================
# One panel per model: every grid point coloured by its predicted class.
# NOTE(review): p2-p4 were reduced to their `labs()` lines; they are rebuilt
# on the p1 pattern. Titles now state the `hidden` value each model is
# trained with (1, 10, c(10, 10), c(50, 50, 50)); the previous "(5)" and
# "(20, 20)" did not match any model in this script.

# Theme shared by the four panels: no legend, no axis decoration.
tema_predicciones <- theme(
  legend.position = "none",
  plot.title = element_text(size = 11),
  axis.text = element_blank(),
  axis.title = element_blank(),
  axis.ticks = element_blank()
)

p1 <- ggplot(
  data = grid_predicciones,
  aes(x = x_1, y = x_2, color = modelo_1)
) +
  geom_point(size = 0.5) +
  labs(title = "Arquitectura: (1)") +
  tema_predicciones

p2 <- ggplot(
  data = grid_predicciones,
  aes(x = x_1, y = x_2, color = modelo_2)
) +
  geom_point(size = 0.5) +
  labs(title = "Arquitectura: (10)") +
  tema_predicciones

p3 <- ggplot(
  data = grid_predicciones,
  aes(x = x_1, y = x_2, color = modelo_3)
) +
  geom_point(size = 0.5) +
  labs(title = "Arquitectura: (10, 10)") +
  tema_predicciones

p4 <- ggplot(
  data = grid_predicciones,
  aes(x = x_1, y = x_2, color = modelo_4)
) +
  geom_point(size = 0.5) +
  labs(title = "Arquitectura: (50, 50, 50)") +
  tema_predicciones

# 2 x 2 arrangement of the four panels.
ggarrange(p1, p2, p3, p4, nrow = 2, ncol = 2)
# Number of neurons
# ==============================================================================
# Grid search over the size of a single hidden layer.
hiperparametros <- list(hidden = c(1, 5, 10, 15, 25, 50, 100, 300, 500))
grid_dl <- h2o.grid(
  algorithm = "deeplearning",
  epochs = 100,
  # Response variable and predictors
  x = c("x_1", "x_2"),
  y = "y",
  # Training data: the call cannot fit any model without it.
  training_frame = datos_train,
  # validation_frame = datos_validation, # For simple validation
  nfolds = 3, # cross-validation
  standardize = TRUE,
  hyper_params = hiperparametros,
  search_criteria = list(strategy = "Cartesian"),
  seed = 123,
  grid_id = "grid_dl"
)
# Retrieve the grid with its models sorted from highest to lowest accuracy.
resultados_grid <- h2o.getGrid(
  grid_id = "grid_dl",
  sort_by = "accuracy",
  decreasing = TRUE
)

# Accuracy against the number of neurons. Square brackets are stripped from
# `hidden` before it is converted to a number.
data.frame(resultados_grid@summary_table) %>%
  mutate(
    accuracy = as.numeric(accuracy),
    hidden = as.numeric(str_remove_all(hidden, pattern = "\\[|\\]"))
  ) %>%
  ggplot(aes(x = hidden, y = accuracy, group = 1)) +
  geom_line() +
  geom_point() +
  labs(title = "Accuracy del modelo vs número de neuronas") +
  theme_bw()
# Learning rate
# ==============================================================================
# Grid search over the learning rate with a fixed 10-neuron hidden layer.
# The candidate list previously contained 0.0001 twice; each value is now
# listed once.
hiperparametros <- list(rate = c(0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10))
grid_dl_2 <- h2o.grid(
  algorithm = "deeplearning",
  # `rate` only takes effect when the adaptive learning rate is disabled.
  adaptive_rate = FALSE,
  rate_annealing = 0,
  rate_decay = 0,
  nesterov_accelerated_gradient = FALSE,
  hidden = 10,
  epochs = 50,
  x = c("x_1", "x_2"),
  y = "y",
  training_frame = datos_train,
  # NOTE(review): nfolds and search_criteria mirror grid_dl; they were not
  # present in the surviving fragment -- confirm against the original script.
  nfolds = 3,
  standardize = TRUE,
  hyper_params = hiperparametros,
  search_criteria = list(strategy = "Cartesian"),
  seed = 123,
  grid_id = "grid_dl_2"
)
# Retrieve the grid with its models sorted from highest to lowest accuracy.
# NOTE(review): the assignment, the call header and `sort_by` were missing
# from this fragment; they are restored from the equivalent call for
# "grid_dl" -- confirm against the original script.
resultados_grid <- h2o.getGrid(
  sort_by = "accuracy",
  grid_id = "grid_dl_2",
  decreasing = TRUE
)

# Accuracy against the learning rate, with the x axis on a log10 scale.
data.frame(resultados_grid@summary_table) %>%
  select(-model_ids) %>%
  mutate(
    accuracy = as.numeric(accuracy),
    rate = as.numeric(rate)
  ) %>%
  ggplot(aes(x = rate, y = accuracy, group = 1)) +
  scale_x_continuous(trans = "log10") +
  geom_line() +
  geom_point() +
  labs(title = "Accuracy del modelo vs learning rate") +
  theme_bw()
# Learning rate + number of neurons
# ==============================================================================
# Grid search over every combination of learning rate and hidden-layer size.
# The rate list previously contained 0.0001 twice; each value is now listed
# once.
hiperparametros <- list(
  rate = c(0.00001, 0.0001, 0.001, 0.01, 0.1, 1),
  hidden = c(1, 5, 10, 15, 25, 50, 100, 300, 500)
)
grid_dl_3 <- h2o.grid(
  algorithm = "deeplearning",
  # `rate` only takes effect when the adaptive learning rate is disabled.
  adaptive_rate = FALSE,
  rate_annealing = 0,
  rate_decay = 0,
  nesterov_accelerated_gradient = FALSE,
  epochs = 50,
  x = c("x_1", "x_2"),
  y = "y",
  training_frame = datos_train,
  # NOTE(review): nfolds and search_criteria mirror grid_dl; they were not
  # present in the surviving fragment -- confirm against the original script.
  nfolds = 3,
  standardize = TRUE,
  hyper_params = hiperparametros,
  search_criteria = list(strategy = "Cartesian"),
  seed = 123,
  grid_id = "grid_dl_3"
)
# Retrieve the grid with its models sorted from highest to lowest accuracy.
# NOTE(review): the assignment, the call header and `sort_by` were missing
# from this fragment; they are restored from the equivalent call for
# "grid_dl" -- confirm the target variable name against the original script.
resultados_grid <- h2o.getGrid(
  sort_by = "accuracy",
  grid_id = "grid_dl_3",
  decreasing = TRUE
)

Codigo

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Codigo

Uploaded by

Copyright:

Available Formats

library(tidyverse)

#creacion de un cluster local con todos los cores disponibles.

datos <- datos %>% mutate(y = as.factor(y))

ggplot(data = datos, aes(x = x_1, y = x_2, fill = y)) +

datos_h2o <- as.h2o(datos)

modelo_2 <- h2o.deeplearning(

modelo_3 <- h2o.deeplearning(

modelo_4 <- h2o.deeplearning(

# Predicciones de cada modelo

grid_predicciones <- expand.grid(

grid_predicciones_h2o <- as.h2o(grid_predicciones)

predicciones_1 <- h2o.predict(

predicciones_3 <- h2o.predict(

predicciones_4 <- h2o.predict(

grid_predicciones$modelo_1 <- as.vector(predicciones_1$predict)

p2 <- ggplot(data = grid_predicciones, aes(x = x_1, y = x_2, color = modelo_2)) +

p3 <- ggplot(data = grid_predicciones, aes(x = x_1, y = x_2, color = modelo_3)) +

p4 <- ggplot(data = grid_predicciones, aes(x = x_1, y = x_2, color = modelo_4)) +

ggarrange(p1, p2, p3, p4, nrow = 2, ncol = 2)

grid_dl <- h2o.grid(

# Se muestran los modelos ordenados de mayor a menor accuracy

grid_dl_2 <- h2o.grid(

# Se muestran los modelos ordenados de mayor a menor accuracy

data.frame(resultados_grid@summary_table) %>% select(-model_ids) %>%

# Learning rate + número de neuronas

grid_dl_3 <- h2o.grid(

You might also like