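# Document-viewer residue from the page this script was exported from
# (not part of the program):
#   Download as txt, pdf, or txt
#   Download as txt, pdf, or txt
#   You are on page 1 of 3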

# Install necessary libraries
#
# Only packages that are not already available are installed, so re-running
# the script does not re-download everything each time.
required_packages <- c(
  "readr", "caret", "randomForest", "class",
  "e1071", "dplyr", "tidyr", "stringr"
)
is_installed <- vapply(
  required_packages, requireNamespace, logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_packages[!is_installed])
}
# NOTE(review): the "svmRadial" method used further down is, per the caret
# model list, backed by the kernlab package, which is not installed or loaded
# here -- confirm it is available in the target environment.

# Load libraries
library("readr", quietly = TRUE)
library("caret", quietly = TRUE)
library("randomForest", quietly = TRUE)
library("class", quietly = TRUE)
library("e1071", quietly = TRUE)
library("dplyr", quietly = TRUE)
library("tidyr", quietly = TRUE)
# The original call was missing its closing parenthesis, which made the whole
# file fail to parse.
library("stringr", quietly = TRUE)

# Set the working directory
#
# "/cloud/project" is only switched to when it actually exists; otherwise the
# data files are read from the current working directory instead of aborting
# with a "cannot change working directory" error.
project_dir <- "/cloud/project"
if (dir.exists(project_dir)) {
  setwd(project_dir)
} else {
  message(
    "Directory '", project_dir, "' not found; reading data from ", getwd()
  )
}

# Read data
train <- read_csv("train.csv")
test <- read_csv("test.csv")

# Storing the PassengerId column from the test data so it can be attached to
# the predictions when the submission files are built
passid <- test$PassengerId

# ---- Feature engineering ----
#
# The same transformation has to be applied to 'train' and to 'test':
# the models are fitted on the engineered columns ('num', 'c_deck', 'C_side',
# 'bills', 'age_group', ...), so 'predict()' needs them in 'test' as well.
# The steps therefore live in one helper that is called on both tables.

# Build the modelling features for one passenger table.
#
# data: a data frame with the columns PassengerId, Cabin, HomePlanet,
#       Destination, RoomService, FoodCourt, ShoppingMall, Spa, VRDeck,
#       CryoSleep, VIP and Age. 'Transported' is converted to a factor when
#       present and ignored when absent (as in the test data).
# Returns 'data' with PassengerId and Cabin split into parts, missing values
# filled with defaults, and the 'bills' and 'age_group' columns added.
prepare_features <- function(data) {
  spend_cols <- c("RoomService", "FoodCourt", "ShoppingMall", "Spa", "VRDeck")
  factor_cols <- c(
    "VIP", "Destination", "HomePlanet", "Transported", "CryoSleep",
    "c_deck", "C_num"
  )

  # Separating the 'PassengerId' column into 'groups' and 'num' columns using
  # '_' as the separator
  data <- separate(data, PassengerId, into = c("groups", "num"), sep = "_")

  # Separating the 'Cabin' column into 'c_deck', 'C_num', and 'C_side' columns
  # using '/' as the separator
  data <- separate(
    data, Cabin,
    into = c("c_deck", "C_num", "C_side"), sep = "/"
  )

  # Filling missing values in certain columns with default values
  data$HomePlanet <- coalesce(data$HomePlanet, "Earth")
  data$Destination <- coalesce(data$Destination, "TRAPPIST-1e")
  for (col in spend_cols) {
    data[[col]] <- coalesce(data[[col]], 0)
  }
  data$CryoSleep <- coalesce(data$CryoSleep, FALSE)
  data$VIP <- coalesce(data$VIP, FALSE)
  # "N" marks a cabin deck / side that was not recorded
  data$c_deck <- coalesce(data$c_deck, "N")
  data$C_side <- coalesce(data$C_side, "N")

  # Calculating the sum of 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa',
  # and 'VRDeck' and storing the result in the 'bills' column. The '+' must
  # end the first line: a line that *starts* with '+' is parsed as a separate
  # statement, which is how 'VRDeck' was previously left out of the total.
  data$bills <- data$RoomService + data$FoodCourt + data$ShoppingMall +
    data$Spa + data$VRDeck

  # Categorizing passengers into age groups based on the 'Age' column and
  # storing the result in the 'age_group' column. A missing Age matches none
  # of the comparisons and falls through to the catch-all code 99.
  data$age_group <- case_when(
    data$Age <= 10 ~ 0,
    data$Age <= 20 ~ 1,
    data$Age <= 30 ~ 2,
    data$Age <= 40 ~ 3,
    data$Age <= 50 ~ 4,
    data$Age <= 60 ~ 5,
    data$Age <= 70 ~ 6,
    data$Age <= 80 ~ 7,
    TRUE ~ 99
  )

  # Converting certain columns to factors for categorical encoding; columns
  # that are not in this table (e.g. 'Transported' in the test data) are
  # skipped
  for (col in intersect(factor_cols, names(data))) {
    data[[col]] <- as.factor(data[[col]])
  }

  data
}

train <- prepare_features(train)
# 'separate()' drops PassengerId from 'test'; the ids were already saved in
# 'passid' before this point.
test <- prepare_features(test)

# Creating a new dataframe 'df' by selecting specific columns from 'train' for
# modeling
df <- train %>% select(-c(Name, groups, C_num, Age))

# Set up the control parameters for cross-validation (5-fold), shared by all
# three models
ctrl <- trainControl(method = "cv", number = 5)

# Training a random forest model ('rf') with 'Transported' as the target
# variable and all other variables in 'df' as predictors
model_rf <- train(Transported ~ ., data = df, method = "rf", trControl = ctrl)

# Train k-Nearest Neighbors (KNN) model
model_knn <- train(Transported ~ ., data = df, method = "knn", trControl = ctrl)

# Train Support Vector Machine (SVM) model with a radial kernel
model_svm <- train(
  Transported ~ .,
  data = df, method = "svmRadial", trControl = ctrl
)

# Display accuracies
#
# 'results' has one row per candidate tuning value, so 'Accuracy' is a vector.
# Report the best cross-validated accuracy as a single number per model
# instead of printing every candidate's accuracy after one label.
cat("Random Forest Accuracy =", max(model_rf$results$Accuracy), "\n")
cat("KNN Accuracy =", max(model_knn$results$Accuracy), "\n")
cat("SVM Accuracy =", max(model_svm$results$Accuracy), "\n")

# Score the test data with each of the three fitted models
test_predictions_rf <- predict(model_rf, test)
test_predictions_knn <- predict(model_knn, test)
test_predictions_svm <- predict(model_svm, test)

# Pair the saved passenger ids with one model's predictions
build_submission <- function(predictions) {
  data.frame(PassengerId = passid, Transported = as.factor(predictions))
}

# One submission table per model
submission_rf <- build_submission(test_predictions_rf)
submission_knn <- build_submission(test_predictions_knn)
submission_svm <- build_submission(test_predictions_svm)

# Save each submission table as CSV, without row names
write.csv(submission_rf, file = "submission_rf.csv", row.names = FALSE)
write.csv(submission_knn, file = "submission_knn.csv", row.names = FALSE)
write.csv(submission_svm, file = "submission_svm.csv", row.names = FALSE)

# (Document-viewer residue, not part of the program: "You might also like")