Download as docx, pdf, or txt
Download as docx, pdf, or txt
You are on page 1 of 4

#rm(list=ls())

library(readr)

# Wide table read from the CSV; the code below uses its "Ano" column as the
# year identifier.
data1 <- read.csv("Data/fertilizers.csv")

# Long format: one row per (year, original column). melt() is called through
# the reshape namespace because the package is only attached further down in
# this script, so a bare melt() would not be found at this point.
data2 <- reshape::melt(data1, id.vars = "Ano")

# Each melted key is expected to look like "<Country>.<Fertilizer>". Split every
# key on its own and refuse to continue on malformed keys: pairing up the
# flattened split result would silently shift every later row otherwise.
key_parts <- strsplit(as.character(data2[, 2]), ".", fixed = TRUE)
if (any(lengths(key_parts) != 2L)) {
  stop(
    "Every column name other than 'Ano' must contain exactly one '.'",
    call. = FALSE
  )
}
data2 <- cbind(
  data2,
  vapply(key_parts, `[`, character(1), 1L),
  vapply(key_parts, `[`, character(1), 2L)
)
colnames(data2) <- c("Year", "Key", "Consumption", "Country", "Fertilizer")

# Column layout used throughout the script: column 1 is the year, columns 2-41
# are the raw series (10 countries x 4 fertilizers, grouped by country),
# columns 42-51 are per-country totals and columns 52-55 per-fertilizer totals.
# NOTE(review): the country/fertilizer order below is taken from the labels
# this script assigns, not from the CSV header -- confirm against the file.
coldata <- seq(2, 41)
country_names <- c(
  "Brazil", "Canada", "China", "US", "France",
  "India", "Indonesia", "Pakistan", "Russia", "Turkey"
)
fert_names <- c("N", "NPK", "P2O5", "K2O")
n_fert <- length(fert_names)

# Per-country total: row sum over that country's block of consecutive columns.
# Missing values are ignored, so an all-NA row sums to 0.
country_totals <- lapply(seq_along(country_names), function(i) {
  rowSums(data1[coldata[(i - 1) * n_fert + seq_len(n_fert)]], na.rm = TRUE)
})
names(country_totals) <- country_names

# Per-fertilizer total: row sum over every n_fert-th raw column, starting at
# that fertilizer's offset inside the country block.
fert_totals <- lapply(seq_along(fert_names), function(j) {
  rowSums(data1[coldata[seq(j, length(coldata), by = n_fert)]], na.rm = TRUE)
})
names(fert_totals) <- fert_names

# Append all totals in one step, already carrying their final names.
data1 <- cbind(
  data1,
  data.frame(country_totals, check.names = FALSE),
  data.frame(fert_totals, check.names = FALSE)
)

colcountry <- seq(42, 51)

colfert <- seq(52, 55)

# Descriptive statistics for every column of data1 (one column per variable,
# one row per statistic). vapply() pins the shape of each result, so an
# unexpected column fails here instead of silently changing the table layout.
col_quantiles <- vapply(data1, quantile, numeric(5), na.rm = TRUE)
col_means <- vapply(data1, mean, numeric(1), na.rm = TRUE)
col_sds <- vapply(data1, sd, numeric(1), na.rm = TRUE)
col_sums <- vapply(data1, sum, numeric(1), na.rm = TRUE)

datasum <- data.frame(col_quantiles)

# Rows 1-5 are the quantiles (min, Q1, median, Q3, max); the interquartile
# range is Q3 - Q1, i.e. row 4 minus row 2.
datasum <- rbind(datasum, datasum[4, ] - datasum[2, ], col_means, col_sds, col_sums)

rownames(datasum) <- c("Min", "Q1", "Median", "Q3", "Max", "IQR", "Mean", "St.Dev.", "Sum")

# Differences between consecutive rows, column by column.
datadif <- data.frame(diff(as.matrix(data1)))

#dataret <- (data1[2:nrow(data1),] - data1[1:(nrow(data1)-1),])/data1[1:(nrow(data1)-1),]

#dataret <- do.call(data.frame,lapply(dataret, function(x) replace(x, is.nan(x)|is.infinite(x),NA)))

Exploratory Analysis

Libraries, colors and lines

# Remote helper script; presumably it defines loadlibrary() and plot_size(),
# which are used below but not defined in this file.
# NOTE(review): this downloads and executes code from the network on every run.
source("https://raw.githubusercontent.com/eogasawara/mylibrary/master/myGraphics.R")

# Packages used by the exploratory plots, loaded in the original order.
for (pkg in c(
  "MASS", "gclus", "RColorBrewer", "gridExtra", "GGally",
  "reshape", "dplyr", "WVPlots", "aplpack"
)) {
  loadlibrary(pkg)
}

# One rainbow colour per raw data column.
mycolors <- rainbow(length(unique(coldata)))
plot_size(4, 3)

Data Summary

# Class of every column, shown as a single row. sapply() is kept here on
# purpose: a class vector may have more than one element.
t(sapply(data1, class))

# Descriptive statistics for every column of data1 (one column per variable,
# one row per statistic). vapply() pins the shape of each result, so an
# unexpected column fails here instead of silently changing the table layout.
col_quantiles <- vapply(data1, quantile, numeric(5), na.rm = TRUE)
col_means <- vapply(data1, mean, numeric(1), na.rm = TRUE)
col_sds <- vapply(data1, sd, numeric(1), na.rm = TRUE)
col_sums <- vapply(data1, sum, numeric(1), na.rm = TRUE)

datasum <- data.frame(col_quantiles)

# Rows 1-5 are the quantiles (min, Q1, median, Q3, max); the interquartile
# range is Q3 - Q1, i.e. row 4 minus row 2.
datasum <- rbind(datasum, datasum[4, ] - datasum[2, ], col_means, col_sds, col_sums)

rownames(datasum) <- c("Min", "Q1", "Median", "Q3", "Max", "IQR", "Mean", "St.Dev.", "Sum")

# Show the summary for the raw series, then the country totals, then the
# fertilizer totals.
datasum[coldata]

datasum[colcountry]

datasum[colfert]

Boxplot

# One box per raw data column; las = 3 turns the axis labels perpendicular to
# the axis so the column names do not overlap.
boxplot(
  data1[coldata],
  las = 3,
  col = "#009999"
)

Chernoff Faces

# One face per raw data column, drawn from that column's summary statistics.
# The face labels are the column names, which are the row names of the
# transposed summary table.
labels <- colnames(datasum)[coldata]

faces(t(datasum[, coldata]), labels = labels, print.info = FALSE, cex = 1)

Images (Consumption)

# Draw the selected columns of `data` as an image: rows of `data` run along the
# x axis (labelled with `years`), the selected columns along the y axis
# (labelled with their column names).
#
# data:  data frame holding the series.
# cols:  indices of the columns to draw.
# years: x-axis labels, one per row of `data`.
# Returns the plotted matrix invisibly.
plot_consumption_image <- function(data, cols, years) {
  # drop = FALSE keeps a matrix even if a single column is selected.
  values <- as.matrix(data[, cols, drop = FALSE])
  row_pos <- seq_len(nrow(values))
  col_pos <- seq_len(ncol(values))

  # Default axes are suppressed so the labelled ones below replace them.
  image(row_pos, col_pos, values, axes = FALSE, xlab = "", ylab = "")
  axis(2, at = col_pos, labels = colnames(values), las = 2)
  axis(1, at = row_pos, labels = years, las = 3)

  invisible(values)
}

# By country
temp <- plot_consumption_image(data1, colcountry, data1$Ano)

# By fertilizer
temp <- plot_consumption_image(data1, colfert, data1$Ano)

Correlation matrix

# Palette for the correlation plots: entries 1, 3 and 5 of the 11-colour
# "Spectral" palette.
col.set <- brewer.pal(n = 11, name = "Spectral")
mycolors <- col.set[c(1, 3, 5)]

# By country
temp <- exp_correlation(
  data1[, colcountry],
  color = mycolors
)
plot(temp)

# By fertilizer
temp <- exp_correlation(
  data1[, colfert],
  color = mycolors
)
plot(temp)

Advanced matrix dispersion

# By country: pair plot of the per-country totals, using the first palette
# colour. The bare `temp` line displays the returned plot object.
temp <- exp_advpair_plot(
  data = data1[colcountry],
  cnames = colnames(data1)[colcountry],
  colors = mycolors[1]
)
temp

# By fertilizer: same plot for the per-fertilizer totals.
temp <- exp_advpair_plot(
  data = data1[colfert],
  cnames = colnames(data1)[colfert],
  colors = mycolors[1]
)
temp

Preprocessing

Library and dataset loading

# Remote helper script for the preprocessing steps; presumably it provides
# outliers.boxplot(), which is used below but not defined in this file.
# NOTE(review): this downloads and executes code from the network on every run.
source("https://raw.githubusercontent.com/eogasawara/mylibrary/master/myPreprocessing.R")

# Packages used by the preprocessing section, loaded in the original order.
for (pkg in c("RColorBrewer", "dplyr", "gridExtra", "reshape")) {
  loadlibrary(pkg)
}

# Five-colour palette: the odd entries 1-9 of the 11-colour "Spectral" palette.
col.set <- brewer.pal(n = 11, name = "Spectral")
mycolors <- col.set[c(1, 3, 5, 7, 9)]

plot_size(4, 3)
Data Aggregation

Outlier

# Flag outlier rows and split the table into kept and flagged rows.
# The script never creates an object called `data`; that name resolves to the
# utils::data() function, which cannot be subset with `[`. data1 is the table
# built above.
# NOTE(review): assumes the whole of data1 (year and total columns included)
# is the intended input, and that outliers.boxplot() returns one logical flag
# per row -- confirm both.
out <- outliers.boxplot(data1)

# Rows not flagged as outliers.
myfert <- data1[!out, ]

# Preview of the rows that were flagged.
head(data1[out, ])

You might also like