Professional Documents
Culture Documents
Tugas Mingguan MSL AP Ke-8 - Akhmad Safrin Sadad Khan - 191810101104
Tugas Mingguan MSL AP Ke-8 - Akhmad Safrin Sadad Khan - 191810101104
Oleh:
Akhmad Safrin Sadad Khan
191810101014
JURUSAN MATEMATIKA
FAKULTAS MATEMATIKA DAN ILMU PENGETAHUAN ALAM
UNIVERSITAS JEMBER
2021
Tugas_Praktikum_8
Akhmad Safrin Sadad Khan
11/1/2021
Read Data
# Load the retail data set from the working directory, then inspect the first
# rows and the column types.
retails <- read.csv(file = "retail.csv")
head(retails, n = 6L)
str(retails)
## 'data.frame': 9994 obs. of 15 variables:
## $ Row.ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Order.ID : chr "CA-2016-152156" "CA-2016-152156" "CA-2016-138688"
"US-2015-108966" ...
## $ Order.Date : chr "11/8/16" "11/8/16" "6/12/16" "10/11/15" ...
## $ Ship.Date : chr "11/11/16" "11/11/16" "6/16/16" "10/18/15" ...
## $ Ship.Mode : chr "Second Class" "Second Class" "Second Class"
"Standard Class" ...
## $ Customer.ID : chr "CG-12520" "CG-12520" "DV-13045" "SO-20335" ...
## $ Segment : chr "Consumer" "Consumer" "Corporate" "Consumer" ...
## $ Product.ID : chr "FUR-BO-10001798" "FUR-CH-10000454" "OFF-LA-
10000240" "FUR-TA-10000577" ...
## $ Category : chr "Furniture" "Furniture" "Office Supplies"
"Furniture" ...
## $ Sub.Category: chr "Bookcases" "Chairs" "Labels" "Tables" ...
## $ Product.Name: chr "Bush Somerset Collection Bookcase" "Hon Deluxe
Fabric Upholstered Stacking Chairs, Rounded Back" "Self-Adhesive Address
Labels for Typewriters by Universal" "Bretford CR4500 Series Slim Rectangular
Table" ...
## $ Sales : num 262 731.9 14.6 957.6 22.4 ...
## $ Quantity : int 2 3 2 5 2 7 4 6 3 5 ...
## $ Discount : num 0 0 0 0.45 0.2 0 0 0.2 0.2 0 ...
## $ Profit : num 41.91 219.58 6.87 -383.03 2.52 ...
library(lubridate)
##
## Attaching package: 'lubridate'
Konversi Data
# Parse the month/day/two-digit-year strings into Date objects.
retails$Ship.Date <- as.Date(retails$Ship.Date, format = "%m/%d/%y")
retails$Order.Date <- as.Date(retails$Order.Date, format = "%m/%d/%y")
# Convert the descriptive text columns to factors in one pass.
factor_cols <- c("Ship.Mode", "Segment", "Category", "Sub.Category")
retails[factor_cols] <- lapply(retails[factor_cols], as.factor)
# Confirm the new column types.
str(retails)
Visualisasi
# Distribution of Sales: a 90-break histogram and a boxplot.
hist(retails$Sales, breaks = 90)
boxplot(retails$Sales)
# Upper outlier threshold for Sales: third quartile plus 1.5 times the IQR.
out_sales <- quantile(retails$Sales, probs = 0.75) +
  1.5 * IQR(retails$Sales)
Cleaning Outlier
# Keep only the rows whose Sales lie below the upper threshold, then re-plot.
sales_without_outlier <- retails[retails$Sales < out_sales, ]
boxplot(sales_without_outlier$Sales)
hist(sales_without_outlier$Sales)
# Same treatment for Discount: plot, compute the upper threshold
# (third quartile plus 1.5 times the IQR), filter, and re-plot.
hist(retails$Discount, breaks = 20)
boxplot(retails$Discount)
out <- quantile(retails$Discount, probs = 0.75) +
  1.5 * IQR(retails$Discount)
discount_without_outlier <- retails[retails$Discount < out, ]
boxplot(discount_without_outlier$Discount)
# Profit is only plotted here; this block removes no rows based on Profit.
hist(retails$Profit, breaks = 20)
boxplot(retails$Profit, outline = TRUE)
Korelasi
# Correlation between Sales and Profit on the full data set
# (cor() uses the Pearson method by default).
cor(retails$Sales, retails$Profit)
## [1] 0.4790643
qqplot(retails$Sales, retails$Profit)
# Restored call: the statement that printed the value below was missing.
# -0.2194875 squared is 0.04817, the R-squared reported for
# lm(Profit ~ Discount, data = retails), so it is the Profit/Discount
# correlation on the full data set.
cor(retails$Profit, retails$Discount)
## [1] -0.2194875
# Same pair as the first call with the arguments swapped; cor() is symmetric.
cor(retails$Profit, retails$Sales)
## [1] 0.4790643
# Correlations recomputed on the outlier-filtered subsets.
cor(discount_without_outlier$Profit, discount_without_outlier$Discount)
## [1] -0.1577858
cor(sales_without_outlier$Profit, sales_without_outlier$Sales)
## [1] 0.1964482
Build Model Regresi Linier 1. Pembentukan model regresi linier “Profit ~ Sales” tanpa
outlier
# Simple linear regression of Profit on Sales, fitted on the subset
# sales_without_outlier.
ols_without_outliers <- lm(
  formula = Profit ~ Sales,
  data = sales_without_outlier
)
class(ols_without_outliers)
## [1] "lm"
summary(ols_without_outliers)
##
## Call:
## lm(formula = Profit ~ Sales, data = sales_without_outlier)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1223.79 -3.62 0.97 10.36 196.07
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.349824 0.660415 5.072 4.01e-07 ***
## Sales 0.084519 0.004491 18.821 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 48.11 on 8825 degrees of freedom
## Multiple R-squared: 0.03859, Adjusted R-squared: 0.03848
## F-statistic: 354.2 on 1 and 8825 DF, p-value: < 2.2e-16
# Intercept and slope of the Profit ~ Sales model fitted without outliers.
coef(ols_without_outliers)
## (Intercept) Sales
## 3.34982400 0.08451873
# Restored definition: the model was summarised without ever being created.
# The formula and data are taken from the "Call:" echo printed by summary().
ols_with_outliers <- lm(formula = Profit ~ Sales, data = retails)
class(ols_with_outliers)
## [1] "lm"
summary(ols_with_outliers)
##
## Call:
## lm(formula = Profit ~ Sales, data = retails)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7397.5 2.6 14.6 21.7 5261.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -12.732867 2.192459 -5.808 6.53e-09 ***
## Sales 0.180067 0.003301 54.555 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 205.6 on 9992 degrees of freedom
## Multiple R-squared: 0.2295, Adjusted R-squared: 0.2294
## F-statistic: 2976 on 1 and 9992 DF, p-value: < 2.2e-16
# Intercept and slope of the Profit ~ Sales model fitted on the full data.
coef(ols_with_outliers)
## (Intercept) Sales
## -12.7328671 0.1800667
# Restored definition: the model was summarised without ever being created.
# The formula and data are taken from the "Call:" echo printed by summary().
ols_without_outliers_2 <- lm(
  formula = Profit ~ Discount,
  data = discount_without_outlier
)
class(ols_without_outliers_2)
## [1] "lm"
summary(ols_without_outliers_2)
##
## Call:
## lm(formula = Profit ~ Discount, data = discount_without_outlier)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1817.0 -59.5 -13.3 2.2 8328.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 71.689 2.901 24.71 <2e-16 ***
## Discount -292.435 19.217 -15.22 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 206 on 9070 degrees of freedom
## Multiple R-squared: 0.0249, Adjusted R-squared: 0.02479
## F-statistic: 231.6 on 1 and 9070 DF, p-value: < 2.2e-16
# Intercept and slope of the Profit ~ Discount model fitted without outliers.
coef(ols_without_outliers_2)
## (Intercept) Discount
## 71.68931 -292.43549
# Restored definition: the model was summarised without ever being created.
# The formula and data are taken from the "Call:" echo printed by summary().
ols_with_outliers_2 <- lm(formula = Profit ~ Discount, data = retails)
class(ols_with_outliers_2)
## [1] "lm"
summary(ols_with_outliers_2)
##
## Call:
## lm(formula = Profit ~ Discount, data = retails)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6493.2 -54.9 -15.9 9.4 8332.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 67.559 2.867 23.57 <2e-16 ***
## Discount -249.051 11.075 -22.49 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 228.6 on 9992 degrees of freedom
## Multiple R-squared: 0.04817, Adjusted R-squared: 0.04808
## F-statistic: 505.7 on 1 and 9992 DF, p-value: < 2.2e-16
# Intercept and slope of the Profit ~ Discount model fitted on the full data.
coef(ols_with_outliers_2)
## (Intercept) Discount
## 67.55941 -249.05142
# Side-by-side table of test inputs, predicted profit and actual profit.
# NOTE(review): data_tes, prediksi_profit and profit_sesungguhnya are not
# defined anywhere in the visible document. Presumably the predict() steps
# that create them were lost; confirm against the original notebook.
# NOTE(review): the same call appears twice. It likely belonged to two
# different models whose setup code is missing; verify.
data.frame(data_tes,prediksi_profit,profit_sesungguhnya)
data.frame(data_tes,prediksi_profit,profit_sesungguhnya)
## KESIMPULAN: Dari hasil prediksi model di atas, model yang tidak memuat outlier
adalah model yang paling baik. Hal ini dapat dilihat dari hasil prediksi model
tanpa outlier yang lebih mendekati nilai aslinya dibandingkan dengan hasil prediksi
model yang memuat outlier.