Download as pdf or txt
Download as pdf or txt
You are on page 1of 6

HEALTHCARE COST ANALYSIS

(SOURCE CODE)
DESCRIPTION
Background and Objective:

A nationwide survey of hospital costs conducted by the US Agency for Healthcare consists of hospital
records of inpatient samples. The given data is restricted to the state of Wisconsin and relates to
patients in the age group 0-17 years. The agency wants to analyze the data to research
healthcare costs and their utilization.

Domain: Healthcare

Dataset Description:

Here is a detailed description of the given dataset:

Attribute Description

Age Age of the patient discharged

Female A binary variable that indicates if the patient is female

Los Length of stay in days

Race Race of the patient (specified numerically)

Totchg Hospital discharge costs

Aprdrg All Patient Refined Diagnosis Related Groups

Analysis to be done:

1. To record the patient statistics, the agency wants to find the age category of people who
frequently visit the hospital and have the maximum expenditure.

2. In order of severity of the diagnosis and treatments and to find out the expensive treatments, the
agency wants to find the diagnosis-related group that has maximum hospitalization and expenditure.

3. To make sure that there is no malpractice, the agency needs to analyze if the race of the patient is
related to the hospitalization costs.

4. To properly utilize the costs, the agency has to analyze the severity of the hospital costs by age
and gender for the proper allocation of resources.

5. Since the length of stay is the crucial factor for inpatients, the agency wants to find if the length of
stay can be predicted from age, gender, and race.
6. To perform a complete analysis, the agency wants to find the variable that mainly affects hospital
costs.

# Locate and import the data ----
#
# The workspace is deliberately NOT cleared (no `rm(list = ls())`) and the
# working directory is NOT changed (no `setwd()`): both are global side
# effects that surprise anyone sourcing this script from an existing session.

data_file <- "1555054100_hospitalcosts.csv"

# Look in the current working directory first. `choose.dir()` only exists on
# Windows and needs an interactive session, so the directory picker is offered
# only as a fallback when those conditions hold.
data_dir <- getwd()
if (!file.exists(file.path(data_dir, data_file)) &&
    interactive() && .Platform$OS.type == "windows") {
  chosen_dir <- utils::choose.dir()
  # `choose.dir()` returns NA when the dialog is cancelled.
  if (!is.na(chosen_dir)) {
    data_dir <- chosen_dir
  }
}

data_path <- file.path(data_dir, data_file)
if (!file.exists(data_path)) {
  stop("Cannot find data file: ", data_path, call. = FALSE)
}

# Import Data

hosp_cost <- read.csv(file = data_path, header = TRUE)

# 1. To record the patient statistics, the agency wants to find the age
#    category of people who frequently visit the hospital and have the maximum
#    expenditure.

# `View()` opens a data viewer, so it is only called in interactive sessions.
if (interactive()) {
  View(hosp_cost)
}

# Quick overview of the data and of the AGE column.
head(hosp_cost)
summary(hosp_cost)
summary(hosp_cost$AGE)
table(hosp_cost$AGE)
summary(as.factor(hosp_cost$AGE))

# Number of records per age. Converting the 1-d table to a data frame yields
# the columns `Var1` (the age, as a factor) and `Freq` (the count).
age_table <- table(hosp_cost$AGE)
if (interactive()) {
  View(age_table)
}
age_table <- as.data.frame(age_table)
class(age_table)

# Highest visit count and the age it belongs to.
N_Max_Patient <- max(age_table$Freq)
N_Max_Patient

Patient_Age_idx <- which.max(age_table$Freq)
Patient_Age_idx

Patient_Age <- age_table[Patient_Age_idx, 1]
Patient_Age

barplot(
  age_table$Freq,
  names.arg = age_table$Var1,
  xlab = "Age",
  ylab = "Number of Visits",
  main = "Age Vs Number of Visits"
)

# Total charges (sum of TOTCHG) per age.
Expenditure_by_Age <- aggregate(TOTCHG ~ AGE, data = hosp_cost, sum)
class(Expenditure_by_Age)
if (interactive()) {
  View(Expenditure_by_Age)
}

# Take the maximum of the TOTCHG column only. Calling `max()` on the whole
# data frame would also consider the AGE column.
Max_Expenditure <- max(Expenditure_by_Age$TOTCHG)
Max_Expenditure

Age_Max_Exp_idx <- which.max(Expenditure_by_Age$TOTCHG)
Age_Max_Exp_idx

Age_Max_Exp <- Expenditure_by_Age[Age_Max_Exp_idx, 1]
Age_Max_Exp

barplot(
  Expenditure_by_Age$TOTCHG,
  names.arg = Expenditure_by_Age$AGE,
  xlab = "Age",
  ylab = "Expenditure",
  main = "Age Vs Expenditure"
)

# Printing results

print(paste(
  "Age of patient who frequently visit the hospital is ", Patient_Age,
  "With frequency -", N_Max_Patient
))

print(paste(
  "Patients with age", Age_Max_Exp,
  "Has maximum expenditure of", Max_Expenditure
))


# 2. In order of severity of the diagnosis and treatments and to find out the
#    expensive treatments, the agency wants to find the diagnosis related
#    group that has maximum hospitalization and expenditure.

# Number of records per diagnosis group (APRDRG). The intermediate table is
# not called `t`, which would shadow base R's transpose function `t()`.
drg_table <- table(hosp_cost$APRDRG)
d <- as.data.frame(drg_table)
names(d)[1] <- "Diagnosis Group"

# Diagnosis group with the most records, and that record count.
Group_Max_Hosp_idx <- which.max(d$Freq)

Group_Max_Hosp <- d[Group_Max_Hosp_idx, 1]
Group_Max_Hosp

Group_Max_Hosp_Freq <- d[Group_Max_Hosp_idx, 2]
Group_Max_Hosp_Freq

barplot(
  d$Freq,
  names.arg = d$`Diagnosis Group`,
  xlab = "Diagnosis Group",
  ylab = "Number of Visits",
  main = "Diagnosis Vs Hospitalization"
)

# Total charges (sum of TOTCHG) per diagnosis group.
Expenditure_by_Group <- aggregate(TOTCHG ~ APRDRG, data = hosp_cost, sum)
Expenditure_by_Group

Group_Max_Expe_idx <- which.max(Expenditure_by_Group$TOTCHG)
Group_Max_Expe_idx

Group_with_Max_Expe <- Expenditure_by_Group[Group_Max_Expe_idx, 1]
Group_with_Max_Expe

Group_Max_Expe <- Expenditure_by_Group[Group_Max_Expe_idx, 2]
Group_Max_Expe

barplot(
  Expenditure_by_Group$TOTCHG,
  names.arg = Expenditure_by_Group$APRDRG,
  xlab = "Diagnosis Group",
  ylab = "Expenditure",
  main = "Diagnosis Vs Expenditure"
)

# Printing results

print(paste(
  "Group of patient who frequently visit the hospital is ", Group_Max_Hosp,
  "With frequency -", Group_Max_Hosp_Freq
))

print(paste(
  "Patients with group", Group_with_Max_Expe,
  "Has maximum expenditure of", Group_Max_Expe
))

# 3. To make sure that there is no malpractice, the agency needs to analyze if
#    the race of the patient is related to the hospitalization costs.

table(hosp_cost$RACE)

# RACE is coded numerically; convert it to a factor so the models treat it as
# a categorical variable. This modifies `hosp_cost` for the code that follows.
hosp_cost$RACE <- as.factor(hosp_cost$RACE)

# Linear model of total charges on race.
fit <- lm(TOTCHG ~ RACE, data = hosp_cost)
fit
summary(fit)

# Analysis of variance for the same formula.
fit1 <- aov(TOTCHG ~ RACE, data = hosp_cost)
summary(fit1)

# `View()` opens a data viewer, so it is only called in interactive sessions.
if (interactive()) {
  View(fit$fitted.values)
}

# Printing results
# NOTE(review): the conclusion below is a fixed message, not derived from the
# model objects - confirm it against the summaries printed above.

print(paste(
  "From R-squared value of linear model & p value of Analysis of Variance",
  "Model we can conclude that race of the patient is not related to the",
  "hospitalization costs "
))

# 4. To properly utilize the costs, the agency has to analyze the severity of
#    the hospital costs by age and gender for proper allocation of resources.

table(hosp_cost$FEMALE)

# Analysis of variance of total charges on age and gender.
a <- aov(TOTCHG ~ AGE + FEMALE, data = hosp_cost)
summary(a)

# Linear model for the same formula.
b <- lm(TOTCHG ~ AGE + FEMALE, data = hosp_cost)
summary(b)

# Printing results
# NOTE(review): the conclusion below is a fixed message, not derived from the
# model objects - confirm it against the summaries printed above.

print(paste(
  "From p value we can conclude that Age & gender have significant role in",
  "deciding cost"
))

# 5. Since the length of stay is the crucial factor for inpatients, the agency
#    wants to find if the length of stay can be predicted from age, gender,
#    and race.

table(hosp_cost$LOS)

# Analysis of variance of length of stay on age, gender and race. The models
# get their own names instead of both being assigned to `cat`, which shadowed
# base R's `cat()` and discarded the first model.
los_aov <- aov(LOS ~ AGE + FEMALE + RACE, data = hosp_cost)
summary(los_aov)

# Linear model for the same formula.
los_lm <- lm(LOS ~ AGE + FEMALE + RACE, data = hosp_cost)
summary(los_lm)

# NOTE(review): the conclusion below is a fixed message, not derived from the
# model objects - confirm it against the summaries printed above.
print(paste(
  "From Variance test & linear model we can conclude that stay can not be",
  "predicted using age gender or race"
))

# 6. To perform a complete analysis, the agency wants to find the variable
#    that mainly affects the hospital costs.

# Analysis of variance of total charges on every other column of `hosp_cost`.
aov(TOTCHG ~ ., data = hosp_cost)

# Linear model for the same formula; the coefficient table in the summary is
# what the conclusion below refers to.
mod <- lm(TOTCHG ~ ., data = hosp_cost)
summary(mod)

# NOTE(review): the conclusion below is a fixed message, not derived from the
# model objects - confirm it against the summary printed above.
print(paste(
  "From Variance test & linear model we can conclude that variables like",
  "Age,LOS & Aprdrg play major role in the outcome of expenditure"
))

You might also like