# Used Libraries
library(dplyr)
# Read the raw dataset; every column is loaded as character
data <- read.csv("outcome-of-care-measures.csv", colClasses = "character")

# Keep only the columns the ranking functions use, under shorter names
hospitals <- mutate(
  data,
  heart_attack = Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack,
  heart_failure = Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure,
  pneumonia = Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia,
  State,
  hospital_name = Hospital.Name,
  .keep = "none"
)

# Turn the mortality rates back into numbers; "Not Available" becomes NA
outcomes <- c("heart_attack", "heart_failure", "pneumonia")
hospitals[outcomes] <- lapply(hospitals[outcomes], \(rate) {
  rate[rate %in% "Not Available"] <- NA
  as.numeric(rate)
})
Introduction
On February 4, 2024, I completed the R Programming course, which proved to be quite a challenge for me as a beginner at the time. Five months later, I wanted to give it another try and simplify the functions required to pass, for other learners who may have encountered complicated solutions online.
Required files : Dataset - Dataset Documentation - Assignment
Preparing Data
Finding the best hospital in a state
Code
# Name of the hospital with the lowest value of `outcome` in `state`.
#
# state:   state abbreviation, looked up in hospitals$State.
# outcome: "heart attack", "heart failure" or "pneumonia" (case-insensitive).
#
# Returns the hospital name, or the string "invalid state" /
# "invalid outcome" when an argument is not recognised.
best <- function(state, outcome) {
  known_state <- state %in% hospitals$State
  if (!known_state) {
    return("invalid state") # Replace with stop() to comply with the assignment conditions
  }

  # Map the user-facing outcome label to its column in `hospitals`
  outcome_col <- switch(
    tolower(outcome),
    "heart attack" = "heart_attack",
    "heart failure" = "heart_failure",
    "pneumonia" = "pneumonia",
    return("invalid outcome") # Replace with stop() to comply with the assignment conditions
  )

  # Sorting by name first means that, with `with_ties = FALSE`, a tie on the
  # outcome resolves to the alphabetically first hospital.
  candidates <- hospitals |>
    arrange(hospital_name) |>
    filter(State == state) |>
    filter(!is.na(!!sym(outcome_col)))

  top_row <- slice_min(candidates, !!sym(outcome_col), with_ties = FALSE)
  pull(top_row, hospital_name)
}
After checking whether the state abbreviation supplied by the user exists in the data, the requested outcome is converted to lowercase so that it is matched case-insensitively.
`filter(!is.na(!!sym(outcome_col)))` removes any rows with NA values in the specified outcome column. It uses `sym()`, which converts a string into a symbol that is then unquoted by `!!`. For more information, check the documentation for `sym()`.
Testing
best("TX", "heart attack")
[1] "CYPRESS FAIRBANKS MEDICAL CENTER"
best("TX", "heart failure")
[1] "FORT DUNCAN MEDICAL CENTER"
best("MD", "heart attack")
[1] "JOHNS HOPKINS HOSPITAL, THE"
best("MD", "pneumonia")
[1] "GREATER BALTIMORE MEDICAL CENTER"
best("BB", "heart attack")
[1] "invalid state"
best("NY", "hert attack")
[1] "invalid outcome"
Ranking hospitals by outcome in a state
Code
# Name of the hospital holding rank `num` for `outcome` within `state`.
#
# state:   state abbreviation, looked up in hospitals$State.
# outcome: "heart attack", "heart failure" or "pneumonia" (case-insensitive).
# num:     "best", "worst", or a positive rank number.
#
# Hospitals are ranked by ascending outcome value, ties broken by hospital
# name. Hospitals with no value for the outcome are not ranked.
#
# Returns the hospital name; NA when the rank does not exist or `num` is not
# recognised; the string "invalid state" / "invalid outcome" when an argument
# is not recognised.
rankhospital <- function(state, outcome, num = "best") {
  if (!(state %in% hospitals$State)) {
    return("invalid state")
  }

  outcome_col <- switch(
    tolower(outcome),
    "heart attack" = "heart_attack",
    "heart failure" = "heart_failure",
    "pneumonia" = "pneumonia",
    return("invalid outcome")
  )

  # Drop hospitals without data up front: otherwise they are sorted last and
  # a large numeric `num` would return one of them instead of NA.
  state_df <- hospitals |>
    filter(State == state, !is.na(!!sym(outcome_col))) |>
    arrange(!!sym(outcome_col), hospital_name)

  if (identical(num, "best")) {
    state_df |>
      slice_min(!!sym(outcome_col), with_ties = FALSE, na_rm = TRUE) |>
      pull(hospital_name)
  } else if (identical(num, "worst")) {
    # `with_ties = FALSE` keeps the first tied row, so put names in descending
    # order: the pick then matches the last position of the numeric ranking.
    state_df |>
      arrange(desc(hospital_name)) |>
      slice_max(!!sym(outcome_col), with_ties = FALSE, na_rm = TRUE) |>
      pull(hospital_name)
  } else if (is.numeric(num) && length(num) == 1 &&
               num > 0 && num <= nrow(state_df)) {
    state_df$hospital_name[num]
  } else {
    # Rank outside 1..number of ranked hospitals, or an unrecognised `num`
    NA
  }
}
To meet the assignment's requirements for handling ties and missing values, we must sort the rows by the outcome and hospital name columns, then use the `na_rm` and `with_ties` arguments of `slice_min()` and `slice_max()`, which, combined with `pull()`, return the requested hospital name.
Testing
rankhospital("TX", "heart failure", 4)
[1] "DETAR HOSPITAL NAVARRO"
rankhospital("MD", "heart attack", "worst")
[1] "HARFORD MEMORIAL HOSPITAL"
rankhospital("MN", "heart attack", 5000)
[1] NA
rankhospital("NY", "pneumonia", 10)
[1] "SISTERS OF CHARITY HOSPITAL"
Ranking hospitals in all states
Code
# For every state, the hospital holding rank `num` for `outcome`.
#
# outcome: "heart attack", "heart failure" or "pneumonia" (case-insensitive).
# num:     "best", "worst", or a positive rank number.
#
# Hospitals are ranked within each state by ascending outcome value, ties
# broken by hospital name; hospitals with no value are not ranked.
#
# Returns an ungrouped data frame with columns `hospital` and `state`, one row
# per state in hospitals$State, sorted by state; `hospital` is NA where the
# state has no hospital at that rank. Returns the string "invalid outcome" or
# "Invalid num" when an argument is not recognised.
rankall <- function(outcome, num = "best") {
  outcome_col <- switch(
    tolower(outcome),
    "heart attack" = "heart_attack",
    "heart failure" = "heart_failure",
    "pneumonia" = "pneumonia",
    return("invalid outcome")
  )

  state_df <- hospitals |>
    arrange(!!sym(outcome_col), hospital_name) |>
    group_by(State)

  if (identical(num, "best")) {
    ranked <- state_df |>
      slice_min(!!sym(outcome_col), with_ties = FALSE, na_rm = TRUE)
  } else if (identical(num, "worst")) {
    # `with_ties = FALSE` keeps the first tied row, so put names in descending
    # order: the pick then matches the last position of the numeric ranking.
    ranked <- state_df |>
      arrange(desc(hospital_name)) |>
      slice_max(!!sym(outcome_col), with_ties = FALSE, na_rm = TRUE)
  } else if (is.numeric(num) && length(num) == 1 && num > 0) {
    # Keep the `num` best rows of each state, then take the last of them;
    # states with fewer ranked hospitals contribute no row here.
    ranked <- state_df |>
      slice_min(!!sym(outcome_col),
                n = num,
                with_ties = FALSE,
                na_rm = TRUE) |>
      slice(num)
  } else {
    return("Invalid num")
  }

  # Add back the states that produced no row so that every branch reports all
  # states; an explicit `by` avoids the join message on each call.
  ranked |>
    ungroup() |>
    select(hospital = hospital_name, state = State) |>
    right_join(data.frame(state = unique(hospitals$State)), by = "state") |>
    arrange(state)
}
Although it is often solved with too many lines of code, the case of numeric values for the ranking is more easily dealt with by using the `n` argument, which controls the number of rows to keep, and `slice()`, which retrieves a row by its index (the `num` value in this case).
However, the function would drop states that have fewer hospitals than the requested ranking (the `num` argument); that's why we use `right_join()` to add them back, followed by `arrange()` to match the sample output in the assignment document.
Testing
tail(rankall("heart failure"), 10)
hospital | state |
---|---|
WELLMONT HAWKINS COUNTY MEMORIAL HOSPITAL | TN |
FORT DUNCAN MEDICAL CENTER | TX |
VA SALT LAKE CITY HEALTHCARE - GEORGE E. WAHLEN VA MEDICAL CENTER | UT |
SENTARA POTOMAC HOSPITAL | VA |
GOV JUAN F LUIS HOSPITAL & MEDICAL CTR | VI |
SPRINGFIELD HOSPITAL | VT |
HARBORVIEW MEDICAL CENTER | WA |
AURORA ST LUKES MEDICAL CENTER | WI |
FAIRMONT GENERAL HOSPITAL | WV |
CHEYENNE VA MEDICAL CENTER | WY |
tail(rankall("pneumonia", "worst"), 3)
hospital | state |
---|---|
MAYO CLINIC HEALTH SYSTEM - NORTHLAND, INC | WI |
PLATEAU MEDICAL CENTER | WV |
NORTH BIG HORN HOSPITAL DISTRICT | WY |
head(rankall("heart attack", 20), 10)
hospital | state |
---|---|
NA | AK |
D W MCMILLAN MEMORIAL HOSPITAL | AL |
ARKANSAS METHODIST MEDICAL CENTER | AR |
JOHN C LINCOLN DEER VALLEY HOSPITAL | AZ |
SHERMAN OAKS HOSPITAL | CA |
SKY RIDGE MEDICAL CENTER | CO |
MIDSTATE MEDICAL CENTER | CT |
NA | DC |
NA | DE |
SOUTH FLORIDA BAPTIST HOSPITAL | FL |