Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
usr110 committed Nov 6, 2024
2 parents f1319d2 + 36806fe commit b63d7c6
Showing 1 changed file with 83 additions and 11 deletions.
94 changes: 83 additions & 11 deletions processing_hd.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
# Load libraries
library(tidyverse)
library(arrow)

# Set seed for reproducibility
set.seed(2024)

# Read cycleIntervention health output as an example

### Ali to bring in the fix dataset
# hd <- read_csv("C:/Users/ajnab/RMIT University/JIBE working group - General/manchester/simulationResults/ForUrbanTransition/cycleIntervention/health/04_death_and_disease/pp_healthDiseaseTracker_2029.csv")
### Ali to add cycling intervention health data.


## Belen folder
# Base path that is prefixed (via paste0) to the simulation result files read
# further down in this script.
# NOTE(review): hard-coded, user-specific absolute path — each collaborator
# presumably needs to point this at their own copy of the data.
directory <- "C:/Users/mbzd2/OneDrive - RMIT University/JIBE/JIBE-WP6/healthmicrosim/"
Expand All @@ -18,7 +23,65 @@ health_base <- read_csv(paste0(directory, "manchester/simulationResults/ForUrban
# Sample for quicker run
# shd <- hd |> slice_sample(n = 100)


# ####### Reference data #######
#
# # Loop through each row
# for (i in 1:nrow(health_base)){
# # Create a long df for each row
# ur <- pivot_longer(health_base[i,], cols = -id)
# # Get a table for each disease state with the first year in which it appeared
# fv <- ur |> group_by(value) |> summarise(fy = first(name)) |> arrange(fy)
# # Loop through the first value dataset with first year for the disease appearance
# for (j in 1:nrow(fv)){
# # Ignore healthy and null states
# if (!fv$value[j] %in% c("healthy", "null"))
# {
# # Do not carry forward the healthy state to any other state
# if (fv$value[j - 1] != "healthy")
# # Join previous state with the current one with a | as a separator
# fv$value[j] <- paste(fv$value[j], fv$value[j - 1], sep = "|")
# }
# }
# # Update the long format for each ID with the new states
# for (k in 1:nrow(fv)){
# if (!fv$value[k] %in% c("healthy", "null"))
# ur[(ur$name >= as.numeric(fv$fy[k]) & ur$name < (as.numeric(fv$fy[k + 1]))),]$value <- fv$value[k]
# }
#
# # Update the states
# health_base <- rows_update(health_base, pivot_wider(ur, id_cols=id))
# }
#
#
#
# ####### Scenario data #######
#
# # Loop through each row
# for (i in 1:nrow(health_intervention)){
# # Create a long df for each row
# ur <- pivot_longer(health_intervention[i,], cols = -id)
# # Get a table for each disease state with the first year in which it appeared
# fv <- ur |> group_by(value) |> summarise(fy = first(name)) |> arrange(fy)
# # Loop through the first value dataset with first year for the disease appearance
# for (j in 1:nrow(fv)){
# # Ignore healthy and null states
# if (!fv$value[j] %in% c("healthy", "null"))
# {
# # Do not carry forward the healthy state to any other state
# if (fv$value[j - 1] != "healthy")
# # Join previous state with the current one with a | as a separator
# fv$value[j] <- paste(fv$value[j], fv$value[j - 1], sep = "|")
# }
# }
# # Update the long format for each ID with the new states
# for (k in 1:nrow(fv)){
# if (!fv$value[k] %in% c("healthy", "null"))
# ur[(ur$name >= as.numeric(fv$fy[k]) & ur$name < (as.numeric(fv$fy[k + 1]))),]$value <- fv$value[k]
# }
#
# # Update the states
# health_intervention <- rows_update(health_intervention, pivot_wider(ur, id_cols=id))
# }


### Optimised version
Expand All @@ -27,39 +90,48 @@ library(data.table)
library(dplyr)

# ---- Optimised cumulative disease-state tracking ----
# Purpose: for every person (`id`), replace each yearly state with the
# cumulative history of disease states seen so far (e.g. "B|A" once state B
# first appears after state A). "healthy" rows are kept as they are. "null"
# rows and any state containing "dead" are blanked and then filled with the
# last preceding non-blank state of the same person, or with "healthy" when
# there is no preceding state.
#
# Input : `health_base`, a wide table with an `id` column and one column per
#         time step (presumably simulation years — confirm against the CSV).
# Output: `health_base` (overwritten with the wide data.table result) and
#         `health_base_plot` (the same table as a plain data.frame).

# Convert health_base to data.table for fast processing
health_base_dt <- as.data.table(health_base)

# Melt (pivot_longer) the entire dataset once to create the long format
ur <- melt(
  health_base_dt,
  id.vars = "id",
  variable.name = "year",
  value.name = "state"
)

# For each id, determine the first year each disease state appears,
# excluding "healthy" and "null".
# NOTE(review): melt() returns `year` as a factor by default, so
# as.numeric(year) is the column position, not the calendar year. That is
# sufficient for ordering provided the year columns of health_base are in
# chronological order — confirm.
fv <- ur[
  !(state %in% c("healthy", "null")),
  .(first_year = min(as.numeric(year))),
  by = .(id, state)
]
setorder(fv, id, first_year)

# Concatenate previous states cumulatively within each id; the newest state
# is placed first ("new|older|oldest").
fv[, cumulative_state := Reduce(
  function(x, y) paste(y, x, sep = "|"), state, accumulate = TRUE
), by = id]

# Merge cumulative states back to the main dataset. Rows whose state is
# "healthy" or "null" have no match in fv and get NA in cumulative_state.
ur <- merge(
  ur,
  fv[, .(id, state, cumulative_state, first_year)],
  by = c("id", "state"),
  all.x = TRUE
)

# Create a final state column: keep the original state where it is "healthy"
# or where no cumulative state exists, otherwise use the cumulative state.
# The expression is purely row-wise, so no grouping by id is needed.
ur[, final_state := ifelse(
  state == "healthy" | is.na(cumulative_state),
  state,
  cumulative_state
)]

# Change any state that includes "dead" to "null"
ur[, final_state := ifelse(grepl("dead", final_state), "null", final_state)]

# Use tidyr::fill to carry states forward within each id
ur <- ur %>%
  group_by(id) %>%
  arrange(id, year) %>%
  # Treat "null" as missing so that fill() can overwrite it
  mutate(
    final_state = ifelse(final_state == "null", NA_character_, final_state)
  ) %>%
  fill(final_state, .direction = "down") %>%
  # Anything still missing had no earlier state for that id
  replace_na(list(final_state = "healthy")) %>%
  ungroup()

# Reshape back to wide format, retaining the modified states
health_base_updated <- dcast(setDT(ur), id ~ year, value.var = "final_state")

# Ensure the original "healthy" and "null" states are preserved where they
# belong.
# NOTE(review): this restore is positional and therefore assumes health_base
# has the same row order as the dcast() result (sorted by id). After the
# fill()/replace_na() steps above no NA is expected, so it should be a no-op.
health_base_updated[is.na(health_base_updated)] <- health_base[is.na(health_base_updated)]

# Assign the result back to the original health_base variable if desired
health_base <- health_base_updated
health_base_plot <- as.data.frame(health_base_updated)

0 comments on commit b63d7c6

Please sign in to comment.