From 36806fe4c57dc049cae0cba1ac341d921f06f319 Mon Sep 17 00:00:00 2001 From: "B. Zapata-Diomedi" Date: Wed, 6 Nov 2024 09:50:47 +0000 Subject: [PATCH] optimised code --- processing_hd.R | 126 +++++++++++++++++++++++++----------------------- 1 file changed, 66 insertions(+), 60 deletions(-) diff --git a/processing_hd.R b/processing_hd.R index e78b944..2e27926 100644 --- a/processing_hd.R +++ b/processing_hd.R @@ -6,7 +6,11 @@ library(arrow) set.seed(2024) # Read cycleIntervention health output as an example + +### Ali to bring in the fix dataset # hd <- read_csv("C:/Users/ajnab/RMIT University/JIBE working group - General/manchester/simulationResults/ForUrbanTransition/cycleIntervention/health/04_death_and_disease/pp_healthDiseaseTracker_2029.csv") +### Ali to add cycling intervention health data. + ## Belen folder directory <- "C:/Users/mbzd2/OneDrive - RMIT University/JIBE/JIBE-WP6/healthmicrosim/" @@ -19,65 +23,65 @@ health_base <- read_csv(paste0(directory, "manchester/simulationResults/ForUrban # Same for quicker run # shd <- hd |> slice_sample(n = 100) -####### Referecen data ####### - -# Loop through each row -for (i in 1:nrow(health_base)){ - # Create a long df for each row - ur <- pivot_longer(health_base[i,], cols = -id) - # Get a table for each disease state with the first year when it was appeared - fv <- ur |> group_by(value) |> summarise(fy = first(name)) |> arrange(fy) - # Loop through the first value dataset with first year for the disease appearance - for (j in 1:nrow(fv)){ - # Ignore healty and null states - if (!fv$value[j] %in% c("healthy", "null")) - { - # Do not carry forward the healthy state to any other state - if (fv$value[j - 1] != "healthy") - # Join previous state with the current one with a | as a separator - fv$value[j] <- paste(fv$value[j], fv$value[j - 1], sep = "|") - } - } - # Update the long format for each ID with the new states - for (k in 1:nrow(fv)){ - if (!fv$value[k] %in% c("healthy", "null")) - ur[(ur$name >= as.numeric(fv$fy[k]) & ur$name < (as.numeric(fv$fy[k + 1]))),]$value <- fv$value[k] - } - - # Update the states - health_base <- rows_update(health_base, pivot_wider(ur, id_cols=id)) -} - - - -####### Scenario data ####### - -# Loop through each row -for (i in 1:nrow(health_intervention)){ - # Create a long df for each row - ur <- pivot_longer(health_intervention[i,], cols = -id) - # Get a table for each disease state with the first year when it was appeared - fv <- ur |> group_by(value) |> summarise(fy = first(name)) |> arrange(fy) - # Loop through the first value dataset with first year for the disease appearance - for (j in 1:nrow(fv)){ - # Ignore healty and null states - if (!fv$value[j] %in% c("healthy", "null")) - { - # Do not carry forward the healthy state to any other state - if (fv$value[j - 1] != "healthy") - # Join previous state with the current one with a | as a separator - fv$value[j] <- paste(fv$value[j], fv$value[j - 1], sep = "|") - } - } - # Update the long format for each ID with the new states - for (k in 1:nrow(fv)){ - if (!fv$value[k] %in% c("healthy", "null")) - ur[(ur$name >= as.numeric(fv$fy[k]) & ur$name < (as.numeric(fv$fy[k + 1]))),]$value <- fv$value[k] - } - - # Update the states - health_intervention <- rows_update(health_intervention, pivot_wider(ur, id_cols=id)) -} +# ####### Referecen data ####### +# +# # Loop through each row +# for (i in 1:nrow(health_base)){ +# # Create a long df for each row +# ur <- pivot_longer(health_base[i,], cols = -id) +# # Get a table for each disease state with the first year when it was appeared +# fv <- ur |> group_by(value) |> summarise(fy = first(name)) |> arrange(fy) +# # Loop through the first value dataset with first year for the disease appearance +# for (j in 1:nrow(fv)){ +# # Ignore healty and null states +# if (!fv$value[j] %in% c("healthy", "null")) +# { +# # Do not carry forward the healthy state to any other state +# if (fv$value[j - 1] != "healthy") +# # Join previous state with the current one with a | as a separator +# fv$value[j] <- paste(fv$value[j], fv$value[j - 1], sep = "|") +# } +# } +# # Update the long format for each ID with the new states +# for (k in 1:nrow(fv)){ +# if (!fv$value[k] %in% c("healthy", "null")) +# ur[(ur$name >= as.numeric(fv$fy[k]) & ur$name < (as.numeric(fv$fy[k + 1]))),]$value <- fv$value[k] +# } +# +# # Update the states +# health_base <- rows_update(health_base, pivot_wider(ur, id_cols=id)) +# } +# +# +# +# ####### Scenario data ####### +# +# # Loop through each row +# for (i in 1:nrow(health_intervention)){ +# # Create a long df for each row +# ur <- pivot_longer(health_intervention[i,], cols = -id) +# # Get a table for each disease state with the first year when it was appeared +# fv <- ur |> group_by(value) |> summarise(fy = first(name)) |> arrange(fy) +# # Loop through the first value dataset with first year for the disease appearance +# for (j in 1:nrow(fv)){ +# # Ignore healty and null states +# if (!fv$value[j] %in% c("healthy", "null")) +# { +# # Do not carry forward the healthy state to any other state +# if (fv$value[j - 1] != "healthy") +# # Join previous state with the current one with a | as a separator +# fv$value[j] <- paste(fv$value[j], fv$value[j - 1], sep = "|") +# } +# } +# # Update the long format for each ID with the new states +# for (k in 1:nrow(fv)){ +# if (!fv$value[k] %in% c("healthy", "null")) +# ur[(ur$name >= as.numeric(fv$fy[k]) & ur$name < (as.numeric(fv$fy[k + 1]))),]$value <- fv$value[k] +# } +# +# # Update the states +# health_intervention <- rows_update(health_intervention, pivot_wider(ur, id_cols=id)) +# } ### Optimised version @@ -86,6 +90,8 @@ library(data.table) library(dplyr) # Convert health_base to data.table for fast processing + + health_base_dt <- as.data.table(health_base) # Melt (pivot_longer) the entire dataset once to create the long format @@ -127,5 +133,5 @@ health_base_updated <- dcast(setDT(ur), id ~ year, value.var = "final_state") health_base_updated[is.na(health_base_updated)] <- health_base[is.na(health_base_updated)] # Assign the result back to the original health_base variable if desired -health_base_plot <- health_base_updated +health_base_plot <- as.data.frame(health_base_updated)