diff --git a/R/fix_sc_dates.R b/R/fix_sc_dates.R
index c636980a6..117acbaab 100644
--- a/R/fix_sc_dates.R
+++ b/R/fix_sc_dates.R
@@ -9,7 +9,7 @@
 #' @return A date vector with replaced end dates
 fix_sc_start_dates <- function(start_date, period_start) {
   # Fix sds_start_date is missing by setting start_date to be the start of
-  # financial year
+  # financial period
   start_date <- dplyr::if_else(
     is.na(start_date),
     period_start,
@@ -30,12 +30,12 @@ fix_sc_start_dates <- function(start_date, period_start) {
 #' @param period Social care latest submission period.
 #'
 #' @return A date vector with replaced end dates
-fix_sc_end_dates <- function(start_date, end_date, period) {
+fix_sc_end_dates <- function(start_date, end_date, period_end_date) {
   # Fix sds_end_date is earlier than sds_start_date by setting end_date to be
   # the end of financial year
   end_date <- dplyr::if_else(
     start_date > end_date,
-    end_fy(year = stringr::str_sub(period, 1L, 4L), "alternate"),
+    period_end_date,
     end_date
   )
 
@@ -57,7 +57,7 @@ fix_sc_end_dates <- function(start_date, end_date, period) {
 #' @return A date vector with replaced end dates
 fix_sc_missing_end_dates <- function(end_date, period_end) {
   # Fix sds_end_date is earlier than sds_start_date by setting end_date to be
-  # the end of financial year
+  # the end of financial period
   end_date <- dplyr::if_else(
     is.na(end_date),
     period_end,
diff --git a/R/process_lookup_sc_demographics.R b/R/process_lookup_sc_demographics.R
index 8c363f547..96adc985e 100644
--- a/R/process_lookup_sc_demographics.R
+++ b/R/process_lookup_sc_demographics.R
@@ -28,30 +28,46 @@ process_lookup_sc_demographics <- function(
     dplyr::pull(.data$pc7)
 
 
-  # Data Cleaning ---------------------------------------
-
+  #  Fill in missing data and flag latest cases to keep ---------------------------------------
   sc_demog <- data %>%
-    dplyr::mutate(
-      # use chi if upi is NA
-      upi = dplyr::coalesce(.data$upi, .data$chi_upi),
-      # check gender code - replace code 99 with 9
-      submitted_gender = replace(.data$submitted_gender, .data$submitted_gender == 99L, 9L)
+    dplyr::rename(
+      chi = chi_upi,
+      gender = chi_gender_code,
+      dob = chi_date_of_birth
     ) %>%
+    # fill in missing demographic details
+    dplyr::arrange(period, social_care_id) %>%
+    dplyr::group_by(social_care_id, sending_location) %>%
+    tidyr::fill(chi, .direction = ("updown")) %>%
+    tidyr::fill(dob, .direction = ("updown")) %>%
+    tidyr::fill(date_of_death, .direction = ("updown")) %>%
+    tidyr::fill(gender, .direction = ("updown")) %>%
+    tidyr::fill(chi_postcode, .direction = ("updown")) %>%
+    tidyr::fill(submitted_postcode, .direction = ("updown")) %>%
+    dplyr::ungroup() %>%
+    # format postcodes using `phsmethods`
+    dplyr::mutate(dplyr::across(tidyselect::contains("postcode"), ~ phsmethods::format_postcode(.x, format = "pc7"))) # are sc postcodes even used anywhere?
+
+
+  # flag unique cases of chi and sc_id, and flag the latest record (sc_demographics latest flag is not accurate)
+  sc_demog <- sc_demog %>%
+    dplyr::group_by(chi, sending_location) %>%
+    dplyr::mutate(latest = dplyr::last(period)) %>% # flag latest period for chi
+    dplyr::group_by(chi, social_care_id, sending_location) %>%
+    dplyr::mutate(latest_sc_id = dplyr::last(period)) %>% # flag latest period for social care
+    dplyr::group_by(chi, sending_location) %>%
+    dplyr::mutate(last_sc_id = dplyr::last(social_care_id)) %>%
     dplyr::mutate(
-      # use CHI sex if available
-      gender = dplyr::if_else(
-        is.na(.data$chi_gender_code) | .data$chi_gender_code == 9L,
-        .data$submitted_gender,
-        .data$chi_gender_code
-      ),
-      # Use CHI DoB if available
-      dob = dplyr::coalesce(.data$chi_date_of_birth, .data$submitted_date_of_birth)
+      latest_flag = ifelse((latest == period & last_sc_id == social_care_id) | is.na(chi), 1, 0),
+      keep = ifelse(latest_sc_id == period, 1, 0)
     ) %>%
-    # format postcodes using `phsmethods`
-    dplyr::mutate(dplyr::across(
-      tidyselect::contains("postcode"),
-      ~ phsmethods::format_postcode(.x, format = "pc7")
-    ))
+    dplyr::ungroup()
+
+  sc_demog <- sc_demog %>%
+    dplyr::select(-period, -latest_record_flag, -latest, -last_sc_id, -latest_sc_id) %>%
+    dplyr::distinct()
+
+  # postcodes ---------------------------------------------------------------
 
   # count number of na postcodes
   na_postcodes <- sc_demog %>%
@@ -69,29 +85,32 @@ process_lookup_sc_demographics <- function(
       ~ dplyr::if_else(stringr::str_detect(.x, uk_pc_regexp), .x, NA)
     )) %>%
     dplyr::select(
-      "latest_record_flag",
-      "extract_date",
       "sending_location",
       "social_care_id",
-      "upi",
+      "chi",
       "gender",
       "dob",
+      "date_of_death",
       "submitted_postcode",
-      "chi_postcode"
+      "chi_postcode",
+      "keep",
+      "latest_flag"
     ) %>%
     # check if submitted_postcode matches with postcode lookup
     dplyr::mutate(
-      valid_pc = .data$submitted_postcode %in% valid_spd_postcodes
+      valid_pc_submitted = .data$submitted_postcode %in% valid_spd_postcodes,
+      valid_pc_chi = .data$chi_postcode %in% valid_spd_postcodes
     ) %>%
     # use submitted_postcode if valid, otherwise use chi_postcode
     dplyr::mutate(postcode = dplyr::case_when(
-      (!is.na(.data$submitted_postcode) & .data$valid_pc) ~ .data$submitted_postcode,
-      (is.na(.data$submitted_postcode) & !.data$valid_pc) ~ .data$chi_postcode
+      (!is.na(.data$chi_postcode) & .data$valid_pc_chi) ~ .data$chi_postcode,
+      ((is.na(.data$chi_postcode) | !(.data$valid_pc_chi)) & !(is.na(.data$submitted_postcode)) & .data$valid_pc_submitted) ~ .data$submitted_postcode,
+      (is.na(.data$submitted_postcode) & !.data$valid_pc_submitted) ~ .data$chi_postcode
     )) %>%
     dplyr::mutate(postcode_type = dplyr::case_when(
-      (!is.na(.data$submitted_postcode) & .data$valid_pc) ~ "submitted",
-      (is.na(.data$submitted_postcode) & !.data$valid_pc) ~ "chi",
-      (is.na(.data$submitted_postcode) & is.na(.data$chi_postcode)) ~ "missing"
+      (postcode == chi_postcode) ~ "chi",
+      (postcode == submitted_postcode) ~ "submitted",
+      (is.na(.data$submitted_postcode) & is.na(.data$chi_postcode) | is.na(.data$postcode)) ~ "missing"
     ))
 
   # Check where the postcodes are coming from
@@ -102,26 +121,32 @@ process_lookup_sc_demographics <- function(
   na_replaced_postcodes <- sc_demog %>%
     dplyr::count(dplyr::across(tidyselect::ends_with("_postcode"), ~ is.na(.x)))
 
-
   sc_demog_lookup <- sc_demog %>%
+    dplyr::filter(keep == 1) %>% # filter to only keep latest record for sc id and chi
+    dplyr::select(-postcode_type, -valid_pc_submitted, -valid_pc_chi, -submitted_postcode, -chi_postcode) %>%
+    dplyr::distinct() %>%
     # group by sending location and ID
-    dplyr::group_by(.data$sending_location, .data$social_care_id) %>%
+    dplyr::group_by(.data$sending_location, .data$chi, .data$social_care_id, .data$latest_flag) %>%
     # arrange so latest submissions are last
     dplyr::arrange(
       .data$sending_location,
       .data$social_care_id,
-      .data$latest_record_flag,
-      .data$extract_date
+      .data$latest_flag
     ) %>%
     # summarise to select the last (non NA) submission
     dplyr::summarise(
-      chi = dplyr::last(.data$upi),
       gender = dplyr::last(.data$gender),
       dob = dplyr::last(.data$dob),
-      postcode = dplyr::last(.data$postcode)
+      postcode = dplyr::last(.data$postcode),
+      date_of_death = dplyr::last(.data$date_of_death)
     ) %>%
     dplyr::ungroup()
 
+  # check to make sure all cases of chi are still there
+  dplyr::n_distinct(sc_demog_lookup$chi) # 524810
+  dplyr::n_distinct(sc_demog_lookup$social_care_id) # 636404
+
+
   if (write_to_disk) {
     write_file(
       sc_demog_lookup,
diff --git a/R/process_sc_all_sds.R b/R/process_sc_all_sds.R
index f9ca52f24..453db3e40 100644
--- a/R/process_sc_all_sds.R
+++ b/R/process_sc_all_sds.R
@@ -15,14 +15,17 @@ process_sc_all_sds <- function(
     sc_demog_lookup,
     write_to_disk = TRUE) {
   # Match on demographics data (chi, gender, dob and postcode)
-  matched_sds_data <- data %>%
-    dplyr::left_join(
+  matched_sds_data <- data %>% #
+    dplyr::filter(.data$sds_start_date_after_period_end_date != 1) %>%
+    dplyr::right_join(
       sc_demog_lookup,
       by = c("sending_location", "social_care_id")
     ) %>%
     # when multiple social_care_id from sending_location for single CHI
     # replace social_care_id with latest
-    replace_sc_id_with_latest()
+    replace_sc_id_with_latest() %>%
+    dplyr::select(-latest_sc_id, -latest_flag, -sds_start_date_after_period_end_date) %>%
+    dplyr::distinct()
 
   # Data Cleaning ---------------------------------------
   sds_full_clean <- matched_sds_data %>%
@@ -50,7 +53,7 @@ process_sc_all_sds <- function(
         .data$sds_start_date,
         .data$sds_period_start_date
       ),
-      # If SDS end date is missing, assign end of FY
+      # If SDS end date is missing, assign end of financial period
       sds_end_date = fix_sc_missing_end_dates(
         .data$sds_end_date,
         .data$sds_period_end_date
@@ -59,14 +62,19 @@ process_sc_all_sds <- function(
       sds_end_date = fix_sc_end_dates(
         .data$sds_start_date,
         .data$sds_end_date,
-        .data$period
+        .data$sds_period_end_date
       )
     ) %>%
+    dplyr::select(
+      -sds_period_start_date, -sds_period_end_date,
+      -sds_start_date_after_end_date
+    ) %>%
     # rename for matching source variables
     dplyr::rename(
       record_keydate1 = .data$sds_start_date,
       record_keydate2 = .data$sds_end_date
     ) %>%
+    dplyr::distinct() %>%
     # Pivot longer on sds option variables
     tidyr::pivot_longer(
       cols = tidyselect::contains("sds_option_"),
@@ -103,6 +111,7 @@ process_sc_all_sds <- function(
     ) %>%
     dplyr::arrange(.data$period,
       .data$record_keydate1,
+      .data$record_keydate2,
       .by_group = TRUE
     ) %>%
     # Create a flag for episodes that are going to be merged
diff --git a/R/read_lookup_sc_demographics.R b/R/read_lookup_sc_demographics.R
index fcdde5417..fe9a5e71f 100644
--- a/R/read_lookup_sc_demographics.R
+++ b/R/read_lookup_sc_demographics.R
@@ -12,16 +12,15 @@ read_lookup_sc_demographics <- function(sc_connection = phs_db_connection(dsn =
   ) %>%
     dplyr::select(
       "latest_record_flag",
-      "extract_date",
+      "period",
       "sending_location",
+      "sending_location_name",
       "social_care_id",
-      "upi",
       "chi_upi",
-      "submitted_postcode",
-      "chi_postcode",
-      "submitted_date_of_birth",
       "chi_date_of_birth",
-      "submitted_gender",
+      "date_of_death",
+      "chi_postcode",
+      "submitted_postcode",
       "chi_gender_code"
     ) %>%
     dplyr::collect() %>%
@@ -29,10 +28,10 @@ read_lookup_sc_demographics <- function(sc_connection = phs_db_connection(dsn =
       dplyr::across(c(
         "latest_record_flag",
         "sending_location",
-        "submitted_gender",
         "chi_gender_code"
       ), as.integer)
-    )
+    ) %>%
+    dplyr::distinct()
 
   return(sc_demog)
 }
diff --git a/R/read_sc_all_sds.R b/R/read_sc_all_sds.R
index 18c5b52ec..ab9bb20e1 100644
--- a/R/read_sc_all_sds.R
+++ b/R/read_sc_all_sds.R
@@ -22,9 +22,8 @@ read_sc_all_sds <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPR
       "sds_option_1",
       "sds_option_2",
       "sds_option_3",
-      "sds_start_date_after_end_date",
-      "sds_start_date_after_period_end_date",
-      "sds_end_date_not_within_period"
+      "sds_start_date_after_end_date", # get fixed
+      "sds_start_date_after_period_end_date" # get removed
     ) %>%
     dplyr::collect() %>%
     dplyr::distinct() %>%
@@ -33,8 +32,7 @@ read_sc_all_sds <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPR
       "sds_option_1",
       "sds_option_2",
       "sds_option_3"
-    ), as.integer)) %>%
-    dplyr::filter(.data$sds_start_date_after_period_end_date != 1)
+    ), as.integer))
 
   return(sds_full_data)
 }
diff --git a/R/replace_sc_id_with_latest.R b/R/replace_sc_id_with_latest.R
index 73c1a3706..2c32bbb93 100644
--- a/R/replace_sc_id_with_latest.R
+++ b/R/replace_sc_id_with_latest.R
@@ -7,33 +7,23 @@ replace_sc_id_with_latest <- function(data) {
   # Check for required variables
   check_variables_exist(
     data,
-    c("sending_location", "social_care_id", "chi", "period")
+    c("sending_location", "social_care_id", "chi", "latest_flag")
   )
 
   # select variables we need
   filter_data <- data %>%
     dplyr::select(
-      "sending_location", "social_care_id", "chi", "period"
+      "sending_location", "social_care_id", "chi", "latest_flag"
     ) %>%
-    dplyr::filter(!(is.na(.data$chi)))
+    dplyr::filter(!(is.na(.data$chi))) %>%
+    dplyr::distinct()
 
   change_sc_id <- filter_data %>%
-    # Sort (by sending_location, chi and period) for unique chi/sending location
-    dplyr::arrange(
-      .data$sending_location,
-      .data$chi,
-      dplyr::desc(.data$period)
-    ) %>%
-    # Find the latest sc_id for each chi/sending location by keeping latest period
-    dplyr::distinct(
-      .data$sending_location,
-      .data$chi,
-      .keep_all = TRUE
-    ) %>%
+    dplyr::filter(latest_flag == 1) %>%
     # Rename for latest sc id
     dplyr::rename(latest_sc_id = "social_care_id") %>%
-    # drop period for matching
-    dplyr::select(-"period")
+    # drop latest_flag for matching
+    dplyr::select(-"latest_flag")
 
   return_data <- change_sc_id %>%
     # Match back onto data
@@ -41,6 +31,7 @@ replace_sc_id_with_latest <- function(data) {
       by = c("sending_location", "chi"),
       multiple = "all"
     ) %>%
+    dplyr::filter(!(is.na(period))) %>%
     # Overwrite sc id with the latest
     dplyr::mutate(
       social_care_id = dplyr::if_else(
diff --git a/man/fix_sc_end_dates.Rd b/man/fix_sc_end_dates.Rd
index 1bf808bea..041751319 100644
--- a/man/fix_sc_end_dates.Rd
+++ b/man/fix_sc_end_dates.Rd
@@ -4,7 +4,7 @@
 \alias{fix_sc_end_dates}
 \title{Fix sc end dates}
 \usage{
-fix_sc_end_dates(start_date, end_date, period)
+fix_sc_end_dates(start_date, end_date, period_end_date)
 }
 \arguments{
 \item{start_date}{A vector containing dates.}