diff --git a/doc/dsdr/0001-joiners-as-poisson-process.md b/doc/dsdr/0001-joiners-as-poisson-process.md index 0c3cb32..19c6899 100644 --- a/doc/dsdr/0001-joiners-as-poisson-process.md +++ b/doc/dsdr/0001-joiners-as-poisson-process.md @@ -14,7 +14,7 @@ general population for each age as is done in witan.send. ## Status -Accepted +Accepted. ## Consequences diff --git a/doc/dsdr/0006-apply-birthdays.md b/doc/dsdr/0006-apply-birthdays.md new file mode 100644 index 0000000..843f9de --- /dev/null +++ b/doc/dsdr/0006-apply-birthdays.md @@ -0,0 +1,17 @@ +### Applying birthdays with only a year of birth + +## Context + +To help prevent re-identification, only birth years have been provided in the episode data. However, in order to provide a more realistic distribution of ages and birthdays for modelling, arbitrary birth dates are required. + +## Decision + +Each projection will use randomly generated birthdays with corresponding random ages of admission, EACH CONTROLLED WITH A SEED (not yet implemented). This allows the output to account for uncertainty in the input. + +## Status + +Accepted. + +## Consequences + +Actual counts of individuals at particular ages are simply approximations and only generally reflect the "real" population. This methodology does not capture any trends that may be present in age distribution, for example if more children are born in August. 
diff --git a/test/cic/core_test.clj b/test/cic/core_test.clj
index 47f951f..f71b1c9 100644
--- a/test/cic/core_test.clj
+++ b/test/cic/core_test.clj
@@ -23,32 +23,33 @@
   (testing "3 unmodelled episodes removed corresponding to UASC, V3 and V4"
     (is (= 7 (count (remove-unmodelled-episodes data))))))
 
-#_(deftest episodes-test
-    (testing "remove invalid records"
-      (is (= 5 (count (episodes data))))))
-
-
-#_(deftest assoc-period-id-test
-    (testing "id 120 consists of 3 periods"
-      (let [child (filter #(= (:child-id %) 120) (episodes data))
-            result (assoc-period-id child)]
-        (is (= 3 (count (distinct result))))
-        (is (= '("120-0" "120-1" "120-2") (map :period-id result))))))
-
-
-#_(deftest summarise-periods-at-test
-    (let [result (->> (episodes data)
-                      (assoc-period-id)
-                      (group-by :period-id)
-                      (vals)
-                      (map #(summarise-periods-at % projection-start)))]
-      (testing "a single open episode"
-        (is (= 1 (count (filter #(= true (:open? %)) result)))))
-      (testing "duration in care calculated"
-        (is (= 1550 (:duration (first (filter #(= (:period-id %) "120-0") result))))))
-      (testing "multiple episodes in a period"
-        (is (= [{:offset 0, :placement :K1} {:offset 8, :placement :K2}]
-               (->> result
-                    (filter #(= (:period-id %) "120-2"))
-                    first
-                    :episodes))))))
+(deftest episodes-test
+  (testing "remove invalid records"
+    (is (= 6 (count (episodes data))))))
+
+
+(deftest assoc-period-id-test
+  (testing "id 120 consists of 3 periods"
+    (let [child (filter #(= (:child-id %) 120) (episodes data))
+          result (assoc-period-id child)]
+      (is (= 4 (count (distinct result))))
+      (is (= '("120-0" "120-1" "120-2") (distinct (map :period-id result)))))))
+
+
+(deftest summarise-periods-test
+  (let [result (->> (episodes data)
+                    (assoc-period-id)
+                    (group-by :period-id)
+                    (vals)
+                    (map (comp #(assoc-open-at % projection-start)
+                               summarise-periods)))]
+    (testing "remove open episodes"
+      (is (= (- (count (episodes data)) 1) (count result))))
+    (testing "duration in care calculated"
+      (is (= 8 (:duration (first (filter #(= (:period-id %) "120-0") result))))))
+    (testing "multiple episodes in a period"
+      (is (= [{:offset 0, :placement :K1} {:offset 8, :placement :K2}]
+             (->> result
+                  (filter #(= (:period-id %) "120-2"))
+                  first
+                  :episodes))))))
diff --git a/test/cic/projection_test.clj b/test/cic/projection_test.clj
new file mode 100644
index 0000000..5ca4e06
--- /dev/null
+++ b/test/cic/projection_test.clj
@@ -0,0 +1,106 @@
+(ns cic.projection-test
+  (:require [cic.projection :refer :all]
+            [clojure.test :refer :all]
+            [cic.core :as c]
+            [cic.model :as m]
+            [clj-time.core :as t]
+            [clj-time.format :as f]
+            [clojure.test.check.random :as r]
+            [kixi.stats.distribution :as d]
+            [kixi.stats.protocols :as p]))
+
+;; Raw episode rows for children 120, 121, 122 and 124; the three rows for
+;; child 124 are flagged :uasc "True" and include legal statuses V3 and V4.
+(def example-data '({:sex "2", :care-status "N1", :legal-status "C2", :uasc "False", :dob "1999", :ceased "2017-02-18", :id "120", :report-year "2017", :placement "K1", :report-date "2017-02-10"}
+                    {:sex "2", :care-status "N1", :legal-status "C2", :uasc "False", :dob "1999", :ceased "2017-04-18", :id "120", :report-year "2017", :placement "K2", :report-date "2017-02-18"}
+                    {:sex "2", :care-status "N1", :legal-status "C2", :uasc "False", :dob "1999", :ceased "2015-02-18", :id "120", :report-year "2015", :placement "U1", :report-date "2015-02-10"}
+                    {:sex "2", :care-status "N1", :legal-status "C2", :uasc "False", :dob "1999", :ceased "2014-02-18", :id "120", :report-year "2014", :placement "U1", :report-date "2014-02-10"}
+                    {:sex "2", :care-status "B1", :legal-status "C1", :uasc "False", :dob "2000", :ceased nil, :id "121", :report-year "2017", :placement "U1", :report-date "2017-02-10"}
+                    {:sex "2", :care-status "N1", :legal-status "C2", :uasc "False", :dob "1998", :ceased "2017-05-18", :id "122", :report-year "2017", :placement "U2", :report-date "2017-05-10"}
+                    {:sex "2", :care-status "N1", :legal-status "C2", :uasc "False", :dob "1998", :ceased nil, :id "122", :report-year "2018", :placement "U2", :report-date "2018-05-10"}
+                    {:sex "2", :care-status "N1", :legal-status "C2", :uasc "True", :dob "1999", :ceased "2017-07-18", :id "124", :report-year "2017", :placement "U2", :report-date "2017-06-10"}
+                    {:sex "2", :care-status "N1", :legal-status "V3", :uasc "True", :dob "1999", :ceased "2017-07-18", :id "124", :report-year "2017", :placement "U2", :report-date "2017-06-10"}
+                    {:sex "2", :care-status "N1", :legal-status "V4", :uasc "True", :dob "1999", :ceased "2017-07-18", :id "124", :report-year "2017", :placement "U2", :report-date "2017-06-10"}))
+
+;; Fixed seed passed to prepare-ages and project-period-close below.
+(def seed (r/make-random 50))
+
+;; example-data run through c/format-episode, c/episodes and
+;; c/episodes->periods, then handed to prepare-ages together with the seed.
+(def example (prepare-ages (->> example-data
+                                (map c/format-episode)
+                                c/episodes
+                                c/episodes->periods) seed))
+
+(defn duration-model-for-testing
+  "Builds a function of [age seed] returning a sampled duration in days,
+  with age clamped to 0-17 for the coefs lookup - copied from
+  witan.cic.model"
+  [coefs]
+  (fn [age seed]
+    (let [empirical (get coefs (max 0 (min age 17)))
+          [r1 r2] (r/split seed)
+          quantile (int (p/sample-1 (d/uniform {:a 1 :b 3}) r1))
+          [lower median upper] (get empirical quantile)]
+      (m/sample-ci lower median upper r2))))
+
+;; Every age 0-18 maps to the same three [lower median upper] triples.
+(def d-model (duration-model-for-testing {0 [[0 0 0] [1 6 17] [35 56 83]]
+                                          1 [[0 0 0] [1 6 17] [35 56 83]]
+                                          2 [[0 0 0] [1 6 17] [35 56 83]]
+                                          3 [[0 0 0] [1 6 17] [35 56 83]]
+                                          4 [[0 0 0] [1 6 17] [35 56 83]]
+                                          5 [[0 0 0] [1 6 17] [35 56 83]]
+                                          6 [[0 0 0] [1 6 17] [35 56 83]]
+                                          7 [[0 0 0] [1 6 17] [35 56 83]]
+                                          8 [[0 0 0] [1 6 17] [35 56 83]]
+                                          9 [[0 0 0] [1 6 17] [35 56 83]]
+                                          10 [[0 0 0] [1 6 17] [35 56 83]]
+                                          11 [[0 0 0] [1 6 17] [35 56 83]]
+                                          12 [[0 0 0] [1 6 17] [35 56 83]]
+                                          13 [[0 0 0] [1 6 17] [35 56 83]]
+                                          14 [[0 0 0] [1 6 17] [35 56 83]]
+                                          15 [[0 0 0] [1 6 17] [35 56 83]]
+                                          16 [[0 0 0] [1 6 17] [35 56 83]]
+                                          17 [[0 0 0] [1 6 17] [35 56 83]]
+                                          18 [[0 0 0] [1 6 17] [35 56 83]]}))
+
+(def date-format
+  (f/formatter :date))
+
+(deftest prepare-ages-test
+  (testing "birthday is within correct year"
+    (is (= 14 (t/in-years (t/interval (:birthday (first example)) (t/date-time 2014))))))
+  (testing "admission age in correctly calculated"
+    (is (= (t/in-years (t/interval (t/date-time 1999) (t/date-time 2014)))
+           (:admission-age (first example))))))
+
+(deftest days-seq-test
+  (let [start (f/parse date-format "2010-03-31")
+        end (f/parse date-format "2010-04-30")]
+    (testing "a months period creates date for 4 weeks + 1 initial week"
+      (is (= 5 (count (day-seq start end)))))))
+
+(deftest daily-summary-test
+  (let [result (daily-summary example
+                              (f/parse date-format "2015-02-10")
+                              (f/parse date-format "2015-02-28"))]
+    (testing "frequency count of one 15yr old in Q1 for two weeks out of a possible three"
+      ;; Wrapped in `is`: a bare (= ...) form is evaluated and discarded, so
+      ;; it could never fail the test.
+      (is (= 3 (count result)))
+      (is (= 2 (reduce + (map #(get (val %) 15) result))))
+      (is (= 2 (reduce + (map #(get (val %) :Q1) result)))))))
+
+(def e-model (m/episodes-model example))
+
+(deftest project-period-close-test
+  (let [open-data (-> example first (assoc :open? true))
+        result (project-period-close d-model e-model open-data seed)]
+    (testing "duration increases"
+      (is (> (:duration result) (:duration (first example)))))
+    ;; NOTE(review): this compares the alternate-seed duration with the input
+    ;; period, not with `result` - confirm which comparison is intended.
+    (testing "duration is different with alternate seed"
+      (is (not= (:duration (project-period-close d-model e-model open-data (r/make-random 49)))
+                (:duration (first example)))))))