From ecf4487337b8380ada75fa404875bd6860cdfebc Mon Sep 17 00:00:00 2001 From: witash Date: Fri, 6 Sep 2024 15:41:53 +0300 Subject: [PATCH] feat(#144): add macros to reduce boilerplate (#155) * feat(#144): form macro * feat(#144): contact macro * feat(#144): align person and place models with datasource * chore(#144): add unit test for contact type * fix(#144): fixing unit tests * fix(#144): fixing unit tests * fix: renaming test to tests * fix(#144): null condition for person --- macros/cht_contacts_model.sql | 75 ++++++++++++++++++ macros/cht_form_model.sql | 77 +++++++++++++++++++ models/contacts/contact.sql | 3 - models/contacts/contact_type.sql | 29 +++++++ models/contacts/contacts.yml | 25 ++++-- models/contacts/person.sql | 10 ++- models/contacts/place.sql | 8 +- models/contacts/tests/contact_types.yml | 17 ++++ models/contacts/tests/persons.yml | 3 + models/contacts/tests/places.yml | 3 + .../contact_document_metadata_initial.csv | 1 + .../contact/contact_initial_expected.csv | 11 +-- .../contact/contact_source_table_initial.csv | 1 + .../contact/contact_type_initial_expected.csv | 5 ++ .../person/person_initial_expected.csv | 7 +- .../person/person_source_table_initial.csv | 4 +- .../fixtures/place/place_contact_initial.csv | 2 +- .../fixtures/place/place_initial_expected.csv | 10 +-- .../place/place_source_table_initial.csv | 8 +- tests/sqltest/contact.sql | 2 +- 20 files changed, 262 insertions(+), 39 deletions(-) create mode 100644 macros/cht_contacts_model.sql create mode 100644 macros/cht_form_model.sql create mode 100644 models/contacts/contact_type.sql create mode 100644 models/contacts/tests/contact_types.yml create mode 100644 tests/fixtures/contact/contact_type_initial_expected.csv diff --git a/macros/cht_contacts_model.sql b/macros/cht_contacts_model.sql new file mode 100644 index 00000000..d195d0bf --- /dev/null +++ b/macros/cht_contacts_model.sql @@ -0,0 +1,75 @@ +-- a macro defining the reusable columns for all contact models +{% macro 
contact_columns() %} + contact.uuid as uuid, + contact.saved_timestamp, + contact.parent_uuid, + contact.reported, + contact.name +{% endmacro %} + +-- a macro defining the reusable indexes for columns above +{% macro contact_indexes() %} + {{ return([ + {'columns': ['uuid'], 'type': 'hash'}, + {'columns': ['saved_timestamp']}, + {'columns': ['parent_uuid']}, + {'columns': ['reported']}, + {'columns': ['name']} + ])}} +{% endmacro %} + +-- this macro creates a contact model +-- contact_type: the id of the contact_type to be selected +-- parents: a list of parent contacts to join to this table (may be empty), in this format +-- [{'id': '', 'table': ''}, {'id': '', 'table':''}] +-- id: id of the contact_type that is this contacts parent; also used as the join alias and as the output column name +-- table: the table to join to; if the parent contact type has a custom model +-- otherwise, 'contact' to use the main contact table +-- custom_contact_columns: any columns specific to this contact model, as a comma separated select list (may be empty) +-- custom_indexes: any indexes for the contact specific columns +{% macro cht_contact_model(contact_type, parents, custom_contact_columns, custom_indexes=[]) %} + -- combine any contact specific indexes with the general + {%- set all_indexes = contact_indexes() + custom_indexes -%} + + --- if parents have been given, add the hierarchy here; each parent is left joined on the parent_uuid of the previous level, starting from contact + {% set columns = [] %} + {% set joins = [] %} + {% for i in range(0, parents|length) %} + {% set parent = parents[i] %} + {% set prev_parent = parents[i - 1]['id'] if i > 0 else 'contact' %} + + {% set parent_table = parent['table'] %} + {% set join_clause = "LEFT JOIN " ~ ref(parent_table) ~ " AS " ~ parent['id'] ~ " ON " ~ parent['id'] ~ ".uuid = " ~ prev_parent ~ ".parent_uuid" %} + {% do joins.append(join_clause) %} + + {% set column = parent['id'] ~ ".uuid AS " ~ parent['id'] %} + {% do columns.append(column) %} + {% endfor %} + + -- the actual select; a combination of + -- the common contact fields + -- one uuid column per parent, only when parents were given + -- and the contact specific fields, only when any were given + -- config 
common to all contact models + {{ + config( + materialized='incremental', + unique_key='uuid', + on_schema_change='append_new_columns', + indexes=all_indexes + ) + }} + + SELECT + {{ contact_columns() }} {# no trailing comma here: each optional section below brings its own leading comma, so an empty section cannot leave a dangling comma #} + {% if columns %}, {{ columns | join(',\n ') }}{% endif %} + {% if custom_contact_columns %}, {{ custom_contact_columns }}{% endif %} + FROM {{ ref('contact') }} contact + INNER JOIN {{ env_var('POSTGRES_SCHEMA') }}.{{ env_var('POSTGRES_TABLE') }} couchdb ON couchdb._id = contact.uuid + {{ joins | join('\n') }} + WHERE + contact.contact_type = '{{ contact_type }}' + {% if is_incremental() %} + AND contact.saved_timestamp >= {{ max_existing_timestamp('saved_timestamp') }} + {% endif %} +{% endmacro %} diff --git a/macros/cht_form_model.sql b/macros/cht_form_model.sql new file mode 100644 index 00000000..cea98972 --- /dev/null +++ b/macros/cht_form_model.sql @@ -0,0 +1,77 @@ +-- a macro defining the reusable columns for all form models +{% macro data_record_columns() %} + data_record.uuid as uuid, + data_record.saved_timestamp, + data_record.contact_uuid as reported_by, + data_record.parent_uuid as reported_by_parent, + data_record.reported +{% endmacro %} + +-- a macro defining the reusable indexes for columns above +{% macro data_record_indexes() %} + {{ return([ + {'columns': ['uuid'], 'type': 'hash'}, + {'columns': ['saved_timestamp']}, + {'columns': ['reported_by']}, + {'columns': ['reported_by_parent']}, + {'columns': ['reported']} + ])}} +{% endmacro %} + +-- the from, join and where condition common to form models +-- selects the form from data record by name +-- joins back to the source table to get the document +-- and adds an incremental condition +{% macro data_record_join(form_name) %} + FROM {{ ref('data_record') }} data_record + INNER JOIN {{ env_var('POSTGRES_SCHEMA') }}.{{ env_var('POSTGRES_TABLE') }} couchdb ON couchdb._id = data_record.uuid + WHERE + data_record.form = '{{ form_name }}' + {% if is_incremental() %} + AND data_record.saved_timestamp >= {{ max_existing_timestamp('saved_timestamp') }} + {% endif %} +{% 
endmacro %} + +-- this macro creates a simple form model +-- form_name: the name of the form to be selected +-- form_columns: form specific columns, as a comma separated select list (may be empty) +-- form_indexes: any indexes for the form specific columns +{% macro cht_form_model(form_name, form_columns, form_indexes=[]) %} + {{ cht_form_multi([{'form_name': form_name, 'form_columns': form_columns}], form_indexes) }} +{% endmacro %} + +-- this macro creates a model from a list of cht_forms +-- forms: [{ 'form_name': the name of the form to be selected, +-- 'form_columns': form specific columns (may be empty) }] +-- form_indexes: any indexes for the form specific columns +-- each of the forms will be combined with UNION ALL; the forms are assumed to have distinct names, so no de-duplication pass is needed +-- and every form must select the same custom columns, in the same order +{% macro cht_form_multi(forms, form_indexes=[]) %} + -- combine any form specific indexes with the general + {%- set all_indexes = data_record_indexes() + form_indexes -%} + + -- config common to all form models + {{ + config( + materialized='incremental', + unique_key='uuid', + on_schema_change='append_new_columns', + indexes=all_indexes + ) + }} + + -- the actual select; a combination of + -- form specific fields + -- the common data record fields + -- and the common data record from, join and where clause + {% for form in forms %} + SELECT + {{ data_record_columns() }} {# no trailing comma here: the form columns bring their own leading comma, so an empty list cannot leave a dangling comma before FROM #} + {% if form.form_columns %}, {{ form.form_columns }}{% endif %} + {{ data_record_join(form.form_name) }} + -- if there is more than one form, append the next one with UNION ALL + {% if not loop.last %} + UNION ALL + {% endif %} + {% endfor %} +{% endmacro %} diff --git a/models/contacts/contact.sql b/models/contacts/contact.sql index 54a6f49f..a5d07716 100644 --- a/models/contacts/contact.sql +++ b/models/contacts/contact.sql @@ -20,11 +20,8 @@ SELECT doc->'parent'->>'_id' AS parent_uuid, doc->>'name' AS name, COALESCE(doc->>'contact_type', doc->>'type') as contact_type, - doc->>'phone' AS phone, - doc->>'alternative_phone' AS phone2, doc->>'is_active' AS active, doc->>'notes' AS notes, - doc->>'contact_id' AS contact_id, NULLIF(doc->> 'muted', '') AS muted 
FROM {{ ref('document_metadata') }} document_metadata INNER JOIN diff --git a/models/contacts/contact_type.sql b/models/contacts/contact_type.sql new file mode 100644 index 00000000..50e4b8d8 --- /dev/null +++ b/models/contacts/contact_type.sql @@ -0,0 +1,29 @@ +{{ + config( + materialized = 'incremental', + unique_key='id', + on_schema_change='append_new_columns' + ) +}} + +WITH settings AS ( -- one row per contact type listed in the document whose _id is 'settings' + SELECT + jsonb_array_elements(source_table.doc->'settings'->'contact_types') as element + FROM + {{ source('couchdb', env_var('POSTGRES_TABLE')) }} source_table + WHERE _id = 'settings' +), +existing AS ( -- every distinct contact_type value found in the contact model + SELECT + DISTINCT contact_type as id + FROM {{ ref('contact') }} +) +SELECT + COALESCE(settings.element->>'id', existing.id) as id, + CASE -- the id 'person' is always a person type, otherwise the person flag from settings decides (default false); the merged id is compared because existing.id is NULL for a configured type that has no contacts yet + WHEN COALESCE(settings.element->>'id', existing.id) = 'person' THEN TRUE + ELSE COALESCE(settings.element->>'person', 'false')::boolean + END AS person, + (settings.element IS NOT NULL) AS configured -- TRUE only when the type is listed in the settings document +FROM settings +FULL OUTER JOIN existing ON existing.id = settings.element->>'id' diff --git a/models/contacts/contacts.yml b/models/contacts/contacts.yml index a7294c6e..6043866d 100644 --- a/models/contacts/contacts.yml +++ b/models/contacts/contacts.yml @@ -36,18 +36,23 @@ models: data_type: string data_tests: - not_null - - name: phone - data_type: string - - name: phone2 - data_type: string - name: active data_type: string - name: notes data_type: string - - name: contact_id - data_type: string - name: muted data_type: string + - name: contact_type + config: + contract: + enforced: true + columns: + - name: id + data_type: string + - name: person + data_type: boolean + - name: configured + data_type: boolean - name: person config: contract: @@ -70,6 +75,12 @@ models: data_type: date - name: sex data_type: string + - name: patient_id + data_type: string + - name: phone + data_type: string + - name: phone2 + data_type: string - name: place config: contract: @@ -89,6 +100,8 @@ models: data_type: timestamp - name: place_id data_type: string + - name: contact_id + data_type: 
string - name: patient config: contract: diff --git a/models/contacts/person.sql b/models/contacts/person.sql index 88315eda..72d9f74e 100644 --- a/models/contacts/person.sql +++ b/models/contacts/person.sql @@ -13,11 +13,15 @@ SELECT contact.uuid, contact.saved_timestamp, - (couchdb.doc->>'date_of_birth')::date as date_of_birth, - couchdb.doc->>'sex' as sex + NULLIF(couchdb.doc->>'date_of_birth', '')::date as date_of_birth, -- an empty date_of_birth string becomes NULL before the date cast + couchdb.doc->>'sex' as sex, + couchdb.doc->>'phone' AS phone, + couchdb.doc->>'alternative_phone' AS phone2, + couchdb.doc->>'patient_id' as patient_id FROM {{ ref("contact") }} contact +INNER JOIN {{ ref('contact_type') }} contact_type ON contact_type.id = contact.contact_type INNER JOIN {{ source('couchdb', env_var('POSTGRES_TABLE')) }} couchdb ON couchdb._id = uuid -WHERE contact.contact_type = 'person' +WHERE contact_type.person = true {% if is_incremental() %} AND contact.saved_timestamp >= {{ max_existing_timestamp('saved_timestamp') }} {% endif %} diff --git a/models/contacts/place.sql b/models/contacts/place.sql index 4dbb991a..1b1f2683 100644 --- a/models/contacts/place.sql +++ b/models/contacts/place.sql @@ -14,14 +14,12 @@ SELECT uuid, contact.saved_timestamp, + couchdb.doc->>'contact_id' as contact_id, couchdb.doc->>'place_id' as place_id FROM {{ ref('contact') }} contact +INNER JOIN {{ ref('contact_type') }} contact_type ON contact_type.id = contact.contact_type INNER JOIN {{ source('couchdb', env_var('POSTGRES_TABLE')) }} couchdb ON couchdb._id = uuid -WHERE - ( - (couchdb.doc->>'place_id' IS NOT NULL) OR - (contact.contact_type <> 'person') - ) +WHERE contact_type.person = false {% if is_incremental() %} AND contact.saved_timestamp >= {{ max_existing_timestamp('saved_timestamp') }} {% endif %} diff --git a/models/contacts/tests/contact_types.yml b/models/contacts/tests/contact_types.yml new file mode 100644 index 00000000..77597695 --- /dev/null +++ b/models/contacts/tests/contact_types.yml @@ -0,0 +1,17 @@ +unit_tests: + - 
name: test_contact_types + description: | + This unit test validates the transformation logic in the `contact_type` model and ensures data integrity. + It uses fixture data for both `contact` and `source_table` to test the complete logic. + model: contact_type + given: + - input: source('couchdb', "{{ env_var('POSTGRES_TABLE') }}") + format: csv + fixture: person_source_table_initial # NOTE(review): this fixture has no row with _id 'settings', so only contact types found on contacts are exercised and configured is always FALSE - consider adding a settings fixture + - input: ref('contact') + format: csv + fixture: contact_initial_expected + expect: + format: csv + fixture: contact_type_initial_expected + diff --git a/models/contacts/tests/persons.yml b/models/contacts/tests/persons.yml index 00572dfa..1f329be5 100644 --- a/models/contacts/tests/persons.yml +++ b/models/contacts/tests/persons.yml @@ -11,6 +11,9 @@ unit_tests: - input: ref('contact') format: csv fixture: person_contact_initial + - input: ref('contact_type') + format: csv + fixture: contact_type_initial_expected - input: source('couchdb', "{{ env_var('POSTGRES_TABLE') }}") format: csv fixture: person_source_table_initial diff --git a/models/contacts/tests/places.yml b/models/contacts/tests/places.yml index 9e17e8ff..17e4666e 100644 --- a/models/contacts/tests/places.yml +++ b/models/contacts/tests/places.yml @@ -11,6 +11,9 @@ unit_tests: - input: ref('contact') format: csv fixture: place_contact_initial + - input: ref('contact_type') + format: csv + fixture: contact_type_initial_expected - input: source('couchdb', "{{ env_var('POSTGRES_TABLE') }}") format: csv fixture: place_source_table_initial diff --git a/tests/fixtures/contact/contact_document_metadata_initial.csv b/tests/fixtures/contact/contact_document_metadata_initial.csv index 5711115f..30de5e7e 100644 --- a/tests/fixtures/contact/contact_document_metadata_initial.csv +++ b/tests/fixtures/contact/contact_document_metadata_initial.csv @@ -3,3 +3,4 @@ c1,2024-08-01 00:00:00,contact,false c2,2024-08-01 00:00:00,clinic,false c3,2024-08-02 00:00:00,person,false c4,2024-08-02 00:00:00,district_hospital,false +c5,2024-08-02 
00:00:00,health_center,false diff --git a/tests/fixtures/contact/contact_initial_expected.csv b/tests/fixtures/contact/contact_initial_expected.csv index 416d3ed5..bad335a8 100644 --- a/tests/fixtures/contact/contact_initial_expected.csv +++ b/tests/fixtures/contact/contact_initial_expected.csv @@ -1,5 +1,6 @@ -uuid,saved_timestamp,reported,parent_uuid,name,contact_type,phone,phone2,active,notes,contact_id,muted -c1,2024-08-01 00:00:00,2024-07-31 08:00:00+00,p1,John Doe,person,12345,54321,true,Note 1,C-123,false -c2,2024-08-01 00:00:00,2024-07-31 08:00:00+00,p2,Jane Doe,clinic,67890,09876,true,Note 2,C-456,true -c3,2024-08-02 00:00:00,2024-07-31 08:00:00+00,p3,Mike Smith,person,11223,33211,false,Note 3,C-789,false -c4,2024-08-02 00:00:00,2024-07-31 08:00:00+00,p4,Sara Smith,district_hospital,44556,65544,true,Note 4,C-101,true +uuid,saved_timestamp,reported,parent_uuid,name,contact_type,active,notes,muted +c1,2024-08-01 00:00:00,2024-07-31 08:00:00+00,p1,John Doe,person,true,Note 1,false +c2,2024-08-01 00:00:00,2024-07-31 08:00:00+00,p2,Jane Doe,clinic,true,Note 2,true +c3,2024-08-02 00:00:00,2024-07-31 08:00:00+00,p3,Mike Smith,person,false,Note 3,false +c4,2024-08-02 00:00:00,2024-07-31 08:00:00+00,p4,Sara Smith,district_hospital,true,Note 4,true +c5,2024-08-02 00:00:00,2024-07-31 08:00:00+00,p5,James Munroe,health_center,true,Note 5,true diff --git a/tests/fixtures/contact/contact_source_table_initial.csv b/tests/fixtures/contact/contact_source_table_initial.csv index 5c7f0d15..9541a7bc 100644 --- a/tests/fixtures/contact/contact_source_table_initial.csv +++ b/tests/fixtures/contact/contact_source_table_initial.csv @@ -3,3 +3,4 @@ c1,2024-08-01 00:00:00,false,"{""reported_date"": ""1722412800000"", ""parent"": c2,2024-08-01 00:00:00,false,"{""reported_date"": ""1722412800000"", ""parent"": {""_id"": ""p2""}, ""name"": ""Jane Doe"", ""contact_type"": ""clinic"", ""phone"": ""67890"", ""alternative_phone"": ""09876"", ""is_active"": ""true"", ""notes"": ""Note 2"", 
""contact_id"": ""C-456"", ""muted"": ""true""}" c3,2024-08-02 00:00:00,false,"{""reported_date"": ""1722412800000"", ""parent"": {""_id"": ""p3""}, ""name"": ""Mike Smith"", ""contact_type"": ""person"", ""phone"": ""11223"", ""alternative_phone"": ""33211"", ""is_active"": ""false"", ""notes"": ""Note 3"", ""contact_id"": ""C-789"", ""muted"": ""false""}" c4,2024-08-02 00:00:00,false,"{""reported_date"": ""1722412800000"", ""parent"": {""_id"": ""p4""}, ""name"": ""Sara Smith"", ""contact_type"": ""district_hospital"", ""phone"": ""44556"", ""alternative_phone"": ""65544"", ""is_active"": ""true"", ""notes"": ""Note 4"", ""contact_id"": ""C-101"", ""muted"": ""true""}" +c5,2024-08-02 00:00:00,false,"{""reported_date"": ""1722412800000"", ""parent"": {""_id"": ""p5""}, ""name"": ""James Munroe"", ""contact_type"": ""health_center"", ""phone"": ""77889"", ""alternative_phone"": ""88997"", ""is_active"": ""true"", ""notes"": ""Note 5"", ""contact_id"": ""C-103"", ""muted"": ""true""}" diff --git a/tests/fixtures/contact/contact_type_initial_expected.csv b/tests/fixtures/contact/contact_type_initial_expected.csv new file mode 100644 index 00000000..0f898fcd --- /dev/null +++ b/tests/fixtures/contact/contact_type_initial_expected.csv @@ -0,0 +1,5 @@ +id,person,configured +person,TRUE,FALSE +clinic,FALSE,FALSE +district_hospital,FALSE,FALSE +health_center,FALSE,FALSE diff --git a/tests/fixtures/person/person_initial_expected.csv b/tests/fixtures/person/person_initial_expected.csv index 886f8432..1321d031 100644 --- a/tests/fixtures/person/person_initial_expected.csv +++ b/tests/fixtures/person/person_initial_expected.csv @@ -1,4 +1,3 @@ -uuid,saved_timestamp,date_of_birth,sex -p1,2024-08-01 00:00:00,1980-01-01,M -p2,2024-08-01 00:00:00,1990-05-15,F - +uuid,saved_timestamp,date_of_birth,sex,phone,phone2 +p1,2024-08-01 00:00:00,1980-01-01,M,12345,54321 +p2,2024-08-01 00:00:00,1990-05-15,F,67890,9876 diff --git a/tests/fixtures/person/person_source_table_initial.csv 
b/tests/fixtures/person/person_source_table_initial.csv index b36b1f6c..91243ef4 100644 --- a/tests/fixtures/person/person_source_table_initial.csv +++ b/tests/fixtures/person/person_source_table_initial.csv @@ -1,5 +1,5 @@ _id,saved_timestamp,_deleted,doc -p1,2024-08-01 00:00:00,false,"{""date_of_birth"": ""1980-01-01"", ""sex"": ""M""}" -p2,2024-08-01 00:00:00,false,"{""date_of_birth"": ""1990-05-15"", ""sex"": ""F""}" +p1,2024-08-01 00:00:00,false,"{""date_of_birth"": ""1980-01-01"", ""sex"": ""M"", ""phone"": ""12345"", ""alternative_phone"": ""54321""}" +p2,2024-08-01 00:00:00,false,"{""date_of_birth"": ""1990-05-15"", ""sex"": ""F"", ""phone"": ""67890"", ""alternative_phone"": ""9876"" }" p3,2024-08-02 00:00:00,false,"{""type"": ""clinic""}" p4,2024-08-02 00:00:00,false,"{""type"": ""clinic""}" diff --git a/tests/fixtures/place/place_contact_initial.csv b/tests/fixtures/place/place_contact_initial.csv index a38e1ef6..e3b39d78 100644 --- a/tests/fixtures/place/place_contact_initial.csv +++ b/tests/fixtures/place/place_contact_initial.csv @@ -3,4 +3,4 @@ p1,2024-08-01 00:00:00,clinic p2,2024-08-01 00:00:00,health_center p3,2024-08-02 00:00:00,district_hospital p4,2024-08-02 00:00:00,person -p5,2024-08-02 00:00:00,contact +p5,2024-08-02 00:00:00,health_center diff --git a/tests/fixtures/place/place_initial_expected.csv b/tests/fixtures/place/place_initial_expected.csv index 391f3dff..9496ab44 100644 --- a/tests/fixtures/place/place_initial_expected.csv +++ b/tests/fixtures/place/place_initial_expected.csv @@ -1,5 +1,5 @@ -uuid,saved_timestamp,place_id -p1,2024-08-01 00:00:00,P-001 -p2,2024-08-01 00:00:00,P-002 -p3,2024-08-02 00:00:00,P-003 -p5,2024-08-02 00:00:00,P-004 +uuid,saved_timestamp,place_id,contact_id +p1,2024-08-01 00:00:00,P-001,C-123 +p2,2024-08-01 00:00:00,P-002,C-456 +p3,2024-08-02 00:00:00,P-003,C-789 +p5,2024-08-02 00:00:00,P-004,C-101 diff --git a/tests/fixtures/place/place_source_table_initial.csv 
b/tests/fixtures/place/place_source_table_initial.csv index 36781c54..29f08488 100644 --- a/tests/fixtures/place/place_source_table_initial.csv +++ b/tests/fixtures/place/place_source_table_initial.csv @@ -1,6 +1,6 @@ _id,saved_timestamp,_deleted,doc -p1,2024-08-01 00:00:00,false,"{""place_id"": ""P-001"", ""type"": ""clinic""}" -p2,2024-08-01 00:00:00,false,"{""place_id"": ""P-002"", ""type"": ""health_center""}" -p3,2024-08-02 00:00:00,false,"{""place_id"": ""P-003"", ""type"": ""district_hospital""}" +p1,2024-08-01 00:00:00,false,"{""place_id"": ""P-001"", ""type"": ""clinic"", ""contact_id"": ""C-123""}" +p2,2024-08-01 00:00:00,false,"{""place_id"": ""P-002"", ""type"": ""health_center"", ""contact_id"": ""C-456""}" +p3,2024-08-02 00:00:00,false,"{""place_id"": ""P-003"", ""type"": ""district_hospital"", ""contact_id"": ""C-789""}" p4,2024-08-02 00:00:00,false,"{""type"": ""person""}" -p5,2024-08-02 00:00:00,false,"{""place_id"": ""P-004"", ""type"": ""contact""}" +p5,2024-08-02 00:00:00,false,"{""place_id"": ""P-004"", ""type"": ""contact"", ""contact_type"": ""clinic"", ""contact_id"": ""C-101""}" diff --git a/tests/sqltest/contact.sql b/tests/sqltest/contact.sql index 71667fcb..11400652 100644 --- a/tests/sqltest/contact.sql +++ b/tests/sqltest/contact.sql @@ -13,5 +13,5 @@ WHERE OR -- fields dont match contact.parent_uuid <> couchdb.doc->'parent'->>'_id' OR contact.contact_type <> COALESCE(couchdb.doc->>'contact_type', couchdb.doc->>'type') OR - contact.phone <> couchdb.doc->>'phone' + contact.name <> couchdb.doc->>'name' )