From 819e4fb47c2862a56a48e83c5a60cd620852529b Mon Sep 17 00:00:00 2001
From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com>
Date: Tue, 20 Jun 2023 08:14:17 +0000
Subject: [PATCH 01/40] Work in progress

---
 .../csww_main_functions.py                   |  94 +++++++++
 .../lds_csww_clean/configuration.py          |  80 ++++++++
 .../lds_csww_clean/csww_record.py            | 179 ++++++++++++++++++
 .../lds_csww_clean/file_creator.py           |  62 ++++++
 .../lds_csww_clean/schema.py                 |  17 ++
 5 files changed, 432 insertions(+)
 create mode 100644 liiatools/datasets/social_work_workforce/lds_csww_clean/configuration.py
 create mode 100644 liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py
 create mode 100644 liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py
 create mode 100644 liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py

diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py
index c68a681c..3184c26f 100644
--- a/liiatools/datasets/social_work_workforce/csww_main_functions.py
+++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py
@@ -1,9 +1,35 @@
+import logging
+import click_log
+import click as click
+from pathlib import Path
+from datetime import datetime
+
 from liiatools.datasets.social_work_workforce.sample_data import (
     generate_sample_csww_file,
 )
 from liiatools.csdatatools.util.stream import consume
 from liiatools.csdatatools.util.xml import etree, to_xml
 
+# Dependencies for cleanfile()
+from liiatools.csdatatools.util.xml import dom_parse
+from liiatools.csdatatools.datasets.cincensus import filters
+from liiatools.datasets.social_work_workforce.lds_csww_clean.schema import Schema
+
+from liiatools.datasets.social_work_workforce.lds_csww_clean import (
+    file_creator,
+    configuration as clean_config,
+    csww_record
+)
+
+from liiatools.datasets.shared_functions.common import (
+    flip_dict,
+    check_file_type,
+    supported_file_types,
+    check_year,
+    check_year_within_range,
+    save_year_error,
+    save_incorrect_year_error
+)
 
 def generate_sample(output: str):
     """
@@ -25,3 +51,71 @@ def generate_sample(output: str):
         FILE.write(element)
     except FileNotFoundError:
         print("The file path provided does not exist")
+
+
+def cleanfile(input, la_code, la_log_dir, output):
+    """
+    Cleans input Children Social Work workforce xml files according to config and outputs cleaned csv files.
+ :param input: should specify the input file location, including file name and suffix, and be usable by a Path function + :param la_code: should be a three-letter string for the local authority depositing the file + :param la_log_dir: should specify the path to the local authority's log folder + :param output: should specify the path to the output folder + :return: None + """ + + # Open & Parse file + print("Starting # Open & Parse file") + if ( + check_file_type( + input, + file_types=[".xml"], + supported_file_types=supported_file_types, + la_log_dir=la_log_dir, + ) + == "incorrect file type" + ): + return + stream = dom_parse(input) + stream = list(stream) + + # Get year from input file + try: + filename = str(Path(input).resolve().stem) + input_year = check_year(filename) + except (AttributeError, ValueError): + save_year_error(input, la_log_dir) + return + + # Check year is within acceptable range for data retention policy + print("Starting # Check year") + years_to_go_back = 6 + year_start_month = 6 + reference_date = datetime.now() + if check_year_within_range(input_year, years_to_go_back, year_start_month, reference_date) is False: + save_incorrect_year_error(input, la_log_dir) + return + + # Configure stream + print("Starting # Configure stream") + config = clean_config.Config() + la_name = flip_dict(config["data_codes"])[la_code] + stream = filters.strip_text(stream) + stream = filters.add_context(stream) + stream = filters.add_schema(stream, schema=Schema(input_year).schema) + + # Output result + #print("Starting # Output result") + stream = csww_record.message_collector(stream) # <=== this is the problem - not returning any stream data + #print(f"Stream = {stream}") + data = csww_record.export_table(stream) + #print(f"Data = {data}") + data = file_creator.add_fields(input_year, data, la_name, la_code) + #print(data) + file_creator.export_file(input, output, data) + +cleanfile("/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml", + "BAD", + "/workspaces/liia_tools/liiatools/datasets/social_work_workforce/lds_csww_clean", + "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean" + ) + +print("===> Finished running csww_main_functions.py") \ No newline at end of file diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/configuration.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/configuration.py new file mode 100644 index 00000000..8d3f291c --- /dev/null +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/configuration.py @@ -0,0 +1,80 @@ +import datetime +import logging +import os +from pathlib import Path +import yaml +from string import Template + +from liiatools.spec import common as common_asset_dir + +log = logging.getLogger(__name__) + +COMMON_CONFIG_DIR = Path(common_asset_dir.__file__).parent + + +class Config(dict): + def __init__(self, *config_files): + super().__init__() + + if not config_files: + config_files = [ + "DEFAULT_DATA_CODES", + ] + + for file in config_files: + if file == "DEFAULT_DATA_CODES": + file = COMMON_CONFIG_DIR / "LA-codes.yml" + self.load_config(file, conditional=False) + + self["config_date"] = datetime.datetime.now().isoformat() + try: + self["username"] = os.getlogin() + except OSError: + # This happens when tests are not run under a login shell, e.g. CI pipeline + pass + + def load_config(self, filename, conditional=False, warn=False): + """ + Load configuration from yaml file. 
Any loaded configuration + is only set if the values don't already exist in CONFIG. + + Files can contain ${} placeholders following the Python string.Template format. + The context will include any keys already existing in the configuration, any keys + from the current file - however, if these include placeholders, the placeholders + will not be replaced. Finally, environment variables can be referenced with + `os_environ_VARIABLE_NAME`. + + Keyword arguments: + filename -- Filename to load from + conditional -- If True, ignore file if it doesn't exist. If False, fail. (default False) + """ + if conditional and not os.path.isfile(filename): + if warn: + log.warning("Missing optional file {}".format(filename)) + + return + + with open(filename) as FILE: + user_config = yaml.load(FILE, Loader=yaml.FullLoader) + + log.info( + "Loading {} configuration values from '{}'.".format( + len(user_config), filename + ) + ) + + environment_dict = {"os_environ_{}".format(k): v for k, v in os.environ.items()} + + variables = dict(self) + variables.update(user_config) + variables.update(environment_dict) + + with open(filename, "rt") as FILE: + user_config_string = FILE.read() + + user_config_template = Template(user_config_string) + user_config_string = user_config_template.substitute(variables) + + user_config = yaml.load(user_config_string, Loader=yaml.FullLoader) + + self.update(user_config) diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py new file mode 100644 index 00000000..57e0b379 --- /dev/null +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py @@ -0,0 +1,179 @@ +from typing import Iterator +import tablib +from more_itertools import peekable + +from sfdata_stream_parser import events +from sfdata_stream_parser.collectors import xml_collector + + +class CSWWEvent(events.ParseEvent): + pass + + +class HeaderEvent(events.ParseEvent): + pass + + +def _reduce_dict(dict_instance): + new_dict = {} + for key, value in dict_instance.items(): + if len(value) == 1: + new_dict[key] = value[0] + else: + new_dict[key] = value + return new_dict + + +@xml_collector +def text_collector(stream): + data_dict = {} + current_element = None + for event in stream: + if isinstance(event, events.StartElement): + current_element = event.tag + if isinstance(event, events.TextNode) and event.text: + data_dict.setdefault(current_element, []).append(event.text) + return _reduce_dict(data_dict) + + +# @xml_collector +# def csww_collector(stream): +# data_dict = {} +# stream = peekable(stream) +# last_tag = None +# while stream: +# event = stream.peek() +# last_tag = event.get("tag", last_tag) +# if event.get("tag") in ( +# "Assessments", +# "CINPlanDates", +# "Section47", +# "ChildProtectionPlans", +# ): +# data_dict.setdefault(event.tag, []).append(text_collector(stream)) +# else: +# if isinstance(event, events.TextNode) and event.text: +# data_dict.setdefault(last_tag, []).append(event.text) +# next(stream) + +# return _reduce_dict(data_dict) + + +# @xml_collector +# def cswwworker_collector(stream): +# data_dict = {} +# stream = peekable(stream) +# assert stream.peek().tag == "CSWWWorker" +# while stream: +# event = stream.peek() +# print(f"Event tag = {event.get('tag')}") +# if event.get("tag") == "CSWWdetails": +# data_dict.setdefault(event.tag, []).append(text_collector(stream)) +# # elif event.get("tag") == "LALevelVacancies": +# # data_dict.setdefault(event.tag, 
[]).append(csww_collector(stream)) +# else: +# next(stream) +# return _reduce_dict(data_dict) + + +@xml_collector +def message_collector(stream): + stream = peekable(stream) + assert stream.peek().tag == "Message", "Expected Message, got {}".format( + stream.peek().tag + ) + while stream: + event = stream.peek() + if event.get("tag") == "Header": + print(f"Header stream = {stream}") + header_record = text_collector(stream) + if header_record: + yield HeaderEvent(record=header_record) + elif event.get("tag") == "CSWWWorker": + csww_record = text_collector(stream) + if csww_record: + #print(f"yielding csww_record event: {CSWWEvent(record=csww_record)}") + #print(f"CSWWEvent(record=csww_record) = {CSWWEvent(record=csww_record)}") + yield CSWWEvent(record=csww_record) + else: + next(stream) + + +__EXPORT_HEADERS = [ + "AgencyWorker", + "SWENo", + "FTE", + "PersonBirthDate", + "GenderCurrent", + "Ethnicity", + "QualInst", + "StepUpGrad", + "RoleStartDate", + "StartOrigin", + "Cases30", + "WorkingDaysLost", + "ContractWeeks", + "FrontlineGrad", + "Absat30Sept", + "ReasonAbsence", + "CFKSSstatus", +] + + +def _maybe_list(value): + if value is None: + value = [] + if not isinstance(value, list): + value = [value] + print(f"maybe_list(value) = {value}") + return value + + +def csww_event(record, property, event_name=None): + if event_name is None: + event_name = property + value = record.get(property) + if value: + new_record = {**record, "Date": value, "Type": event_name} + return ({k: new_record.get(k) for k in __EXPORT_HEADERS},) + + return () + + +def event_to_records(event: CSWWEvent) -> Iterator[dict]: + record = event.record + #print(f"event.record = {record}") + # child = { + # **record.get("ChildIdentifiers", {}), + # **record.get("ChildCharacteristics", {}), + # } + # child["Disabilities"] = ",".join(_maybe_list(child.get("Disability"))) + + print(record.get("CSWWWorker")) + for csww_item in _maybe_list(record): + yield from csww_event({**child, **csww_item}, "CINreferralDate") + yield from csww_event({**child, **csww_item}, "CINclosureDate") + + for assessment in _maybe_list(csww_item.get("Assessments")): + assessment["Factors"] = ",".join( + _maybe_list(assessment.get("AssessmentFactors")) + ) + yield from csww_event( + {**child, **csww_item, **assessment}, "AssessmentActualStartDate" + ) + yield from csww_event( + {**child, **csww_item, **assessment}, "AssessmentAuthorisationDate" + ) + + +def export_table(stream): + #print(f"export_table() called for stream: {stream}") + data = tablib.Dataset(headers=__EXPORT_HEADERS) + #print(f"header data in export_table() = {data}") + for event in stream: + if isinstance(event, CSWWEvent): + for record in event_to_records(event): + #print("Found data to append") + data.append([record.get(k, "") for k in __EXPORT_HEADERS]) + else: print("No row data to append") + return data diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py new file mode 100644 index 00000000..d7200558 --- /dev/null +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py @@ -0,0 +1,62 @@ +from pathlib import Path +import pandas as pd +import logging + +from liiatools.datasets.shared_functions import converters, common + +log = logging.getLogger(__name__) + +def convert_to_dataframe(data): + data = data.export("df") + return data + + +def get_year(data, year): + data["YEAR"] = year + return data + +def convert_to_datetime(data): + data[["PersonBirthDate", 
"RoleStartDate"]] = data[ + ["PersonBirthDate", "RoleStartDate"] + ].apply(pd.to_datetime) + return data + + +def add_la_name(data, la_name): + data["LA"] = la_name + return data + + +# def la_prefix(data, la_code): +# data["LAchildID"] = data["LAchildID"] + "_" + la_code +# return data + + +def add_fields(input_year, data, la_name, la_code): + """ + Add YEAR, LA, PERSONSCHOOLYEAR to exported dataframe + Append LA_code from config to LAChildID + + :param input_year: A string of the year of return for the current file + :param data: The dataframe to be cleaned + :param la_name: LA name + :param la_code: LA code + :return: Cleaned and degraded dataframe + """ + data = convert_to_dataframe(data) + data = get_year(data, input_year) + data = convert_to_datetime(data) + #data = add_school_year(data) + data = add_la_name(data, la_name) + #data = la_prefix(data, la_code) + #data = degrade_dob(data) + #data = degrade_expected_dob(data) + #data = degrade_death_date(data) + return data + + +def export_file(input, output, data): + filename = Path(input).stem + outfile = filename + "_clean.csv" + output_path = Path(output, outfile) + data.to_csv(output_path, index=False) diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py new file mode 100644 index 00000000..a8d67146 --- /dev/null +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py @@ -0,0 +1,17 @@ +from functools import cached_property +from pathlib import Path + +import xmlschema + +from liiatools.spec import social_work_workforce as csww_asset_dir + + +class Schema: + def __init__(self, year): + self.__year = year + + @cached_property + def schema(self) -> xmlschema.XMLSchema: + return xmlschema.XMLSchema( + Path(csww_asset_dir.__file__).parent / f"social_work_workforce_{self.__year}.xsd" + ) From f795e54f7bc8169c332b0dae715ea5471f35befc Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Wed, 21 Jun 2023 13:44:27 +0000 Subject: [PATCH 02/40] Fix to produce cleanfile output --- .../csww_main_functions.py | 8 --- .../lds_csww_clean/csww_record.py | 71 +------------------ 2 files changed, 2 insertions(+), 77 deletions(-) diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py index 3184c26f..a381812e 100644 --- a/liiatools/datasets/social_work_workforce/csww_main_functions.py +++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py @@ -63,7 +63,6 @@ def cleanfile(input, la_code, la_log_dir, output): """ # Open & Parse file - print("Starting # Open & Parse file") if ( check_file_type( input, @@ -86,7 +85,6 @@ def cleanfile(input, la_code, la_log_dir, output): return # Check year is within acceptable range for data retention policy - print("Starting # Check year") years_to_go_back = 6 year_start_month = 6 reference_date = datetime.now() @@ -95,21 +93,15 @@ def cleanfile(input, la_code, la_log_dir, output): return # Configure stream - print("Starting # Configure stream") config = clean_config.Config() la_name = flip_dict(config["data_codes"])[la_code] stream = filters.strip_text(stream) stream = filters.add_context(stream) stream = filters.add_schema(stream, schema=Schema(input_year).schema) - # Output result - #print("Starting # Output result") stream = csww_record.message_collector(stream) # <=== this is the problem - not returning any stream data - #print(f"Stream = {stream}") data = 
csww_record.export_table(stream) - #print(f"Data = {data}") data = file_creator.add_fields(input_year, data, la_name, la_code) - #print(data) file_creator.export_file(input, output, data) cleanfile("/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml", diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py index 57e0b379..b0731819 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py @@ -36,46 +36,6 @@ def text_collector(stream): return _reduce_dict(data_dict) -# @xml_collector -# def csww_collector(stream): -# data_dict = {} -# stream = peekable(stream) -# last_tag = None -# while stream: -# event = stream.peek() -# last_tag = event.get("tag", last_tag) -# if event.get("tag") in ( -# "Assessments", -# "CINPlanDates", -# "Section47", -# "ChildProtectionPlans", -# ): -# data_dict.setdefault(event.tag, []).append(text_collector(stream)) -# else: -# if isinstance(event, events.TextNode) and event.text: -# data_dict.setdefault(last_tag, []).append(event.text) -# next(stream) - -# return _reduce_dict(data_dict) - - -# @xml_collector -# def cswwworker_collector(stream): -# data_dict = {} -# stream = peekable(stream) -# assert stream.peek().tag == "CSWWWorker" -# while stream: -# event = stream.peek() -# print(f"Event tag = {event.get('tag')}") -# if event.get("tag") == "CSWWdetails": -# data_dict.setdefault(event.tag, []).append(text_collector(stream)) -# # elif event.get("tag") == "LALevelVacancies": -# # data_dict.setdefault(event.tag, []).append(csww_collector(stream)) -# else: -# next(stream) -# return _reduce_dict(data_dict) - - @xml_collector def message_collector(stream): stream = peekable(stream) @@ -85,15 +45,12 @@ def message_collector(stream): while stream: event = stream.peek() if event.get("tag") == "Header": - print(f"Header stream = {stream}") header_record = text_collector(stream) if header_record: yield HeaderEvent(record=header_record) elif event.get("tag") == "CSWWWorker": csww_record = text_collector(stream) if csww_record: - #print(f"yielding csww_record event: {CSWWEvent(record=csww_record)}") - #print(f"CSWWEvent(record=csww_record) = {CSWWEvent(record=csww_record)}") yield CSWWEvent(record=csww_record) else: next(stream) @@ -125,7 +82,6 @@ def _maybe_list(value): value = [] if not isinstance(value, list): value = [value] - print(f"maybe_list(value) = {value}") return value @@ -142,38 +98,15 @@ def csww_event(record, property, event_name=None): def event_to_records(event: CSWWEvent) -> Iterator[dict]: record = event.record - #print(f"event.record = {record}") - # child = { - # **record.get("ChildIdentifiers", {}), - # **record.get("ChildCharacteristics", {}), - # } - # child["Disabilities"] = ",".join(_maybe_list(child.get("Disability"))) - - print(record.get("CSWWWorker")) - for csww_item in _maybe_list(record): - yield from csww_event({**child, **csww_item}, "CINreferralDate") - yield from csww_event({**child, **csww_item}, "CINclosureDate") - for assessment in _maybe_list(csww_item.get("Assessments")): - assessment["Factors"] = ",".join( - _maybe_list(assessment.get("AssessmentFactors")) - ) - yield from csww_event( - {**child, **csww_item, **assessment}, "AssessmentActualStartDate" - ) - yield from csww_event( - {**child, **csww_item, **assessment}, "AssessmentAuthorisationDate" - ) + for csww_item in 
_maybe_list(record): + yield from csww_event({**csww_item}, "StepUpGrad") def export_table(stream): - #print(f"export_table() called for stream: {stream}") data = tablib.Dataset(headers=__EXPORT_HEADERS) - #print(f"header data in export_table() = {data}") for event in stream: if isinstance(event, CSWWEvent): for record in event_to_records(event): - #print("Found data to append") data.append([record.get(k, "") for k in __EXPORT_HEADERS]) - else: print("No row data to append") return data From 8d3bd6c60da4959460e395bb894f3dd81527fbfa Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Wed, 21 Jun 2023 14:00:10 +0000 Subject: [PATCH 03/40] Run python black --- .../csww_main_functions.py | 31 +++++++++++++------ .../lds_csww_clean/csww_record.py | 2 +- .../lds_csww_clean/file_creator.py | 12 ++++--- .../lds_csww_clean/schema.py | 3 +- 4 files changed, 31 insertions(+), 17 deletions(-) diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py index a381812e..30290cfb 100644 --- a/liiatools/datasets/social_work_workforce/csww_main_functions.py +++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py @@ -18,7 +18,7 @@ from liiatools.datasets.social_work_workforce.lds_csww_clean import ( file_creator, configuration as clean_config, - csww_record + csww_record, ) from liiatools.datasets.shared_functions.common import ( @@ -28,9 +28,10 @@ check_year, check_year_within_range, save_year_error, - save_incorrect_year_error + save_incorrect_year_error, ) + def generate_sample(output: str): """ Export a sample file for testing @@ -52,6 +53,7 @@ def generate_sample(output: str): except FileNotFoundError: print("The file path provided does not exist") + def cleanfile(input, la_code, la_log_dir, output): """ Cleans input Children Social Work workforce xml files according to config and outputs cleaned csv files. 
@@ -88,7 +90,12 @@ def cleanfile(input, la_code, la_log_dir, output): years_to_go_back = 6 year_start_month = 6 reference_date = datetime.now() - if check_year_within_range(input_year, years_to_go_back, year_start_month, reference_date) is False: + if ( + check_year_within_range( + input_year, years_to_go_back, year_start_month, reference_date + ) + is False + ): save_incorrect_year_error(input, la_log_dir) return @@ -99,15 +106,19 @@ def cleanfile(input, la_code, la_log_dir, output): stream = filters.add_context(stream) stream = filters.add_schema(stream, schema=Schema(input_year).schema) # Output result - stream = csww_record.message_collector(stream) # <=== this is the problem - not returning any stream data + stream = csww_record.message_collector( + stream + ) # <=== this is the problem - not returning any stream data data = csww_record.export_table(stream) data = file_creator.add_fields(input_year, data, la_name, la_code) file_creator.export_file(input, output, data) -cleanfile("/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml", - "BAD", - "/workspaces/liia_tools/liiatools/datasets/social_work_workforce/lds_csww_clean", - "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean" - ) -print("===> Finished running csww_main_functions.py") \ No newline at end of file +cleanfile( + "/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml", + "BAD", + "/workspaces/liia_tools/liiatools/datasets/social_work_workforce/lds_csww_clean", + "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", +) + +print("===> Finished running csww_main_functions.py") diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py index b0731819..1a3c1b83 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py @@ -59,7 +59,7 @@ def message_collector(stream): __EXPORT_HEADERS = [ "AgencyWorker", "SWENo", - "FTE", + "FTE", "PersonBirthDate", "GenderCurrent", "Ethnicity", diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py index d7200558..d951c358 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py @@ -6,6 +6,7 @@ log = logging.getLogger(__name__) + def convert_to_dataframe(data): data = data.export("df") return data @@ -15,6 +16,7 @@ def get_year(data, year): data["YEAR"] = year return data + def convert_to_datetime(data): data[["PersonBirthDate", "RoleStartDate"]] = data[ ["PersonBirthDate", "RoleStartDate"] @@ -46,12 +48,12 @@ def add_fields(input_year, data, la_name, la_code): data = convert_to_dataframe(data) data = get_year(data, input_year) data = convert_to_datetime(data) - #data = add_school_year(data) + # data = add_school_year(data) data = add_la_name(data, la_name) - #data = la_prefix(data, la_code) - #data = degrade_dob(data) - #data = degrade_expected_dob(data) - #data = degrade_death_date(data) + # data = la_prefix(data, la_code) + # data = degrade_dob(data) + # data = degrade_expected_dob(data) + # data = degrade_death_date(data) return data diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py 
b/liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py index a8d67146..f82b5eb1 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py @@ -13,5 +13,6 @@ def __init__(self, year): @cached_property def schema(self) -> xmlschema.XMLSchema: return xmlschema.XMLSchema( - Path(csww_asset_dir.__file__).parent / f"social_work_workforce_{self.__year}.xsd" + Path(csww_asset_dir.__file__).parent + / f"social_work_workforce_{self.__year}.xsd" ) From ac82a0ca801978f291094bb55b6d841c35b0d643 Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Mon, 3 Jul 2023 11:48:05 +0000 Subject: [PATCH 04/40] Minor changes --- liiatools/datasets/social_work_workforce/csww_main_functions.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py index 30290cfb..970638d2 100644 --- a/liiatools/datasets/social_work_workforce/csww_main_functions.py +++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py @@ -120,5 +120,3 @@ def cleanfile(input, la_code, la_log_dir, output): "/workspaces/liia_tools/liiatools/datasets/social_work_workforce/lds_csww_clean", "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", ) - -print("===> Finished running csww_main_functions.py") From 333617e85e01837191a2cdbf02c0745ec685f16d Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Mon, 3 Jul 2023 11:49:10 +0000 Subject: [PATCH 05/40] Sort imports --- .../social_work_workforce/csww_main_functions.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py index 970638d2..f10e0596 100644 --- a/liiatools/datasets/social_work_workforce/csww_main_functions.py +++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py @@ -1,7 +1,7 @@ -import logging -import click_log -import click as click +# import logging +# import click_log from pathlib import Path +import click as click from datetime import datetime from liiatools.datasets.social_work_workforce.sample_data import ( @@ -105,10 +105,11 @@ def cleanfile(input, la_code, la_log_dir, output): stream = filters.strip_text(stream) stream = filters.add_context(stream) stream = filters.add_schema(stream, schema=Schema(input_year).schema) + # Output result stream = csww_record.message_collector( stream - ) # <=== this is the problem - not returning any stream data + ) data = csww_record.export_table(stream) data = file_creator.add_fields(input_year, data, la_name, la_code) file_creator.export_file(input, output, data) From 0a077e2e13b46c44d3e6ec707c7df342cb57e2ef Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Tue, 4 Jul 2023 10:48:45 +0000 Subject: [PATCH 06/40] Create separate csv clean file for LA level data --- .../csww_main_functions.py | 24 ++++---- .../lds_csww_clean/csww_record.py | 58 +++++++++++++++---- .../lds_csww_clean/file_creator.py | 26 ++++++--- 3 files changed, 76 insertions(+), 32 deletions(-) diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py index f10e0596..8e41ade8 100644 --- 
a/liiatools/datasets/social_work_workforce/csww_main_functions.py +++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py @@ -1,7 +1,4 @@ -# import logging -# import click_log from pathlib import Path -import click as click from datetime import datetime from liiatools.datasets.social_work_workforce.sample_data import ( @@ -87,8 +84,8 @@ def cleanfile(input, la_code, la_log_dir, output): return # Check year is within acceptable range for data retention policy - years_to_go_back = 6 - year_start_month = 6 + years_to_go_back = 7 + year_start_month = 1 reference_date = datetime.now() if ( check_year_within_range( @@ -105,14 +102,15 @@ def cleanfile(input, la_code, la_log_dir, output): stream = filters.strip_text(stream) stream = filters.add_context(stream) stream = filters.add_schema(stream, schema=Schema(input_year).schema) - - # Output result - stream = csww_record.message_collector( - stream - ) - data = csww_record.export_table(stream) - data = file_creator.add_fields(input_year, data, la_name, la_code) - file_creator.export_file(input, output, data) + + # Output results + stream = csww_record.message_collector(stream) + + data_worker, data_lalevel = csww_record.export_table(stream) + data_worker = file_creator.add_fields(input_year, data_worker, la_name) + data_lalevel = file_creator.add_fields(input_year, data_lalevel, la_name) + file_creator.export_file(input, output, data_lalevel, "lalevel") + file_creator.export_file(input, output, data_worker, "worker") cleanfile( diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py index 1a3c1b83..07414065 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py @@ -10,6 +10,10 @@ class CSWWEvent(events.ParseEvent): pass +class LALevelEvent(events.ParseEvent): + pass + + class HeaderEvent(events.ParseEvent): pass @@ -52,11 +56,15 @@ def message_collector(stream): csww_record = text_collector(stream) if csww_record: yield CSWWEvent(record=csww_record) + elif event.get("tag") == "LALevelVacancies": + lalevel_record = text_collector(stream) + if lalevel_record: + yield LALevelEvent(record=lalevel_record) else: next(stream) -__EXPORT_HEADERS = [ +__EXPORT_HEADERS_CSWWWORKER = [ "AgencyWorker", "SWENo", "FTE", @@ -76,6 +84,12 @@ def message_collector(stream): "CFKSSstatus", ] +__EXPORT_HEADERS_LALEVELVAC = [ + "NumberOfVacancies", + "NoAgencyFTE", + "NoAgencyHeadcount", +] + def _maybe_list(value): if value is None: @@ -85,28 +99,52 @@ def _maybe_list(value): return value -def csww_event(record, property, event_name=None): +def csww_event_worker(record, property, event_name=None): if event_name is None: event_name = property value = record.get(property) if value: new_record = {**record, "Date": value, "Type": event_name} - return ({k: new_record.get(k) for k in __EXPORT_HEADERS},) + return ({k: new_record.get(k) for k in __EXPORT_HEADERS_CSWWWORKER},) return () -def event_to_records(event: CSWWEvent) -> Iterator[dict]: - record = event.record +def lalevel_event(record, property, event_name=None): + if event_name is None: + event_name = property + value = record.get(property) + if value: + new_record = {**record, "Date": value, "Type": event_name} + return ({k: new_record.get(k) for k in __EXPORT_HEADERS_LALEVELVAC},) + + return () + +def event_to_records_worker(event: CSWWEvent) -> Iterator[dict]: + record = event.record for csww_item 
in _maybe_list(record): - yield from csww_event({**csww_item}, "StepUpGrad") + yield from csww_event_worker({**csww_item}, "StepUpGrad") + + +def event_to_records_lalevel(event: LALevelEvent) -> Iterator[dict]: + record = event.record + for lalevel_item in _maybe_list(record): + yield from lalevel_event({**lalevel_item}, "NoAgencyFTE") def export_table(stream): - data = tablib.Dataset(headers=__EXPORT_HEADERS) + data_worker = tablib.Dataset(headers=__EXPORT_HEADERS_CSWWWORKER) + data_lalevel = tablib.Dataset(headers=__EXPORT_HEADERS_LALEVELVAC) for event in stream: if isinstance(event, CSWWEvent): - for record in event_to_records(event): - data.append([record.get(k, "") for k in __EXPORT_HEADERS]) - return data + for record in event_to_records_worker(event): + data_worker.append( + [record.get(k, "") for k in __EXPORT_HEADERS_CSWWWORKER] + ) + elif isinstance(event, LALevelEvent): + for record in event_to_records_lalevel(event): + data_lalevel.append( + [record.get(k, "") for k in __EXPORT_HEADERS_LALEVELVAC] + ) + return data_worker, data_lalevel diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py index d951c358..fa4d841b 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py @@ -34,22 +34,20 @@ def add_la_name(data, la_name): # return data -def add_fields(input_year, data, la_name, la_code): +def add_fields(input_year, data, la_name): """ - Add YEAR, LA, PERSONSCHOOLYEAR to exported dataframe - Append LA_code from config to LAChildID + Add YEAR, LA to exported dataframe :param input_year: A string of the year of return for the current file :param data: The dataframe to be cleaned :param la_name: LA name - :param la_code: LA code :return: Cleaned and degraded dataframe """ data = convert_to_dataframe(data) data = get_year(data, input_year) - data = convert_to_datetime(data) - # data = add_school_year(data) data = add_la_name(data, la_name) + + # data = convert_to_datetime(data) # data = la_prefix(data, la_code) # data = degrade_dob(data) # data = degrade_expected_dob(data) @@ -57,8 +55,18 @@ def add_fields(input_year, data, la_name, la_code): return data -def export_file(input, output, data): - filename = Path(input).stem - outfile = filename + "_clean.csv" +def export_file(input, output, data, filenamelevel): + """ + Output cleansed and degraded dataframe as csv file. 
+ Example of output filename: social_work_workforce_2022_lalevel_clean.csv + + :param input: should specify the input file location, including file name and suffix, and be usable by a Path function + :param output: should specify the path to the output folder + :param data: The cleansed dataframe to be output + :param filenamelevel: String appended to output filename indicating aggregation level - worker or LA level + :return: csv file containing cleaned and degraded dataframe + """ + filenamestem = Path(input).stem + outfile = filenamestem + "_" + filenamelevel + "_clean.csv" output_path = Path(output, outfile) data.to_csv(output_path, index=False) From 88a2f2953402b36331580a1260ebd8df8b5c5c74 Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Tue, 4 Jul 2023 12:44:11 +0000 Subject: [PATCH 07/40] Simplify event_to_records --- .../lds_csww_clean/csww_record.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py index 07414065..c2f8fdd3 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py @@ -121,16 +121,10 @@ def lalevel_event(record, property, event_name=None): return () -def event_to_records_worker(event: CSWWEvent) -> Iterator[dict]: +def event_to_records(event) -> Iterator[dict]: record = event.record - for csww_item in _maybe_list(record): - yield from csww_event_worker({**csww_item}, "StepUpGrad") - - -def event_to_records_lalevel(event: LALevelEvent) -> Iterator[dict]: - record = event.record - for lalevel_item in _maybe_list(record): - yield from lalevel_event({**lalevel_item}, "NoAgencyFTE") + for item in _maybe_list(record): + yield from (item,) def export_table(stream): @@ -138,12 +132,12 @@ def export_table(stream): data_lalevel = tablib.Dataset(headers=__EXPORT_HEADERS_LALEVELVAC) for event in stream: if isinstance(event, CSWWEvent): - for record in event_to_records_worker(event): + for record in event_to_records(event): data_worker.append( [record.get(k, "") for k in __EXPORT_HEADERS_CSWWWORKER] ) elif isinstance(event, LALevelEvent): - for record in event_to_records_lalevel(event): + for record in event_to_records(event): data_lalevel.append( [record.get(k, "") for k in __EXPORT_HEADERS_LALEVELVAC] ) From 7e5102216ba6db9583ad24576c4137f867ba8b33 Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Tue, 4 Jul 2023 12:50:14 +0000 Subject: [PATCH 08/40] Remove unnecessary event functions --- .../lds_csww_clean/csww_record.py | 22 ------------------- 1 file changed, 22 deletions(-) diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py index c2f8fdd3..7eea634b 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py @@ -99,28 +99,6 @@ def _maybe_list(value): return value -def csww_event_worker(record, property, event_name=None): - if event_name is None: - event_name = property - value = record.get(property) - if value: - new_record = {**record, "Date": value, "Type": event_name} - return ({k: new_record.get(k) for k in __EXPORT_HEADERS_CSWWWORKER},) - - return () - - -def lalevel_event(record, 
property, event_name=None): - if event_name is None: - event_name = property - value = record.get(property) - if value: - new_record = {**record, "Date": value, "Type": event_name} - return ({k: new_record.get(k) for k in __EXPORT_HEADERS_LALEVELVAC},) - - return () - - def event_to_records(event) -> Iterator[dict]: record = event.record for item in _maybe_list(record): From db9cd7d31d853c530e47bbc0f29c5e161df752d5 Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Tue, 4 Jul 2023 15:09:34 +0000 Subject: [PATCH 09/40] Degrade dob and SWENo --- .../lds_csww_clean/file_creator.py | 57 +++++++++++++++---- 1 file changed, 46 insertions(+), 11 deletions(-) diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py index fa4d841b..3f383dcd 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py @@ -1,6 +1,9 @@ from pathlib import Path import pandas as pd import logging +import hashlib +from typing import Dict +from decouple import config from liiatools.datasets.shared_functions import converters, common @@ -18,9 +21,10 @@ def get_year(data, year): def convert_to_datetime(data): - data[["PersonBirthDate", "RoleStartDate"]] = data[ - ["PersonBirthDate", "RoleStartDate"] - ].apply(pd.to_datetime) + if set(["PersonBirthDate", "RoleStartDate"]).issubset(data): + data[["PersonBirthDate", "RoleStartDate"]] = data[ + ["PersonBirthDate", "RoleStartDate"] + ].apply(pd.to_datetime) return data @@ -29,9 +33,42 @@ def add_la_name(data, la_name): return data -# def la_prefix(data, la_code): -# data["LAchildID"] = data["LAchildID"] + "_" + la_code -# return data +def degrade_dob(data): + if "PersonBirthDate" in data: + if data["PersonBirthDate"] is not None: + data["PersonBirthDate"] = data["PersonBirthDate"].apply( + lambda row: converters.to_month_only_dob(row) + ) + return data + + +def degrade_SWENo(data): + """ + Replaces SWE number with hashed version + """ + if "SWENo" in data: + if data["SWENo"] is not None: + data["SWENo"] = data["SWENo"].apply(lambda row: swe_hash(row)) + return data + + +def swe_hash(swe_num): + """ + Converts the **SWENo** field to a hash code represented in HEX + :param swe_num: SWE number to be converted + :return: Hash code version of SWE number + """ + + private_string = config("sec_str", default="") + + private_key = swe_num + private_string + + # Preparing plain text (SWENo) to hash it + plaintext = private_key.encode() + + hash_algorithm = hashlib.sha3_256(plaintext) + + return hash_algorithm.hexdigest() def add_fields(input_year, data, la_name): @@ -47,11 +84,9 @@ def add_fields(input_year, data, la_name): data = get_year(data, input_year) data = add_la_name(data, la_name) - # data = convert_to_datetime(data) - # data = la_prefix(data, la_code) - # data = degrade_dob(data) - # data = degrade_expected_dob(data) - # data = degrade_death_date(data) + data = convert_to_datetime(data) + data = degrade_dob(data) + data = degrade_SWENo(data) return data From 57a90922163c2c45c42eb16a15edcd1f5ee663db Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Wed, 5 Jul 2023 10:38:40 +0000 Subject: [PATCH 10/40] Create additional sample files for testing la_agg --- .../csww/BAD/social_work_workforce_2021.xml | 556 ++++++++++++++++++ .../csww/BAD/social_work_workforce_2022.xml | 4 +- 
 .../social_work_workforce_2021.xsd           | 254 ++++
 3 files changed, 812 insertions(+), 2 deletions(-)
 create mode 100644 liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2021.xml
 create mode 100644 liiatools/spec/social_work_workforce/social_work_workforce_2021.xsd

diff --git a/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2021.xml b/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2021.xml
new file mode 100644
index 00000000..8ef1b9ef
--- /dev/null
+++ b/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2021.xml
@@ -0,0 +1,556 @@
+
+[556 added lines: a generated sample return whose XML element tags were lost in extraction, leaving only text values. The file holds a Message with a Header (Collection CSWW, Year 2021, ReferenceDate 2021-09-30, source level L, LEA 301, produced by liiatools.datasets.social_work_workforce.sample_data on 2023-03-28T14:54:55Z), an LALevelVacancies block (66.66, 40.40, 100) and thirty CSWWWorker records carrying values for fields such as AgencyWorker, SWENo (e.g. Ox2054309383), FTE, PersonBirthDate, GenderCurrent, Ethnicity, QualInst, RoleStartDate and absence details.]
\ No newline at end of file
diff --git a/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml b/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml
index 8da0fed3..60a8164c 100644
--- a/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml
+++ b/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml
@@ -2,8 +2,8 @@
 CSWW
-2020
+2022
-2023-03-28
+2022-09-30
 
 
 L
[element tags in this hunk were stripped in extraction; the change updates the sample header's year to 2022 and its reference date to 2022-09-30]
diff --git a/liiatools/spec/social_work_workforce/social_work_workforce_2021.xsd b/liiatools/spec/social_work_workforce/social_work_workforce_2021.xsd
new file mode 100644
index 00000000..bc9f98a3
--- /dev/null
+++ b/liiatools/spec/social_work_workforce/social_work_workforce_2021.xsd
@@ -0,0 +1,254 @@
+[254 added lines: the schema markup was stripped of its tags in extraction. The recoverable content is the set of enumerated code lists for the 2021 return: agency worker (Not an Agency Worker / Agency Worker); gender (Not Known / Male / Female / Not Specified); ethnicity (White British, White Irish, Any Other White Background, White and Black Caribbean, White and Black African, White and Asian, Any Other Mixed Background, Indian, Pakistani, Bangladeshi, Any Other Asian Background, Black Caribbean, Black African, Any Other Black Background, Chinese, Any Other Ethnic Group, Declared not stated or Refused, Information Not Yet Obtained); qualification type (Under-graduate / Post-graduate / Other); a No/Yes flag; role (Senior Manager, Middle Manager, First Line Manager, Senior Practicioner, Case Holder, Qualified Without Cases); start origin (Newly Qualified Social Workers through Not Yet Collected); end origin (Social Worker Role in a Different Local Authority in England through Not Yet Collected); leaver reason (Resignation, Voluntary Redundancy, Compulsory Redundancy, Dismissed, Retired, Deceased, Moved to a Non-child and Family Social Work Role Within LA, Other, Not Known, Not Yet Collected); absence reason (Maternity/Paternity leave, Other Paid Authorised Absence, Paid Absence For Public Duties, Sick Leave, Training, Unauthorised Absence, Unpaid Authorised Absence); and CFKSS status (Assessed and Supported Year in Employment (AYSE), Frontline Practitioner, Practice Supervisor, Practice Leader).]
\ No newline at end of file

From 3abf03c45c7dd19b4f1a0b47f58eeb8408b9c2ad Mon Sep 17 00:00:00 2001
From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com>
Date: Wed, 5 Jul 2023 10:47:51 +0000
Subject: [PATCH 11/40] Add la_agg functionality to workforce dataset

---
 .../social_work_workforce/csww_cli.py         | 25 +++++
 .../csww_main_functions.py                    | 91 +++++++++++++++--
 .../lds_csww_la_agg/configuration.py          | 75 ++++++++++++++
 .../lds_csww_la_agg/process.py                | 97 +++++++++++++++++++
 .../spec/social_work_workforce/la-agg.yml     | 67 +++++++++++++
 5 files changed, 347 insertions(+), 8 deletions(-)
 create mode 100644 liiatools/datasets/social_work_workforce/lds_csww_la_agg/configuration.py
 create mode 100644 liiatools/datasets/social_work_workforce/lds_csww_la_agg/process.py
 create mode
100644 liiatools/spec/social_work_workforce/la-agg.yml diff --git a/liiatools/datasets/social_work_workforce/csww_cli.py b/liiatools/datasets/social_work_workforce/csww_cli.py index 406ce1c5..0b687767 100644 --- a/liiatools/datasets/social_work_workforce/csww_cli.py +++ b/liiatools/datasets/social_work_workforce/csww_cli.py @@ -35,3 +35,28 @@ def generate_sample(output: str): """ output = csww_main_functions.generate_sample(output) return output + + +@csww.command() +@click.option( + "--i", + "input", + required=True, + type=str, + help="A string specifying the input file location, including the file name and suffix, usable by a pathlib Path function", +) +@click.option( + "--o", + "output", + required=True, + type=str, + help="A string specifying the output directory location", +) +def la_agg(input, output): + """ + Joins data from newly cleaned CSWW files (output of cleanfile()) to existing CSWW files data for the depositing local authority + :param input: should specify the input file location, including file name and suffix, and be usable by a Path function + :param output: should specify the path to the output folder + :return: None + """ + csww_main_functions.la_agg(input, output) diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py index 8e41ade8..ced675fa 100644 --- a/liiatools/datasets/social_work_workforce/csww_main_functions.py +++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py @@ -1,5 +1,6 @@ from pathlib import Path from datetime import datetime +import yaml from liiatools.datasets.social_work_workforce.sample_data import ( generate_sample_csww_file, @@ -18,6 +19,7 @@ csww_record, ) +from liiatools.spec import common as common_asset_dir from liiatools.datasets.shared_functions.common import ( flip_dict, check_file_type, @@ -28,6 +30,29 @@ save_incorrect_year_error, ) +# dependencies for la_agg() +from liiatools.datasets.social_work_workforce.lds_csww_la_agg import ( + configuration as agg_config, +) +from liiatools.datasets.social_work_workforce.lds_csww_la_agg import ( + process as agg_process, +) + +# dependencies for pan_agg() +# from liiatools.datasets.social_work_workforce.lds_csww_pan_agg import configuration as pan_config +# from liiatools.datasets.social_work_workforce.lds_csww_pan_agg import process as pan_process + + +COMMON_CONFIG_DIR = Path(common_asset_dir.__file__).parent +# Get all the possible LA codes that could be used +with open(f"{COMMON_CONFIG_DIR}/LA-codes.yml") as las: + la_list = list(yaml.full_load(las)["data_codes"].values()) + +# Set constants for data retention period +YEARS_TO_GO_BACK = 7 +YEAR_START_MONTH = 1 +REFERENCE_DATE = datetime.now() + def generate_sample(output: str): """ @@ -84,12 +109,9 @@ def cleanfile(input, la_code, la_log_dir, output): return # Check year is within acceptable range for data retention policy - years_to_go_back = 7 - year_start_month = 1 - reference_date = datetime.now() if ( check_year_within_range( - input_year, years_to_go_back, year_start_month, reference_date + input_year, YEARS_TO_GO_BACK, YEAR_START_MONTH, REFERENCE_DATE ) is False ): @@ -113,9 +135,62 @@ def cleanfile(input, la_code, la_log_dir, output): file_creator.export_file(input, output, data_worker, "worker") -cleanfile( - "/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml", - "BAD", - "/workspaces/liia_tools/liiatools/datasets/social_work_workforce/lds_csww_clean", +def la_agg(input, 
output): + """ + Joins data from newly cleaned social work workforce census files (output of cleanfile()) to existing social work workforce census files for the depositing local authority + :param input: should specify the input file location, including file name and suffix, and be usable by a Path function + :param output: should specify the path to the output folder + :return: None + """ + + # Configuration + config = agg_config.Config() + + # Open file as DataFrame and match file type + csww_df = agg_process.read_file(input) + print(f"csww_df = {csww_df}") + column_names = config["column_names"] + table_name = agg_process.match_load_file(csww_df, column_names) + + # Merge file with existing file of the same type in LA output folder + csww_df = agg_process.merge_la_files(output, csww_df, table_name) + + # De-duplicate and remove old data according to schema + if table_name == "CSWWWorker": + dates = config["dates"] + csww_df = agg_process.convert_datetimes(csww_df, dates, table_name) + sort_order = config["sort_order"] + dedup = config["dedup"] + csww_df = agg_process.deduplicate(csww_df, table_name, sort_order, dedup) + csww_df = agg_process.remove_old_data( + csww_df, + num_of_years=YEARS_TO_GO_BACK, + new_year_start_month=YEAR_START_MONTH, + as_at_date=REFERENCE_DATE, + ) + + # If file still has data, after removing old data: re-format and export merged file + if len(csww_df) > 0: + if table_name == "CSWWWorker": + csww_df = agg_process.convert_dates(csww_df, dates, table_name) + agg_process.export_la_file(output, table_name, csww_df) + + +# Run in Visual Studio Code |> + +# cleanfile( +# "/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2021.xml", +# "BAD", +# "/workspaces/liia_tools/liiatools/datasets/social_work_workforce/lds_csww_clean", +# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", +# ) + +# la_agg( +# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2021_worker_clean.csv", +# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", +# ) + +la_agg( + "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_lalevel_clean.csv", "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", ) diff --git a/liiatools/datasets/social_work_workforce/lds_csww_la_agg/configuration.py b/liiatools/datasets/social_work_workforce/lds_csww_la_agg/configuration.py new file mode 100644 index 00000000..5bb508b9 --- /dev/null +++ b/liiatools/datasets/social_work_workforce/lds_csww_la_agg/configuration.py @@ -0,0 +1,75 @@ +from pathlib import Path +import logging +import datetime +import os +import yaml +from string import Template + +from liiatools.spec import social_work_workforce as csww_asset_dir + +log = logging.getLogger(__name__) + +DEFAULT_CONFIG_DIR = Path(csww_asset_dir.__file__).parent + + +class Config(dict): + def __init__(self, config_file=None): + super().__init__() + + if not config_file: + config_file = DEFAULT_CONFIG_DIR / "la-agg.yml" + + self.load_config(config_file, conditional=False) + + self["config_date"] = datetime.datetime.now().isoformat() + try: + self["username"] = os.getlogin() + except OSError: + # This happens when tests are not run under a login shell, e.g. CI pipeline + pass + + def load_config(self, filename, conditional=False, warn=False): + """ + Load configuration from yaml file. 
Any loaded configuration + is only set if the values don't already exist in CONFIG. + + Files can contain ${} placeholders following the Python string.Template format. + The context will include any keys already existing in the configuration, any keys + from the current file - however, if these include placeholders, the placeholders + will not be replaced. Finally, environment variables can be referenced with + `os_environ_VARIABLE_NAME`. + + Keyword arguments: + filename -- Filename to load from + conditional -- If True, ignore file if it doesn't exist. If False, fail. (default False) + """ + if conditional and not os.path.isfile(filename): + if warn: + log.warning("Missing optional file {}".format(filename)) + + return + + with open(filename) as FILE: + user_config = yaml.load(FILE, Loader=yaml.FullLoader) + + log.info( + "Loading {} configuration values from '{}'.".format( + len(user_config), filename + ) + ) + + environment_dict = {"os_environ_{}".format(k): v for k, v in os.environ.items()} + + variables = dict(self) + variables.update(user_config) + variables.update(environment_dict) + + with open(filename, "rt") as FILE: + user_config_string = FILE.read() + + user_config_template = Template(user_config_string) + user_config_string = user_config_template.substitute(variables) + + user_config = yaml.load(user_config_string, Loader=yaml.FullLoader) + + self.update(user_config) diff --git a/liiatools/datasets/social_work_workforce/lds_csww_la_agg/process.py b/liiatools/datasets/social_work_workforce/lds_csww_la_agg/process.py new file mode 100644 index 00000000..44d4bd87 --- /dev/null +++ b/liiatools/datasets/social_work_workforce/lds_csww_la_agg/process.py @@ -0,0 +1,97 @@ +from pathlib import Path +import pandas as pd +import logging + +log = logging.getLogger(__name__) + + +def read_file(file): + """ + Reads the csv file as a pandas DataFrame + """ + filepath = Path(file) + csww_df = pd.read_csv(filepath, index_col=None) + return csww_df + + +def match_load_file(csww_df, column_names): + """ + Matches the columns in the DataFrame against one of the 2 social work workforce file types + """ + for table_name, expected_columns in column_names.items(): + if set(csww_df.columns) == set(expected_columns): + return table_name + + +def merge_la_files(output, csww_df, table_name): + """ + Looks for existing file of the same type and merges with new file if found + """ + old_file = Path(output, f"CSWW_{table_name}_merged.csv") + if old_file.is_file(): + old_df = pd.read_csv(old_file, index_col=None) + merged_df = pd.concat([csww_df, old_df], axis=0) + else: + merged_df = csww_df + return merged_df + + +def convert_datetimes(csww_df, dates, table_name): + """ + Ensures that all date fields have been parsed as dates + """ + for date_field in dates[table_name]: + csww_df[date_field] = pd.to_datetime(csww_df[date_field], format="%Y/%m/%d") + return csww_df + + +def deduplicate(csww_df, table_name, sort_order, dedup): + """ + Sorts and removes duplicate records from merged files following schema + """ + csww_df = csww_df.sort_values( + sort_order[table_name], ascending=False, ignore_index=True + ) + csww_df = csww_df.drop_duplicates(subset=dedup[table_name], keep="first") + return csww_df + + +def remove_old_data(csww_df, num_of_years, new_year_start_month, as_at_date): + """ + Removes data older than a specified number of years as at reference date + + :param csww_df: Dataframe containing csv data + :param num_of_years: The number of years to go back + :param new_year_start_month: The month which 
signifies start of a new year for data retention policy + :param as_at_date: The reference date against which we are checking the valid range + :return: Dataframe with older years removed + """ + current_year = pd.to_datetime(as_at_date).year + current_month = pd.to_datetime(as_at_date).month + + if current_month < new_year_start_month: + earliest_allowed_year = current_year - num_of_years + else: + earliest_allowed_year = current_year - num_of_years + 1 # roll forward one year + + csww_df = csww_df[csww_df["YEAR"] >= earliest_allowed_year] + return csww_df + + +def convert_dates(csww_df, dates, table_name): + """ + Ensures that all date fields have been parsed as dates + """ + for date_field in dates[table_name]: + csww_df[date_field] = pd.to_datetime( + csww_df[date_field], format="%Y/%m/%d" + ).dt.date + return csww_df + + +def export_la_file(output, table_name, csww_df): + """ + Writes the output as a csv + """ + output_path = Path(output, f"CSWW_{table_name}_merged.csv") + csww_df.to_csv(output_path, index=False) diff --git a/liiatools/spec/social_work_workforce/la-agg.yml b/liiatools/spec/social_work_workforce/la-agg.yml new file mode 100644 index 00000000..d3b909ff --- /dev/null +++ b/liiatools/spec/social_work_workforce/la-agg.yml @@ -0,0 +1,67 @@ +column_names: + CSWWWorker: + - AgencyWorker + - SWENo + - FTE + - PersonBirthDate + - GenderCurrent + - Ethnicity + - QualInst + - StepUpGrad + - RoleStartDate + - StartOrigin + - Cases30 + - WorkingDaysLost + - ContractWeeks + - FrontlineGrad + - Absat30Sept + - ReasonAbsence + - CFKSSstatus + - LA + - YEAR + LALevelVacancies: + - NumberOfVacancies + - NoAgencyFTE + - NoAgencyHeadcount + - LA + - YEAR + +dates: + CSWWWorker: + - PersonBirthDate + - RoleStartDate + +sort_order: + CSWWWorker: + - YEAR + LALevelVacancies: + - YEAR + +dedup: + CSWWWorker: + - PersonBirthDate + - GenderCurrent + - Ethnicity + - QualInst + - StepUpGrad + - RoleStartDate + - StartOrigin + - Cases30 + - WorkingDaysLost + - ContractWeeks + - FrontlineGrad + - Absat30Sept + - ReasonAbsence + - CFKSSstatus + - LA + - YEAR + LALevelVacancies: + - NumberOfVacancies + - NoAgencyFTE + - NoAgencyHeadcount + - LA + - YEAR + + + + \ No newline at end of file From 9782042e05f9259ffa9f29c6ed0ea4aae5fe21e7 Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Wed, 5 Jul 2023 11:22:11 +0000 Subject: [PATCH 12/40] Remove duplicate schema.py from workforce --- .../datasets/social_work_workforce/csww_main_functions.py | 8 ++++---- liiatools/datasets/social_work_workforce/sample_data.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py index ced675fa..5ddd167d 100644 --- a/liiatools/datasets/social_work_workforce/csww_main_functions.py +++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py @@ -190,7 +190,7 @@ def la_agg(input, output): # "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", # ) -la_agg( - "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_lalevel_clean.csv", - "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", -) +# la_agg( +# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_lalevel_clean.csv", +# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", +# ) 
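The retention rule in remove_old_data() above is worth pinning down: the earliest allowed YEAR rolls forward by one as soon as the reference date reaches new_year_start_month. A minimal, self-contained check of that cutoff logic (assuming only pandas; the example values mirror the YEARS_TO_GO_BACK = 7 and YEAR_START_MONTH = 1 constants defined in csww_main_functions.py):

import pandas as pd

csww_df = pd.DataFrame({"YEAR": [2015, 2016, 2017, 2022]})
as_at_date = pd.to_datetime("2023-07-05")  # month 7 is not before new_year_start_month 1
earliest_allowed_year = as_at_date.year - 7 + 1  # rolls forward to 2017
print(csww_df[csww_df["YEAR"] >= earliest_allowed_year])  # keeps 2017 and 2022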
diff --git a/liiatools/datasets/social_work_workforce/sample_data.py b/liiatools/datasets/social_work_workforce/sample_data.py index e8fb749e..3ed1333a 100644 --- a/liiatools/datasets/social_work_workforce/sample_data.py +++ b/liiatools/datasets/social_work_workforce/sample_data.py @@ -5,7 +5,7 @@ from sfdata_stream_parser.events import StartElement, EndElement, TextNode -from liiatools.datasets.social_work_workforce.schema import Schema +from liiatools.datasets.social_work_workforce.lds_csww_clean.schema import Schema def TextElement(tag: str, text): From 27119060bc1f66badd1f45ae26aebdb9f5ac3cbf Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Wed, 5 Jul 2023 11:22:34 +0000 Subject: [PATCH 13/40] Remove duplicate schema.py from workforce --- .../datasets/social_work_workforce/schema.py | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 liiatools/datasets/social_work_workforce/schema.py diff --git a/liiatools/datasets/social_work_workforce/schema.py b/liiatools/datasets/social_work_workforce/schema.py deleted file mode 100644 index aebb3226..00000000 --- a/liiatools/datasets/social_work_workforce/schema.py +++ /dev/null @@ -1,18 +0,0 @@ -from functools import cached_property -from pathlib import Path - -from xmlschema import XMLSchema - -from liiatools.spec import social_work_workforce as social_work_workforce_dir - - -class Schema: - def __init__(self, year: int = 2022): - self.__year = year - - @cached_property - def schema(self) -> XMLSchema: - return XMLSchema( - Path(social_work_workforce_dir.__file__).parent - / f"social_work_workforce_{self.__year}.xsd" - ) From 1889518bdf4151da3c7feec688d733bb76cd4868 Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Wed, 5 Jul 2023 12:16:15 +0000 Subject: [PATCH 14/40] Create separate degrade_data function --- .../social_work_workforce/csww_main_functions.py | 10 ++++++---- .../lds_csww_clean/file_creator.py | 11 ++++++++++- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py index 5ddd167d..44548cd7 100644 --- a/liiatools/datasets/social_work_workforce/csww_main_functions.py +++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py @@ -129,10 +129,13 @@ def cleanfile(input, la_code, la_log_dir, output): stream = csww_record.message_collector(stream) data_worker, data_lalevel = csww_record.export_table(stream) + data_worker = file_creator.add_fields(input_year, data_worker, la_name) + data_worker = file_creator.degrade_data(data_worker) + file_creator.export_file(input, output, data_worker, "worker") + data_lalevel = file_creator.add_fields(input_year, data_lalevel, la_name) file_creator.export_file(input, output, data_lalevel, "lalevel") - file_creator.export_file(input, output, data_worker, "worker") def la_agg(input, output): @@ -148,7 +151,6 @@ def la_agg(input, output): # Open file as DataFrame and match file type csww_df = agg_process.read_file(input) - print(f"csww_df = {csww_df}") column_names = config["column_names"] table_name = agg_process.match_load_file(csww_df, column_names) @@ -179,14 +181,14 @@ def la_agg(input, output): # Run in Visual Studio Code |> # cleanfile( -# "/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2021.xml", +# 
"/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml", # "BAD", # "/workspaces/liia_tools/liiatools/datasets/social_work_workforce/lds_csww_clean", # "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", # ) # la_agg( -# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2021_worker_clean.csv", +# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_worker_clean.csv", # "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", # ) diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py index 3f383dcd..66fe79a0 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py @@ -78,12 +78,21 @@ def add_fields(input_year, data, la_name): :param input_year: A string of the year of return for the current file :param data: The dataframe to be cleaned :param la_name: LA name - :return: Cleaned and degraded dataframe + :return: Dataframe with year and LA added """ data = convert_to_dataframe(data) data = get_year(data, input_year) data = add_la_name(data, la_name) + return data + + +def degrade_data(data): + """ + Degrade DoB to first of month and replace SWENo with hash code version + :param data: The dataframe to be cleaned + :return: Dataframe with degraded data + """ data = convert_to_datetime(data) data = degrade_dob(data) data = degrade_SWENo(data) From b5be8c5f51aca754be7a915dde5626e86c42bca4 Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Wed, 5 Jul 2023 12:26:15 +0000 Subject: [PATCH 15/40] Copy s903 pan_agg files to workforce --- .../lds_csww_pan_agg/configuration.py | 85 ++++++++++++++ .../lds_csww_pan_agg/process.py | 53 +++++++++ .../spec/social_work_workforce/pan-agg.yml | 109 ++++++++++++++++++ 3 files changed, 247 insertions(+) create mode 100644 liiatools/datasets/social_work_workforce/lds_csww_pan_agg/configuration.py create mode 100644 liiatools/datasets/social_work_workforce/lds_csww_pan_agg/process.py create mode 100644 liiatools/spec/social_work_workforce/pan-agg.yml diff --git a/liiatools/datasets/social_work_workforce/lds_csww_pan_agg/configuration.py b/liiatools/datasets/social_work_workforce/lds_csww_pan_agg/configuration.py new file mode 100644 index 00000000..3d7b6f00 --- /dev/null +++ b/liiatools/datasets/social_work_workforce/lds_csww_pan_agg/configuration.py @@ -0,0 +1,85 @@ +from pathlib import Path +import logging +import datetime +import os +import yaml +from string import Template + +from liiatools.spec import s903 as s903_asset_dir +from liiatools.spec import common as common_asset_dir + +log = logging.getLogger(__name__) + +DEFAULT_CONFIG_DIR = Path(s903_asset_dir.__file__).parent +COMMON_CONFIG_DIR = Path(common_asset_dir.__file__).parent + + +class Config(dict): + def __init__(self, config_files=None): + super().__init__() + + if not config_files: + config_files = [ + "PAN_AGG_CONFIG", + "LA_CODES_CONFIG", + ] + + for file in config_files: + if file == "PAN_AGG_CONFIG": + file = DEFAULT_CONFIG_DIR / "pan-agg.yml" + elif file == "LA_CODES_CONFIG": + file = COMMON_CONFIG_DIR / "LA-codes.yml" + self.load_config(file, conditional=False) + + self["config_date"] = datetime.datetime.now().isoformat() + 
try: + self["username"] = os.getlogin() + except OSError: + # This happens when tests are not run under a login shell, e.g. CI pipeline + pass + + def load_config(self, filename, conditional=False, warn=False): + """ + Load configuration from yaml file. Any loaded configuration + is only set if the values don't already exist in CONFIG. + + Files can contain ${} placeholders following the Python string.Template format. + The context will include any keys already existing in the configuration, any keys + from the current file - however, if these include placeholders, the placeholders + will not be replaced. Finally, environment variables can be referenced with + `os_environ_VARIABLE_NAME`. + + Keyword arguments: + filename -- Filename to load from + conditional -- If True, ignore file if it doesn't exist. If False, fail. (default False) + """ + if conditional and not os.path.isfile(filename): + if warn: + log.warning("Missing optional file {}".format(filename)) + + return + + with open(filename) as FILE: + user_config = yaml.load(FILE, Loader=yaml.FullLoader) + + log.info( + "Loading {} configuration values from '{}'.".format( + len(user_config), filename + ) + ) + + environment_dict = {"os_environ_{}".format(k): v for k, v in os.environ.items()} + + variables = dict(self) + variables.update(user_config) + variables.update(environment_dict) + + with open(filename, "rt") as FILE: + user_config_string = FILE.read() + + user_config_template = Template(user_config_string) + user_config_string = user_config_template.substitute(variables) + + user_config = yaml.load(user_config_string, Loader=yaml.FullLoader) + + self.update(user_config) diff --git a/liiatools/datasets/social_work_workforce/lds_csww_pan_agg/process.py b/liiatools/datasets/social_work_workforce/lds_csww_pan_agg/process.py new file mode 100644 index 00000000..4417bc92 --- /dev/null +++ b/liiatools/datasets/social_work_workforce/lds_csww_pan_agg/process.py @@ -0,0 +1,53 @@ +from pathlib import Path +import pandas as pd +import logging + +log = logging.getLogger(__name__) + + +def read_file(file): + """ + Reads the csv file as a pandas DataFrame + """ + filepath = Path(file) + s903_df = pd.read_csv(filepath, index_col=None) + return s903_df + + +def match_load_file(s903_df, column_names): + """ + Matches the columns in the DataFrame against one of the 10 SSDA903 file types + """ + for table_name, expected_columns in column_names.items(): + if set(s903_df.columns) == set(expected_columns): + return table_name + + +def _merge_dfs(s903_df, old_df, la_name): + """ + Deletes existing data for new LA from pan file + Merges new LA data to pan file + """ + old_df = old_df.drop(old_df[old_df["LA"] == la_name].index) + s903_df = pd.concat([s903_df, old_df], axis=0, ignore_index=True) + return s903_df + + +def merge_agg_files(output, table_name, s903_df, la_name): + """ + Checks if pan file exists + Passes old and new file to function to be merged + """ + output_file = Path(output, f"pan_London_SSDA903_{table_name}.csv") + if output_file.is_file(): + old_df = pd.read_csv(output_file, index_col=None) + s903_df = _merge_dfs(s903_df, old_df, la_name) + return s903_df + + +def export_pan_file(output, table_name, s903_df): + """ + Writes file to output directory + """ + output_path = Path(output, f"pan_London_SSDA903_{table_name}.csv") + s903_df.to_csv(output_path, index=False) diff --git a/liiatools/spec/social_work_workforce/pan-agg.yml b/liiatools/spec/social_work_workforce/pan-agg.yml new file mode 100644 index 00000000..38b3dee2 --- /dev/null 
+++ b/liiatools/spec/social_work_workforce/pan-agg.yml @@ -0,0 +1,109 @@ +column_names: + Header: + - CHILD + - SEX + - DOB + - ETHNIC + - UPN + - MOTHER + - MC_DOB + - LA + - YEAR + Episodes: + - CHILD + - DECOM + - RNE + - LS + - CIN + - PLACE + - PLACE_PROVIDER + - DEC + - REC + - REASON_PLACE_CHANGE + - HOME_POST + - PL_POST + - URN + - LA + - YEAR + Reviews: + - CHILD + - DOB + - REVIEW + - REVIEW_CODE + - LA + - YEAR + UASC: + - CHILD + - SEX + - DOB + - DUC + - LA + - YEAR + OC2: + - CHILD + - DOB + - SDQ_SCORE + - SDQ_REASON + - CONVICTED + - HEALTH_CHECK + - IMMUNISATIONS + - TEETH_CHECK + - HEALTH_ASSESSMENT + - SUBSTANCE_MISUSE + - INTERVENTION_RECEIVED + - INTERVENTION_OFFERED + - LA + - YEAR + OC3: + - CHILD + - DOB + - IN_TOUCH + - ACTIV + - ACCOM + - LA + - YEAR + AD1: + - CHILD + - DOB + - DATE_INT + - DATE_MATCH + - FOSTER_CARE + - NB_ADOPTR + - SEX_ADOPTR + - LS_ADOPTR + - LA + - YEAR + PlacedAdoption: + - CHILD + - DOB + - DATE_PLACED + - DATE_PLACED_CEASED + - REASON_PLACED_CEASED + - LA + - YEAR + PrevPerm: + - CHILD + - DOB + - PREV_PERM + - LA_PERM + - DATE_PERM + - LA + - YEAR + Missing: + - CHILD + - DOB + - MISSING + - MIS_START + - MIS_END + - LA + - YEAR + +pan_data_kept: + - Header + - Episodes + - Reviews + - UASC + - OC2 + - OC3 + - PrevPerm + - Missing \ No newline at end of file From 9fdabda90f03286d5827b76f5feca64a0212f3ff Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Wed, 5 Jul 2023 13:25:38 +0000 Subject: [PATCH 16/40] Add pan_agg functionality to workforce --- .../social_work_workforce/csww_cli.py | 32 +++++ .../csww_main_functions.py | 67 ++++++--- .../lds_csww_pan_agg/configuration.py | 6 +- .../lds_csww_pan_agg/process.py | 30 ++-- .../spec/social_work_workforce/pan-agg.yml | 134 ++++-------------- 5 files changed, 127 insertions(+), 142 deletions(-) diff --git a/liiatools/datasets/social_work_workforce/csww_cli.py b/liiatools/datasets/social_work_workforce/csww_cli.py index 0b687767..68e8600c 100644 --- a/liiatools/datasets/social_work_workforce/csww_cli.py +++ b/liiatools/datasets/social_work_workforce/csww_cli.py @@ -60,3 +60,35 @@ def la_agg(input, output): :return: None """ csww_main_functions.la_agg(input, output) + + +@csww.command() +@click.option( + "--i", + "input", + required=True, + type=str, + help="A string specifying the input file location, including the file name and suffix, usable by a pathlib Path function", +) +@click.option( + "--la_code", + required=True, + type=click.Choice(la_list, case_sensitive=False), + help="A three letter code, specifying the local authority that deposited the file", +) +@click.option( + "--o", + "output", + required=True, + type=str, + help="A string specifying the output directory location", +) +def pan_agg(input, la_code, output): + """ + Joins data from newly merged social work workforce file (output of la-agg()) to existing pan-London social work workforce data + :param input: should specify the input file location, including file name and suffix, and be usable by a Path function + :param la_code: should be a three-letter string for the local authority depositing the file + :param output: should specify the path to the output folder + :return: None + """ + csww_main_functions.pan_agg(input, la_code, output) \ No newline at end of file diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py index 44548cd7..da651312 100644 --- 
a/liiatools/datasets/social_work_workforce/csww_main_functions.py +++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py @@ -39,8 +39,8 @@ ) # dependencies for pan_agg() -# from liiatools.datasets.social_work_workforce.lds_csww_pan_agg import configuration as pan_config -# from liiatools.datasets.social_work_workforce.lds_csww_pan_agg import process as pan_process +from liiatools.datasets.social_work_workforce.lds_csww_pan_agg import configuration as pan_config +from liiatools.datasets.social_work_workforce.lds_csww_pan_agg import process as pan_process COMMON_CONFIG_DIR = Path(common_asset_dir.__file__).parent @@ -178,21 +178,52 @@ def la_agg(input, output): agg_process.export_la_file(output, table_name, csww_df) +def pan_agg(input, la_code, output): + """ + Joins data from newly merged social work workforce file (output of la-agg()) to existing pan-London workforce data + :param input: should specify the input file location, including file name and suffix, and be usable by a Path function + :param la_code: should be a three-letter string for the local authority depositing the file + :param output: should specify the path to the output folder + :return: None + """ + + # Configuration + config = pan_config.Config() + + # Read file and match type + csww_df = pan_process.read_file(input) + column_names = config["column_names"] + table_name = pan_process.match_load_file(csww_df, column_names) + + # Remove unwanted datasets and merge wanted with existing output + pan_data_kept = config["pan_data_kept"] + if table_name in pan_data_kept: + la_name = flip_dict(config["data_codes"])[la_code] + csww_df = pan_process.merge_agg_files(output, table_name, csww_df, la_name) + pan_process.export_pan_file(output, table_name, csww_df) + + # Run in Visual Studio Code |> -# cleanfile( -# "/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml", -# "BAD", -# "/workspaces/liia_tools/liiatools/datasets/social_work_workforce/lds_csww_clean", -# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", -# ) - -# la_agg( -# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_worker_clean.csv", -# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", -# ) - -# la_agg( -# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_lalevel_clean.csv", -# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", -# ) +cleanfile( + "/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/NEW/social_work_workforce_2022.xml", + "NEW", + "/workspaces/liia_tools/liiatools/datasets/social_work_workforce/lds_csww_clean", + "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", +) + +la_agg( + "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_worker_clean.csv", + "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", +) + +la_agg( + "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_lalevel_clean.csv", + "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", +) + +pan_agg( + "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/CSWW_CSWWWorker_merged.csv", + "NEW", + "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", +) \ 
No newline at end of file
diff --git a/liiatools/datasets/social_work_workforce/lds_csww_pan_agg/configuration.py b/liiatools/datasets/social_work_workforce/lds_csww_pan_agg/configuration.py
index 3d7b6f00..f6fd1f53 100644
--- a/liiatools/datasets/social_work_workforce/lds_csww_pan_agg/configuration.py
+++ b/liiatools/datasets/social_work_workforce/lds_csww_pan_agg/configuration.py
@@ -2,15 +2,15 @@
 import logging
 import datetime
 import os
-import yaml
 from string import Template
+import yaml
 
-from liiatools.spec import s903 as s903_asset_dir
+from liiatools.spec import social_work_workforce as csww_asset_dir
 from liiatools.spec import common as common_asset_dir
 
 log = logging.getLogger(__name__)
 
-DEFAULT_CONFIG_DIR = Path(s903_asset_dir.__file__).parent
+DEFAULT_CONFIG_DIR = Path(csww_asset_dir.__file__).parent
 COMMON_CONFIG_DIR = Path(common_asset_dir.__file__).parent
 
diff --git a/liiatools/datasets/social_work_workforce/lds_csww_pan_agg/process.py b/liiatools/datasets/social_work_workforce/lds_csww_pan_agg/process.py
index 4417bc92..329fe23d 100644
--- a/liiatools/datasets/social_work_workforce/lds_csww_pan_agg/process.py
+++ b/liiatools/datasets/social_work_workforce/lds_csww_pan_agg/process.py
@@ -1,6 +1,6 @@
 from pathlib import Path
-import pandas as pd
 import logging
+import pandas as pd
 
 log = logging.getLogger(__name__)
 
@@ -10,44 +10,44 @@ def read_file(file):
     Reads the csv file as a pandas DataFrame
     """
     filepath = Path(file)
-    s903_df = pd.read_csv(filepath, index_col=None)
-    return s903_df
+    csww_df = pd.read_csv(filepath, index_col=None)
+    return csww_df
 
 
-def match_load_file(s903_df, column_names):
+def match_load_file(csww_df, column_names):
     """
-    Matches the columns in the DataFrame against one of the 10 SSDA903 file types
+    Matches the columns in the DataFrame against one of the 2 social work workforce file types
     """
     for table_name, expected_columns in column_names.items():
-        if set(s903_df.columns) == set(expected_columns):
+        if set(csww_df.columns) == set(expected_columns):
             return table_name
 
 
-def _merge_dfs(s903_df, old_df, la_name):
+def _merge_dfs(csww_df, old_df, la_name):
     """
     Deletes existing data for new LA from pan file
     Merges new LA data to pan file
     """
     old_df = old_df.drop(old_df[old_df["LA"] == la_name].index)
-    s903_df = pd.concat([s903_df, old_df], axis=0, ignore_index=True)
-    return s903_df
+    csww_df = pd.concat([csww_df, old_df], axis=0, ignore_index=True)
+    return csww_df
 
 
-def merge_agg_files(output, table_name, s903_df, la_name):
+def merge_agg_files(output, table_name, csww_df, la_name):
     """
     Checks if pan file exists
     Passes old and new file to function to be merged
     """
-    output_file = Path(output, f"pan_London_SSDA903_{table_name}.csv")
+    output_file = Path(output, f"pan_London_CSWW_{table_name}.csv")
     if output_file.is_file():
         old_df = pd.read_csv(output_file, index_col=None)
-        s903_df = _merge_dfs(s903_df, old_df, la_name)
-    return s903_df
+        csww_df = _merge_dfs(csww_df, old_df, la_name)
+    return csww_df
 
 
-def export_pan_file(output, table_name, s903_df):
+def export_pan_file(output, table_name, csww_df):
     """
     Writes file to output directory
     """
-    output_path = Path(output, f"pan_London_SSDA903_{table_name}.csv")
-    s903_df.to_csv(output_path, index=False)
+    output_path = Path(output, f"pan_London_CSWW_{table_name}.csv")
+    csww_df.to_csv(output_path, index=False)
diff --git a/liiatools/spec/social_work_workforce/pan-agg.yml b/liiatools/spec/social_work_workforce/pan-agg.yml
index 38b3dee2..c6bdad4e 100644
--- a/liiatools/spec/social_work_workforce/pan-agg.yml
+++ b/liiatools/spec/social_work_workforce/pan-agg.yml
@@ -1,109 +1,31 @@
 column_names:
-  Header:
-    - CHILD
-    - SEX
-    - DOB
-    - ETHNIC
-    - UPN
-    - MOTHER
-    - MC_DOB
-    - LA
-    - YEAR
-  Episodes:
-    - CHILD
-    - DECOM
-    - RNE
-    - LS
-    - CIN
-    - PLACE
-    - PLACE_PROVIDER
-    - DEC
-    - REC
-    - REASON_PLACE_CHANGE
-    - HOME_POST
-    - PL_POST
-    - URN
-    - LA
-    - YEAR
-  Reviews:
-    - CHILD
-    - DOB
-    - REVIEW
-    - REVIEW_CODE
-    - LA
-    - YEAR
-  UASC:
-    - CHILD
-    - SEX
-    - DOB
-    - DUC
-    - LA
-    - YEAR
-  OC2:
-    - CHILD
-    - DOB
-    - SDQ_SCORE
-    - SDQ_REASON
-    - CONVICTED
-    - HEALTH_CHECK
-    - IMMUNISATIONS
-    - TEETH_CHECK
-    - HEALTH_ASSESSMENT
-    - SUBSTANCE_MISUSE
-    - INTERVENTION_RECEIVED
-    - INTERVENTION_OFFERED
-    - LA
-    - YEAR
-  OC3:
-    - CHILD
-    - DOB
-    - IN_TOUCH
-    - ACTIV
-    - ACCOM
-    - LA
-    - YEAR
-  AD1:
-    - CHILD
-    - DOB
-    - DATE_INT
-    - DATE_MATCH
-    - FOSTER_CARE
-    - NB_ADOPTR
-    - SEX_ADOPTR
-    - LS_ADOPTR
-    - LA
-    - YEAR
-  PlacedAdoption:
-    - CHILD
-    - DOB
-    - DATE_PLACED
-    - DATE_PLACED_CEASED
-    - REASON_PLACED_CEASED
-    - LA
-    - YEAR
-  PrevPerm:
-    - CHILD
-    - DOB
-    - PREV_PERM
-    - LA_PERM
-    - DATE_PERM
-    - LA
-    - YEAR
-  Missing:
-    - CHILD
-    - DOB
-    - MISSING
-    - MIS_START
-    - MIS_END
-    - LA
-    - YEAR
+  CSWWWorker:
+    - AgencyWorker
+    - SWENo
+    - FTE
+    - PersonBirthDate
+    - GenderCurrent
+    - Ethnicity
+    - QualInst
+    - StepUpGrad
+    - RoleStartDate
+    - StartOrigin
+    - Cases30
+    - WorkingDaysLost
+    - ContractWeeks
+    - FrontlineGrad
+    - Absat30Sept
+    - ReasonAbsence
+    - CFKSSstatus
+    - LA
+    - YEAR
+  LALevelVacancies:
+    - NumberOfVacancies
+    - NoAgencyFTE
+    - NoAgencyHeadcount
+    - LA
+    - YEAR
 
 pan_data_kept:
-  - Header
-  - Episodes
-  - Reviews
-  - UASC
-  - OC2
-  - OC3
-  - PrevPerm
-  - Missing
\ No newline at end of file
+  - CSWWWorker
+  - LALevelVacancies
\ No newline at end of file

From 1332e12ed1f86c1127bc309b38c8a4836c33039 Mon Sep 17 00:00:00 2001
From: patrick-troy <58770937+patrick-troy@users.noreply.github.com>
Date: Wed, 5 Jul 2023 15:33:58 +0100
Subject: [PATCH 17/40] add start of cleaning functionality

---
 .../csdatatools/datasets/cincensus/filters.py | 50 +++++++++++++++++--
 .../csww_main_functions.py                    |  8 ++-
 .../lds_csww_clean/cleaner.py                 | 30 +++++++++++
 .../lds_csww_clean/schema.py                  |  9 ++++
 4 files changed, 90 insertions(+), 7 deletions(-)
 create mode 100644 liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py

diff --git a/liiatools/csdatatools/datasets/cincensus/filters.py b/liiatools/csdatatools/datasets/cincensus/filters.py
index cd34b680..c6d7f313 100644
--- a/liiatools/csdatatools/datasets/cincensus/filters.py
+++ b/liiatools/csdatatools/datasets/cincensus/filters.py
@@ -1,12 +1,13 @@
 import logging
 from typing import List
-
+import xml.etree.ElementTree as ET
 import xmlschema
+from xmlschema import XMLSchemaValidatorError
+
 from sfdata_stream_parser.checks import type_check
 from sfdata_stream_parser import events
 from sfdata_stream_parser.collectors import collector, block_check
 from sfdata_stream_parser.filters.generic import streamfilter, pass_event
-from xmlschema import XMLSchemaValidatorError
 
 log = logging.getLogger(__name__)
 
@@ -29,7 +30,6 @@ def add_context(event, context: List[str]):
         context.pop()
     else:
         local_context = tuple(context)
-
     return event.from_event(event, context=local_context)
 
 
@@ -54,8 +54,43 @@ def strip_text(event):
         return None
 
 
+def _create_category_dict(field: str, file: str):
+    """
+    Create a dictionary containing the different categorical values of a given field to conform categories
+    e.g.
{'category': [{'code': '0', 'name': 'Not an Agency Worker'}, {'code': '1', 'name': 'Agency Worker'}]} + + :param field: Name of the categorical field you want to find the values for + :param file: Path to the .xsd schema containing possible categories + :return: Dictionary of categorical values and potential alternatives + """ + category_dict = {"category": []} + + xsd_xml = ET.parse(file) + search_elem = f".//{{http://www.w3.org/2001/XMLSchema}}simpleType[@name='{field}']" + element = xsd_xml.find(search_elem) + + if element is not None: + search_value = f".//{{http://www.w3.org/2001/XMLSchema}}enumeration" + value = element.findall(search_value) + if value: + for v in value: + code_dict = {"code": v.get("value")} + category_dict["category"].append(code_dict) + + search_doc = f".//{{http://www.w3.org/2001/XMLSchema}}documentation" + documentation = element.findall(search_doc) + for i, d in enumerate(documentation): + name_dict = {"name": d.text} + category_dict["category"][i] = {**category_dict["category"][i], **name_dict} + + return category_dict + + else: + return + + @streamfilter() -def add_schema(event, schema: xmlschema.XMLSchema): +def add_schema(event, schema: xmlschema.XMLSchema, schema_path: str): """ Requires each event to have event.context as set by :func:`add_context` @@ -65,13 +100,18 @@ def add_schema(event, schema: xmlschema.XMLSchema): Provides: path, schema """ + schema_dict = None assert ( event.context ), "This filter required event.context to be set - see add_context" path = "/".join(event.context) tag = event.context[-1] el = schema.get_element(tag, path) - return event.from_event(event, path=path, schema=el) + + if el.type.name is not None and el.type.name[-4:] == "type": + schema_dict = _create_category_dict(el.type.name, schema_path) + + return event.from_event(event, path=path, schema=el, schema_dict=schema_dict) def _get_validation_error(schema, node) -> XMLSchemaValidatorError: diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py index 44548cd7..3abf6bad 100644 --- a/liiatools/datasets/social_work_workforce/csww_main_functions.py +++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py @@ -11,12 +11,13 @@ # Dependencies for cleanfile() from liiatools.csdatatools.util.xml import dom_parse from liiatools.csdatatools.datasets.cincensus import filters -from liiatools.datasets.social_work_workforce.lds_csww_clean.schema import Schema +from liiatools.datasets.social_work_workforce.lds_csww_clean.schema import Schema, FilePath from liiatools.datasets.social_work_workforce.lds_csww_clean import ( file_creator, configuration as clean_config, csww_record, + cleaner, ) from liiatools.spec import common as common_asset_dir @@ -123,7 +124,10 @@ def cleanfile(input, la_code, la_log_dir, output): la_name = flip_dict(config["data_codes"])[la_code] stream = filters.strip_text(stream) stream = filters.add_context(stream) - stream = filters.add_schema(stream, schema=Schema(input_year).schema) + stream = filters.add_schema(stream, schema=Schema(input_year).schema, schema_path=FilePath(input_year).path) + + # Clean stream + stream = cleaner.clean_dates(stream, schema=FilePath(input_year).path) # Output results stream = csww_record.message_collector(stream) diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py new file mode 100644 index 00000000..0ecc10ee --- /dev/null +++ 
b/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py @@ -0,0 +1,30 @@ +import logging + +from sfdata_stream_parser.checks import type_check +from sfdata_stream_parser import events +from sfdata_stream_parser.filters.generic import streamfilter, pass_event + +from liiatools.datasets.s903.lds_ssda903_clean.converters import to_category, to_integer + +from liiatools.datasets.shared_functions.converters import to_date +from liiatools.datasets.shared_functions.common import check_postcode + +log = logging.getLogger(__name__) + + +@streamfilter( + check=type_check(events.TextNode), fail_function=pass_event, error_function=pass_event +) +def clean_dates(event): + """ + Convert all values that should be dates to dates based on the config.yaml file + + :param event: A filtered list of event objects of type Cell + :return: An updated list of event objects + """ + date = event.config_dict["date"] + try: + text = to_date(event.cell, date) + return event.from_event(event, cell=text, error="0") + except (AttributeError, TypeError, ValueError): + return event.from_event(event, cell="", error="1") diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py index f82b5eb1..cf9c6436 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py @@ -6,6 +6,15 @@ from liiatools.spec import social_work_workforce as csww_asset_dir +class FilePath: + def __init__(self, year): + self.__year = year + + @cached_property + def path(self): + return Path(csww_asset_dir.__file__).parent / f"social_work_workforce_{self.__year}.xsd" + + class Schema: def __init__(self, year): self.__year = year From 732541bbc8aeda5a2423c2969944e87bd78d70bc Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Thu, 6 Jul 2023 11:34:47 +0000 Subject: [PATCH 18/40] Implement cli cleanfile functionality in workforce --- .../social_work_workforce/csww_cli.py | 56 +- .../csww_main_functions.py | 68 ++- .../lds_csww_clean/cleaner.py | 25 +- ...cial_work_workforce_2022_lalevel_clean.csv | 2 + ...ocial_work_workforce_2022_worker_clean.csv | 31 + .../csww/NEW/social_work_workforce_2022.xml | 556 ++++++++++++++++++ 6 files changed, 702 insertions(+), 36 deletions(-) create mode 100644 liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_lalevel_clean.csv create mode 100644 liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_worker_clean.csv create mode 100644 liiatools/spec/social_work_workforce/samples/csww/NEW/social_work_workforce_2022.xml diff --git a/liiatools/datasets/social_work_workforce/csww_cli.py b/liiatools/datasets/social_work_workforce/csww_cli.py index 68e8600c..99f7ca33 100644 --- a/liiatools/datasets/social_work_workforce/csww_cli.py +++ b/liiatools/datasets/social_work_workforce/csww_cli.py @@ -1,12 +1,20 @@ import logging -import click as click +from pathlib import Path +import click +import yaml import click_log from liiatools.datasets.social_work_workforce import csww_main_functions +from liiatools.spec import common as common_asset_dir -logger = logging.getLogger() -click_log.basic_config(logger) +log = logging.getLogger() +click_log.basic_config(log) + +COMMON_CONFIG_DIR = Path(common_asset_dir.__file__).parent +# Get all the possible LA codes that could be used +with open(f"{COMMON_CONFIG_DIR}/LA-codes.yml") as las: + la_list = 
list(yaml.full_load(las)["data_codes"].values()) @click.group() @@ -16,6 +24,46 @@ def csww(): """ pass +@csww.command() +@click.option( + "--i", + "input", + required=True, + type=str, + help="A string specifying the input file location, including the file name and suffix, usable by a pathlib Path function", +) +@click.option( + "--la_code", + required=True, + type=click.Choice(la_list, case_sensitive=False), + help="A three letter code, specifying the local authority that deposited the file", +) +@click.option( + "--la_log_dir", + required=True, + type=str, + help="A string specifying the location that the log files for the LA should be output, usable by a pathlib Path function.", +) +@click.option( + "--o", + "output", + required=True, + type=str, + help="A string specifying the output directory location", +) +@click_log.simple_verbosity_option(log) +def cleanfile(input, la_code, la_log_dir, output): + """ + Cleans input social work workforce xml files according to config and outputs cleaned csv files. + :param input: should specify the input file location, including file name and suffix, and be usable by a Path function + :param la_code: should be a three-letter string for the local authority depositing the file + :param la_log_dir: should specify the path to the local authority's log folder + :param output: should specify the path to the output folder + :return: None + """ + output = csww_main_functions.cleanfile(input, la_code, la_log_dir, output) + return output + @csww.command() @click.option( @@ -25,7 +73,7 @@ def csww(): type=str, help="A string specifying the output file location, including the file name and suffix", ) -@click_log.simple_verbosity_option(logger) +@click_log.simple_verbosity_option(log) def generate_sample(output: str): """ Export a sample file for testing diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py index cdc19aec..04833111 100644 --- a/liiatools/datasets/social_work_workforce/csww_main_functions.py +++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py @@ -6,12 +6,14 @@ generate_sample_csww_file, ) from liiatools.csdatatools.util.stream import consume -from liiatools.csdatatools.util.xml import etree, to_xml +from liiatools.csdatatools.util.xml import etree, to_xml, dom_parse # Dependencies for cleanfile() -from liiatools.csdatatools.util.xml import dom_parse from liiatools.csdatatools.datasets.cincensus import filters -from liiatools.datasets.social_work_workforce.lds_csww_clean.schema import Schema, FilePath +from liiatools.datasets.social_work_workforce.lds_csww_clean.schema import ( + Schema, + FilePath, +) from liiatools.datasets.social_work_workforce.lds_csww_clean import ( file_creator, @@ -34,14 +36,14 @@ # dependencies for la_agg() from liiatools.datasets.social_work_workforce.lds_csww_la_agg import ( configuration as agg_config, -) -from liiatools.datasets.social_work_workforce.lds_csww_la_agg import ( process as agg_process, ) # dependencies for pan_agg() -from liiatools.datasets.social_work_workforce.lds_csww_pan_agg import configuration as pan_config -from liiatools.datasets.social_work_workforce.lds_csww_pan_agg import process as pan_process +from liiatools.datasets.social_work_workforce.lds_csww_pan_agg import ( + configuration as pan_config, + process as pan_process, +) COMMON_CONFIG_DIR = Path(common_asset_dir.__file__).parent @@ -124,10 +126,14 @@ def cleanfile(input, la_code, la_log_dir, output): la_name = 
flip_dict(config["data_codes"])[la_code] stream = filters.strip_text(stream) stream = filters.add_context(stream) - stream = filters.add_schema(stream, schema=Schema(input_year).schema, schema_path=FilePath(input_year).path) + stream = filters.add_schema( + stream, schema=Schema(input_year).schema, schema_path=FilePath(input_year).path + ) # Clean stream - stream = cleaner.clean_dates(stream, schema=FilePath(input_year).path) + #stream = cleaner.clean_dates(stream) + #stream = cleaner.clean_categories(stream) + # Output results stream = csww_record.message_collector(stream) @@ -209,25 +215,25 @@ def pan_agg(input, la_code, output): # Run in Visual Studio Code |> -cleanfile( - "/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/NEW/social_work_workforce_2022.xml", - "NEW", - "/workspaces/liia_tools/liiatools/datasets/social_work_workforce/lds_csww_clean", - "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", -) - -la_agg( - "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_worker_clean.csv", - "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", -) - -la_agg( - "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_lalevel_clean.csv", - "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", -) - -pan_agg( - "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/CSWW_CSWWWorker_merged.csv", - "NEW", - "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", -) \ No newline at end of file +# cleanfile( +# "/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml", +# "BAD", +# "/workspaces/liia_tools/liiatools/datasets/social_work_workforce/lds_csww_clean", +# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", +# ) + +# la_agg( +# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_worker_clean.csv", +# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", +# ) + +# la_agg( +# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_lalevel_clean.csv", +# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", +# ) + +# pan_agg( +# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/CSWW_CSWWWorker_merged.csv", +# "BAD", +# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", +# ) diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py index 0ecc10ee..70e41079 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py @@ -7,7 +7,7 @@ from liiatools.datasets.s903.lds_ssda903_clean.converters import to_category, to_integer from liiatools.datasets.shared_functions.converters import to_date -from liiatools.datasets.shared_functions.common import check_postcode +#from liiatools.datasets.shared_functions.common import check_postcode log = logging.getLogger(__name__) @@ -22,9 +22,32 @@ def clean_dates(event): :param event: A filtered list of event objects of type Cell :return: An updated list of event objects """ + #print("running clean_dates") + 
#print(f"running clean_dates with date: {event.config_dict['date']}") date = event.config_dict["date"] try: text = to_date(event.cell, date) return event.from_event(event, cell=text, error="0") except (AttributeError, TypeError, ValueError): return event.from_event(event, cell="", error="1") + + +@streamfilter( + check=type_check(events.TextNode), fail_function=pass_event, error_function=pass_event +) +def clean_categories(event): + """ + Convert all values that should be categories to categories based on the config.yaml file + + :param event: A filtered list of event objects of type Cell + :return: An updated list of event objects + """ + category = event.config_dict["category"] + try: + text = to_category(event.cell, category) + if text != "error": + return event.from_event(event, cell=text, error="0") + else: + return event.from_event(event, cell="", error="1") + except (AttributeError, TypeError, ValueError): + return event.from_event(event, cell="", error="1") \ No newline at end of file diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_lalevel_clean.csv b/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_lalevel_clean.csv new file mode 100644 index 00000000..912fc8a2 --- /dev/null +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_lalevel_clean.csv @@ -0,0 +1,2 @@ +NumberOfVacancies,NoAgencyFTE,NoAgencyHeadcount,YEAR,LA +79.68,59.82,71,2022,Barking and Dagenham diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_worker_clean.csv b/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_worker_clean.csv new file mode 100644 index 00000000..d00f569c --- /dev/null +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_worker_clean.csv @@ -0,0 +1,31 @@ +AgencyWorker,SWENo,FTE,PersonBirthDate,GenderCurrent,Ethnicity,QualInst,StepUpGrad,RoleStartDate,StartOrigin,Cases30,WorkingDaysLost,ContractWeeks,FrontlineGrad,Absat30Sept,ReasonAbsence,CFKSSstatus,YEAR,LA +1,93df82943c4e0757d41565d23ecb26ecd4e146e752cf9a13f80fde8c44cfee47,0.521371,1967-01-01,1,REFU,Institution Name,0,1988-04-07,9,72,15.31,288.7,1,0,TRN,1,2022,Barking and Dagenham +1,be96a5ee5b7cc0caff4ba0452bb7d9e07d920d586912fba81d8655b373c25e04,0,1958-04-01,9,,Institution Name,1,,8,66,29.87,2.5,1,,,4,2022,Barking and Dagenham +1,a0848103de9608a0cf1daf5e0919a81b9eaf1c315bc350348e2a8124d09762fa,0,1984-01-01,,APKN,Institution Name,0,2014-01-26,4,,92.56,213.4,0,,,2,2022,Barking and Dagenham +0,aca59cf1fc90a06d57abaf1e396e74ec645df9755b78a8b9b5fdc2ac1dcc3bf2,0.899676,1990-09-01,9,BAFR,,0,2023-03-28,2,,14.39,,0,,,,2022,Barking and Dagenham +1,b5b3c4b99d2d644b05e28989a2c3c3dddfbbdb0e479ac5c7bb9d2e77333a4f53,0.133587,,2,AIND,Institution Name,1,2017-06-10,,,5.39,475.7,,1,UNP,1,2022,Barking and Dagenham +1,8e6ca61bfea2d19acf090550f277dc566ee5908fb7d0c0c45bd11abbbf04c049,0,1993-05-01,9,,,,2020-06-14,3,11,,141.0,1,,,,2022,Barking and Dagenham +1,1ce5045eb004aa120117e90a0eb2aea4aa38523be9cedb37d2621bcbcbba329c,0.803122,,,WBRI,Institution Name,,,,,95.06,403.6,0,,,2,2022,Barking and Dagenham +1,56ea36a6ef0a6e8e0bf43643363260aaa133e106d40d76c4ef8b677c0ed65f04,0.767688,1996-08-01,0,MWAS,,1,2023-03-28,,62,,,1,,,2,2022,Barking and Dagenham +0,b41b26ea7649687be2f38cec5274cacea9092418177a5270a6aa2343b04ae22e,0.843488,1959-04-01,2,APKN,Institution Name,0,1997-10-01,2,,16.74,456.3,0,,,4,2022,Barking and Dagenham 
+1,4c9a07fd1a15975784ffaa889a44c906ebc56fec34de4deeecdfafde430add1a,0,1971-10-01,,,Institution Name,1,1993-10-04,,45,22.98,441.5,0,,,3,2022,Barking and Dagenham +1,8e23e51271599fb562e2db2756034cf7b954ae51a654c020bdea17ff439c68e2,0.12232,1996-06-01,1,BAFR,,,,10,23,36.13,213.1,0,,,,2022,Barking and Dagenham +0,6ac44220390e4481616f4df2cfe2cb2471e420acc53168214b21bb4dc410a2a0,0,1987-10-01,,,,0,2012-10-02,2,37,90.85,28.5,1,1,UNA,1,2022,Barking and Dagenham +1,e974f0b0b3a6b797b3acccacaf8489bdf4b92ad5546f7595cf14fb451599ac57,0,1968-11-01,,ABAN,,0,2018-08-03,9,,43.02,154.7,1,,,1,2022,Barking and Dagenham +0,0b453303370012999ca7de11249dcb2aa0c2c94210cf4c51529426198cad8111,0,,0,AOTH,Institution Name,0,2015-04-24,9,,3.51,424.0,,,,2,2022,Barking and Dagenham +0,14d5de9740886b7a7dd72877b1d39971f66d1f7a0c364fc68c8176aa582127fa,0.859218,1968-12-01,0,BCRB,Institution Name,1,2003-09-12,5,85,98.22,206.6,,,,2,2022,Barking and Dagenham +0,fb48d144eda11163fa890ed4663e0463be3902d19837f28dadba08235d371496,0.021911,1977-06-01,,REFU,Institution Name,1,2022-10-08,6,25,29.19,,1,1,SIC,,2022,Barking and Dagenham +1,25f8e1c67c130e33a250ec3216cfe3630c15ffe7fc12066b94d5c90244878c1a,0.786453,1996-11-01,0,MWAS,Institution Name,0,2023-03-28,1,83,78.29,364.4,1,,,,2022,Barking and Dagenham +0,f4b772fd07a4b01a9a3f88808ed073fe5b299f2cf4be244883154614dbd8ae5c,0.491425,1998-04-01,1,,,0,2023-03-28,4,,3.1,415.3,,,,4,2022,Barking and Dagenham +1,c569dae7a53fce2755b29a979664734b18275c28b7f5817f71b65f5776e04e64,0.192894,1959-09-01,0,REFU,Institution Name,1,1985-12-12,9,14,,188.4,0,,,1,2022,Barking and Dagenham +0,9e56119a6e4f2760d518f5b9537d81f6de943c1144cce67e1f14f6ad44e81dd2,0,1962-11-01,,WBRI,,1,1999-07-14,,65,16.26,,1,,,,2022,Barking and Dagenham +1,5a075ae2486f88a2b944784d643f2ad5f06e2263bbe65c6c4db8a07b20f7d1c7,0.862474,1992-02-01,0,WBRI,,1,2023-03-28,4,39,,,0,,,2,2022,Barking and Dagenham +1,256eb6f0eda1d17c6cf4c9cd5b7ff965e302218b2dc6ffe681269e52faab0e29,0,2001-10-01,1,AIND,,0,,1,29,38.71,339.9,0,,,,2022,Barking and Dagenham +1,79f9865feea17900c27cf27e98ba15de977887a4844241ed019874259ad7ef6a,0.668266,1983-04-01,9,,Institution Name,1,2023-03-28,3,,,,1,,,2,2022,Barking and Dagenham +0,34ec568120a609fa591dcf9c7b5331640f01ef262f379a8816a2876bceb0b191,0.978729,1958-04-01,9,MWBA,Institution Name,,2002-01-31,1,,,121.9,1,,,3,2022,Barking and Dagenham +1,d258de55dec2e6165c8fb23ceeaf4777bfde025c4c9f0e4d5a1eca26d481a1e2,0,1989-06-01,1,,,1,2011-08-31,5,70,12.2,301.3,0,,,4,2022,Barking and Dagenham +0,d441374724414d204880c95117ef87c169a6aba7eaddd1ed4a613244a87b75e4,0,1981-09-01,,CHNE,,1,2001-02-10,6,,94.67,471.5,0,,,2,2022,Barking and Dagenham +1,206e97c471f4d5221f8db5aa404d02939e28e13591da37cba903e9f2e1ecc3cd,0.561974,,,OOTH,Institution Name,0,2014-09-30,4,63,87.59,,,0,SIC,1,2022,Barking and Dagenham +0,c5c93de4d0858abb1382d1af403cf9d2e1e87e650fce313b36e0e84bbac0f8b4,0,,,,Institution Name,0,1993-04-18,3,80,,299.1,0,,,4,2022,Barking and Dagenham +0,6e4c8b306eae10888de2dbc28c334f74201a1e7f41da4cac9f71e65dff211ef8,0.034436,,2,APKN,Institution Name,,1997-01-11,4,23,83.01,,0,,,3,2022,Barking and Dagenham +1,170646f335a0f93ad112764dcd11fe7a4890f08aefb28d1696807e37e882153d,0,1975-01-01,9,WOTH,Institution Name,0,2016-08-20,9,87,13.01,,1,,,,2022,Barking and Dagenham diff --git a/liiatools/spec/social_work_workforce/samples/csww/NEW/social_work_workforce_2022.xml b/liiatools/spec/social_work_workforce/samples/csww/NEW/social_work_workforce_2022.xml new file mode 100644 index 00000000..3cb567fa --- /dev/null +++ 
b/liiatools/spec/social_work_workforce/samples/csww/NEW/social_work_workforce_2022.xml
@@ -0,0 +1,556 @@
[556 added lines of generated sample XML omitted: the element tags were lost in extraction, leaving only bare values. The file holds a collection header (values CSWW, 2022, reference date 2022-09-30, source level L, LA number 314, produced by liiatools.datasets.social_work_workforce.sample_data at 2023-03-28T14:54:55Z), one LALevelVacancies block (99.68, 75.82, 142) and thirty CSWWWorker records.]
\ No newline at end of file From f77f196207e8331c2d0202c0c5c1e0e15a292214 Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Thu, 6 Jul 2023 11:37:10 +0000 Subject: [PATCH 19/40] Remove csv files --- ...cial_work_workforce_2022_lalevel_clean.csv | 2 -- ...ocial_work_workforce_2022_worker_clean.csv | 31 ------------------- 2 files changed, 33 deletions(-) delete mode 100644 liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_lalevel_clean.csv delete mode 100644 liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_worker_clean.csv diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_lalevel_clean.csv b/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_lalevel_clean.csv deleted file mode 100644 index 912fc8a2..00000000 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_lalevel_clean.csv +++ /dev/null @@ -1,2 +0,0 @@ -NumberOfVacancies,NoAgencyFTE,NoAgencyHeadcount,YEAR,LA -79.68,59.82,71,2022,Barking and Dagenham diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_worker_clean.csv b/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_worker_clean.csv deleted file mode 100644 index d00f569c..00000000 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_worker_clean.csv +++ /dev/null @@ -1,31 +0,0 @@ -AgencyWorker,SWENo,FTE,PersonBirthDate,GenderCurrent,Ethnicity,QualInst,StepUpGrad,RoleStartDate,StartOrigin,Cases30,WorkingDaysLost,ContractWeeks,FrontlineGrad,Absat30Sept,ReasonAbsence,CFKSSstatus,YEAR,LA -1,93df82943c4e0757d41565d23ecb26ecd4e146e752cf9a13f80fde8c44cfee47,0.521371,1967-01-01,1,REFU,Institution Name,0,1988-04-07,9,72,15.31,288.7,1,0,TRN,1,2022,Barking and Dagenham -1,be96a5ee5b7cc0caff4ba0452bb7d9e07d920d586912fba81d8655b373c25e04,0,1958-04-01,9,,Institution Name,1,,8,66,29.87,2.5,1,,,4,2022,Barking and Dagenham -1,a0848103de9608a0cf1daf5e0919a81b9eaf1c315bc350348e2a8124d09762fa,0,1984-01-01,,APKN,Institution Name,0,2014-01-26,4,,92.56,213.4,0,,,2,2022,Barking and Dagenham -0,aca59cf1fc90a06d57abaf1e396e74ec645df9755b78a8b9b5fdc2ac1dcc3bf2,0.899676,1990-09-01,9,BAFR,,0,2023-03-28,2,,14.39,,0,,,,2022,Barking and Dagenham -1,b5b3c4b99d2d644b05e28989a2c3c3dddfbbdb0e479ac5c7bb9d2e77333a4f53,0.133587,,2,AIND,Institution Name,1,2017-06-10,,,5.39,475.7,,1,UNP,1,2022,Barking and Dagenham -1,8e6ca61bfea2d19acf090550f277dc566ee5908fb7d0c0c45bd11abbbf04c049,0,1993-05-01,9,,,,2020-06-14,3,11,,141.0,1,,,,2022,Barking and Dagenham -1,1ce5045eb004aa120117e90a0eb2aea4aa38523be9cedb37d2621bcbcbba329c,0.803122,,,WBRI,Institution Name,,,,,95.06,403.6,0,,,2,2022,Barking and Dagenham -1,56ea36a6ef0a6e8e0bf43643363260aaa133e106d40d76c4ef8b677c0ed65f04,0.767688,1996-08-01,0,MWAS,,1,2023-03-28,,62,,,1,,,2,2022,Barking and Dagenham -0,b41b26ea7649687be2f38cec5274cacea9092418177a5270a6aa2343b04ae22e,0.843488,1959-04-01,2,APKN,Institution Name,0,1997-10-01,2,,16.74,456.3,0,,,4,2022,Barking and Dagenham -1,4c9a07fd1a15975784ffaa889a44c906ebc56fec34de4deeecdfafde430add1a,0,1971-10-01,,,Institution Name,1,1993-10-04,,45,22.98,441.5,0,,,3,2022,Barking and Dagenham -1,8e23e51271599fb562e2db2756034cf7b954ae51a654c020bdea17ff439c68e2,0.12232,1996-06-01,1,BAFR,,,,10,23,36.13,213.1,0,,,,2022,Barking and Dagenham 
-0,6ac44220390e4481616f4df2cfe2cb2471e420acc53168214b21bb4dc410a2a0,0,1987-10-01,,,,0,2012-10-02,2,37,90.85,28.5,1,1,UNA,1,2022,Barking and Dagenham -1,e974f0b0b3a6b797b3acccacaf8489bdf4b92ad5546f7595cf14fb451599ac57,0,1968-11-01,,ABAN,,0,2018-08-03,9,,43.02,154.7,1,,,1,2022,Barking and Dagenham -0,0b453303370012999ca7de11249dcb2aa0c2c94210cf4c51529426198cad8111,0,,0,AOTH,Institution Name,0,2015-04-24,9,,3.51,424.0,,,,2,2022,Barking and Dagenham -0,14d5de9740886b7a7dd72877b1d39971f66d1f7a0c364fc68c8176aa582127fa,0.859218,1968-12-01,0,BCRB,Institution Name,1,2003-09-12,5,85,98.22,206.6,,,,2,2022,Barking and Dagenham -0,fb48d144eda11163fa890ed4663e0463be3902d19837f28dadba08235d371496,0.021911,1977-06-01,,REFU,Institution Name,1,2022-10-08,6,25,29.19,,1,1,SIC,,2022,Barking and Dagenham -1,25f8e1c67c130e33a250ec3216cfe3630c15ffe7fc12066b94d5c90244878c1a,0.786453,1996-11-01,0,MWAS,Institution Name,0,2023-03-28,1,83,78.29,364.4,1,,,,2022,Barking and Dagenham -0,f4b772fd07a4b01a9a3f88808ed073fe5b299f2cf4be244883154614dbd8ae5c,0.491425,1998-04-01,1,,,0,2023-03-28,4,,3.1,415.3,,,,4,2022,Barking and Dagenham -1,c569dae7a53fce2755b29a979664734b18275c28b7f5817f71b65f5776e04e64,0.192894,1959-09-01,0,REFU,Institution Name,1,1985-12-12,9,14,,188.4,0,,,1,2022,Barking and Dagenham -0,9e56119a6e4f2760d518f5b9537d81f6de943c1144cce67e1f14f6ad44e81dd2,0,1962-11-01,,WBRI,,1,1999-07-14,,65,16.26,,1,,,,2022,Barking and Dagenham -1,5a075ae2486f88a2b944784d643f2ad5f06e2263bbe65c6c4db8a07b20f7d1c7,0.862474,1992-02-01,0,WBRI,,1,2023-03-28,4,39,,,0,,,2,2022,Barking and Dagenham -1,256eb6f0eda1d17c6cf4c9cd5b7ff965e302218b2dc6ffe681269e52faab0e29,0,2001-10-01,1,AIND,,0,,1,29,38.71,339.9,0,,,,2022,Barking and Dagenham -1,79f9865feea17900c27cf27e98ba15de977887a4844241ed019874259ad7ef6a,0.668266,1983-04-01,9,,Institution Name,1,2023-03-28,3,,,,1,,,2,2022,Barking and Dagenham -0,34ec568120a609fa591dcf9c7b5331640f01ef262f379a8816a2876bceb0b191,0.978729,1958-04-01,9,MWBA,Institution Name,,2002-01-31,1,,,121.9,1,,,3,2022,Barking and Dagenham -1,d258de55dec2e6165c8fb23ceeaf4777bfde025c4c9f0e4d5a1eca26d481a1e2,0,1989-06-01,1,,,1,2011-08-31,5,70,12.2,301.3,0,,,4,2022,Barking and Dagenham -0,d441374724414d204880c95117ef87c169a6aba7eaddd1ed4a613244a87b75e4,0,1981-09-01,,CHNE,,1,2001-02-10,6,,94.67,471.5,0,,,2,2022,Barking and Dagenham -1,206e97c471f4d5221f8db5aa404d02939e28e13591da37cba903e9f2e1ecc3cd,0.561974,,,OOTH,Institution Name,0,2014-09-30,4,63,87.59,,,0,SIC,1,2022,Barking and Dagenham -0,c5c93de4d0858abb1382d1af403cf9d2e1e87e650fce313b36e0e84bbac0f8b4,0,,,,Institution Name,0,1993-04-18,3,80,,299.1,0,,,4,2022,Barking and Dagenham -0,6e4c8b306eae10888de2dbc28c334f74201a1e7f41da4cac9f71e65dff211ef8,0.034436,,2,APKN,Institution Name,,1997-01-11,4,23,83.01,,0,,,3,2022,Barking and Dagenham -1,170646f335a0f93ad112764dcd11fe7a4890f08aefb28d1696807e37e882153d,0,1975-01-01,9,WOTH,Institution Name,0,2016-08-20,9,87,13.01,,1,,,,2022,Barking and Dagenham From 034f7a2d1cdbd4708e9d9b192e75f307f5b5d0fb Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Fri, 7 Jul 2023 07:50:52 +0000 Subject: [PATCH 20/40] Implement workforce cli; start adding cleansing --- .../csww_main_functions.py | 16 +++++----- .../lds_csww_clean/cleaner.py | 31 +++++++++---------- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py index 04833111..9dc8b5fb 100644 --- 
a/liiatools/datasets/social_work_workforce/csww_main_functions.py +++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py @@ -131,8 +131,8 @@ def cleanfile(input, la_code, la_log_dir, output): ) # Clean stream - #stream = cleaner.clean_dates(stream) - #stream = cleaner.clean_categories(stream) + stream = cleaner.clean_categories(stream) + stream = cleaner.clean_dates(stream) # Output results @@ -215,12 +215,12 @@ def pan_agg(input, la_code, output): # Run in Visual Studio Code |> -# cleanfile( -# "/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml", -# "BAD", -# "/workspaces/liia_tools/liiatools/datasets/social_work_workforce/lds_csww_clean", -# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", -# ) +cleanfile( + "/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml", + "BAD", + "/workspaces/liia_tools/liiatools/datasets/social_work_workforce/lds_csww_clean", + "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", +) # la_agg( # "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_worker_clean.csv", diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py index 70e41079..d0596cd7 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py @@ -17,19 +17,17 @@ ) def clean_dates(event): """ - Convert all values that should be dates to dates based on the config.yaml file + Convert all values that should be dates to dates based on the schema xsd file - :param event: A filtered list of event objects of type Cell + :param event: A filtered list of event objects of type text :return: An updated list of event objects """ - #print("running clean_dates") - #print(f"running clean_dates with date: {event.config_dict['date']}") - date = event.config_dict["date"] + date = event.schema_dict["date"] try: - text = to_date(event.cell, date) - return event.from_event(event, cell=text, error="0") + newtext = to_date(event.text, date) + return event.from_event(event, text=f"xDATEx{newtext}", error="0") except (AttributeError, TypeError, ValueError): - return event.from_event(event, cell="", error="1") + return event.from_event(event, text="", error="1") @streamfilter( @@ -37,17 +35,16 @@ def clean_dates(event): ) def clean_categories(event): """ - Convert all values that should be categories to categories based on the config.yaml file + Convert all values that should be categories to categories based on the schema xsd file - :param event: A filtered list of event objects of type Cell + :param event: A filtered list of event objects of type text :return: An updated list of event objects """ - category = event.config_dict["category"] + category = event.schema_dict["category"] try: - text = to_category(event.cell, category) - if text != "error": - return event.from_event(event, cell=text, error="0") - else: - return event.from_event(event, cell="", error="1") + newtext = to_category(event.text, category) + if newtext != "error": + return event.from_event(event, text=f"xCATx{newtext}", error='0') + return event.from_event(event, text="", error="1") except (AttributeError, TypeError, ValueError): - return event.from_event(event, cell="", error="1") \ No newline at end of file + return 
event.from_event(event, text="", error="1") \ No newline at end of file From 5509c60b7bba40990180b9082f05b6aa87d37779 Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Fri, 7 Jul 2023 11:09:42 +0000 Subject: [PATCH 21/40] Add missing fields to la-agg.yml --- .../social_work_workforce/csww_main_functions.py | 12 ++++++------ .../social_work_workforce/lds_csww_clean/cleaner.py | 4 ++-- liiatools/spec/social_work_workforce/la-agg.yml | 3 +++ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py index 9dc8b5fb..86ac784c 100644 --- a/liiatools/datasets/social_work_workforce/csww_main_functions.py +++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py @@ -215,12 +215,12 @@ def pan_agg(input, la_code, output): # Run in Visual Studio Code |> -cleanfile( - "/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml", - "BAD", - "/workspaces/liia_tools/liiatools/datasets/social_work_workforce/lds_csww_clean", - "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", -) +# cleanfile( +# "/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml", +# "BAD", +# "/workspaces/liia_tools/liiatools/datasets/social_work_workforce/lds_csww_clean", +# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", +# ) # la_agg( # "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_worker_clean.csv", diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py index d0596cd7..b85b5da1 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py @@ -25,7 +25,7 @@ def clean_dates(event): date = event.schema_dict["date"] try: newtext = to_date(event.text, date) - return event.from_event(event, text=f"xDATEx{newtext}", error="0") + return event.from_event(event, text=newtext, error="0") except (AttributeError, TypeError, ValueError): return event.from_event(event, text="", error="1") @@ -44,7 +44,7 @@ def clean_categories(event): try: newtext = to_category(event.text, category) if newtext != "error": - return event.from_event(event, text=f"xCATx{newtext}", error='0') + return event.from_event(event, text=newtext, error='0') return event.from_event(event, text="", error="1") except (AttributeError, TypeError, ValueError): return event.from_event(event, text="", error="1") \ No newline at end of file diff --git a/liiatools/spec/social_work_workforce/la-agg.yml b/liiatools/spec/social_work_workforce/la-agg.yml index d3b909ff..3d64fe54 100644 --- a/liiatools/spec/social_work_workforce/la-agg.yml +++ b/liiatools/spec/social_work_workforce/la-agg.yml @@ -39,6 +39,9 @@ sort_order: dedup: CSWWWorker: + - AgencyWorker + - SWENo + - FTE - PersonBirthDate - GenderCurrent - Ethnicity From 0a54643d3c2d595062f2ac8d7deff577bc42154c Mon Sep 17 00:00:00 2001 From: patrick-troy <58770937+patrick-troy@users.noreply.github.com> Date: Fri, 7 Jul 2023 13:43:46 +0100 Subject: [PATCH 22/40] add remaining schema_dicts --- .../csdatatools/datasets/cincensus/filters.py | 90 +++++++++++++++++-- .../csww_main_functions.py | 14 ++- 2 files changed, 95 insertions(+), 9 
deletions(-) diff --git a/liiatools/csdatatools/datasets/cincensus/filters.py b/liiatools/csdatatools/datasets/cincensus/filters.py index c6d7f313..87bedf33 100644 --- a/liiatools/csdatatools/datasets/cincensus/filters.py +++ b/liiatools/csdatatools/datasets/cincensus/filters.py @@ -89,8 +89,64 @@ def _create_category_dict(field: str, file: str): return +def _create_float_dict(field: str, file: str): + float_dict = None + + xsd_xml = ET.parse(file) + search_elem = f".//{{http://www.w3.org/2001/XMLSchema}}simpleType[@name='{field}']" + element = xsd_xml.find(search_elem) + + search_restriction = f".//{{http://www.w3.org/2001/XMLSchema}}restriction" + restriction = element.findall(search_restriction) + for r in restriction: + code_dict = {"numeric": r.get("base")[3:]} # Remove the "xs:" from the start of the base string + if code_dict["numeric"] == "decimal": + float_dict = code_dict + + search_fraction_digits = f".//{{http://www.w3.org/2001/XMLSchema}}fractionDigits" + fraction_digits = element.findall(search_fraction_digits) + for f in fraction_digits: + fraction_digits_dict = {"fixed": f.get("fixed"), "decimal": f.get("value")} + float_dict = {**float_dict, **fraction_digits_dict} + + search_min_inclusive = f".//{{http://www.w3.org/2001/XMLSchema}}minInclusive" + min_inclusive = element.findall(search_min_inclusive) + for m in min_inclusive: + min_dict = {"min_inclusive": m.get("value")} + float_dict = {**float_dict, **min_dict} + + search_max_inclusive = f".//{{http://www.w3.org/2001/XMLSchema}}maxInclusive" + max_inclusive = element.findall(search_max_inclusive) + for m in max_inclusive: + max_dict = {"max_inclusive": m.get("value")} + float_dict = {**float_dict, **max_dict} + + return float_dict + + +def _create_regex_dict(field: str, file: str): + regex_dict = None + + xsd_xml = ET.parse(file) + search_elem = f".//{{http://www.w3.org/2001/XMLSchema}}simpleType[@name='{field}']" + element = xsd_xml.find(search_elem) + + search_restriction = f".//{{http://www.w3.org/2001/XMLSchema}}restriction" + restriction = element.findall(search_restriction) + for r in restriction: + if r.get("base") == "xs:string": + regex_dict = {"regex_string": None} + + search_pattern = f".//{{http://www.w3.org/2001/XMLSchema}}pattern" + pattern = element.findall(search_pattern) + for p in pattern: + regex_dict["regex_string"] = p.get("value") + + return regex_dict + + @streamfilter() -def add_schema(event, schema: xmlschema.XMLSchema, schema_path: str): +def add_schema(event, schema: xmlschema.XMLSchema): """ Requires each event to have event.context as set by :func:`add_context` @@ -100,7 +156,6 @@ def add_schema(event, schema: xmlschema.XMLSchema, schema_path: str): Provides: path, schema """ - schema_dict = None assert ( event.context ), "This filter required event.context to be set - see add_context" @@ -108,10 +163,35 @@ def add_schema(event, schema: xmlschema.XMLSchema, schema_path: str): tag = event.context[-1] el = schema.get_element(tag, path) - if el.type.name is not None and el.type.name[-4:] == "type": - schema_dict = _create_category_dict(el.type.name, schema_path) + return event.from_event(event, path=path, schema=el) + + +@streamfilter(check=type_check(events.TextNode), fail_function=pass_event) +def add_schema_dict(event, schema_path: str): + schema_dict = None - return event.from_event(event, path=path, schema=el, schema_dict=schema_dict) + config_type = event.schema.type.name + if config_type is not None: + if config_type[-4:] == "type": + schema_dict = _create_category_dict(config_type, 
schema_path) + if config_type in ["onedecimalplace", "twodecimalplaces", "ftetype"]: + schema_dict = _create_float_dict(config_type, schema_path) + if config_type in ["swetype"]: + schema_dict = _create_regex_dict(config_type, schema_path) + if config_type == "{http://www.w3.org/2001/XMLSchema}date": + schema_dict = {"date": "%d/%m/%Y"} + if config_type == "{http://www.w3.org/2001/XMLSchema}integer": + schema_dict = {"numeric": "integer"} + if config_type == "{http://www.w3.org/2001/XMLSchema}string": + schema_dict = {"string": "alphanumeric"} + + if schema_dict is not None: + if event.schema.occurs[0] == 0: + schema_dict = {**schema_dict, **{"canbeblank": "yes"}} + elif event.schema.occurs[0] == 1: + schema_dict = {**schema_dict, **{"canbeblank": "no"}} + + return event.from_event(event, schema_dict=schema_dict) def _get_validation_error(schema, node) -> XMLSchemaValidatorError: diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py index 86ac784c..4f2e3e93 100644 --- a/liiatools/datasets/social_work_workforce/csww_main_functions.py +++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py @@ -9,6 +9,7 @@ from liiatools.csdatatools.util.xml import etree, to_xml, dom_parse # Dependencies for cleanfile() +from liiatools.csdatatools.util.xml import dom_parse from liiatools.csdatatools.datasets.cincensus import filters from liiatools.datasets.social_work_workforce.lds_csww_clean.schema import ( Schema, @@ -126,15 +127,13 @@ def cleanfile(input, la_code, la_log_dir, output): la_name = flip_dict(config["data_codes"])[la_code] stream = filters.strip_text(stream) stream = filters.add_context(stream) - stream = filters.add_schema( - stream, schema=Schema(input_year).schema, schema_path=FilePath(input_year).path - ) + stream = filters.add_schema(stream, schema=Schema(input_year).schema) + stream = filters.add_schema_dict(stream, schema_path=FilePath(input_year).path) # Clean stream stream = cleaner.clean_categories(stream) stream = cleaner.clean_dates(stream) - # Output results stream = csww_record.message_collector(stream) @@ -227,6 +226,13 @@ def pan_agg(input, la_code, output): # "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", # ) +cleanfile( + r"C:\Users\patrick.troy\Downloads\LIIA tests\social_work_workforce_2022.xml", + "NEW", + r"C:\Users\patrick.troy\Downloads\LIIA tests", + r"C:\Users\patrick.troy\Downloads\LIIA tests", +) + # la_agg( # "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_lalevel_clean.csv", # "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", From 0ad5deb58629897faf5d4af2e018839cecdbf55d Mon Sep 17 00:00:00 2001 From: patrick-troy <58770937+patrick-troy@users.noreply.github.com> Date: Fri, 7 Jul 2023 15:50:59 +0100 Subject: [PATCH 23/40] add logger functionality --- .../csww_main_functions.py | 19 +- .../lds_csww_clean/filters.py | 262 ++++++++++++++++++ .../lds_csww_clean/logger.py | 225 +++++++++++++++ .../lds_csww_clean/xml.py | 49 ++++ 4 files changed, 544 insertions(+), 11 deletions(-) create mode 100644 liiatools/datasets/social_work_workforce/lds_csww_clean/filters.py create mode 100644 liiatools/datasets/social_work_workforce/lds_csww_clean/logger.py create mode 100644 liiatools/datasets/social_work_workforce/lds_csww_clean/xml.py diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py 
b/liiatools/datasets/social_work_workforce/csww_main_functions.py index 4f2e3e93..5f4a0e01 100644 --- a/liiatools/datasets/social_work_workforce/csww_main_functions.py +++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py @@ -6,11 +6,13 @@ generate_sample_csww_file, ) from liiatools.csdatatools.util.stream import consume -from liiatools.csdatatools.util.xml import etree, to_xml, dom_parse # Dependencies for cleanfile() -from liiatools.csdatatools.util.xml import dom_parse -from liiatools.csdatatools.datasets.cincensus import filters +from liiatools.datasets.social_work_workforce.lds_csww_clean.xml import ( + etree, + to_xml, + dom_parse +) from liiatools.datasets.social_work_workforce.lds_csww_clean.schema import ( Schema, FilePath, @@ -21,6 +23,8 @@ configuration as clean_config, csww_record, cleaner, + logger, + filters, ) from liiatools.spec import common as common_asset_dir @@ -102,7 +106,6 @@ def cleanfile(input, la_code, la_log_dir, output): ): return stream = dom_parse(input) - stream = list(stream) # Get year from input file try: @@ -133,6 +136,7 @@ def cleanfile(input, la_code, la_log_dir, output): # Clean stream stream = cleaner.clean_categories(stream) stream = cleaner.clean_dates(stream) + stream = logger.log_errors(stream) # Output results stream = csww_record.message_collector(stream) @@ -226,13 +230,6 @@ def pan_agg(input, la_code, output): # "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", # ) -cleanfile( - r"C:\Users\patrick.troy\Downloads\LIIA tests\social_work_workforce_2022.xml", - "NEW", - r"C:\Users\patrick.troy\Downloads\LIIA tests", - r"C:\Users\patrick.troy\Downloads\LIIA tests", -) - # la_agg( # "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_lalevel_clean.csv", # "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/filters.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/filters.py new file mode 100644 index 00000000..06fb8798 --- /dev/null +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/filters.py @@ -0,0 +1,262 @@ +import logging +from typing import List +import xml.etree.ElementTree as ET +import xmlschema +from xmlschema import XMLSchemaValidatorError + +from sfdata_stream_parser.checks import type_check +from sfdata_stream_parser import events +from sfdata_stream_parser.collectors import collector, block_check +from sfdata_stream_parser.filters.generic import streamfilter, pass_event + +log = logging.getLogger(__name__) + + +@streamfilter(default_args=lambda: {"context": []}) +def add_context(event, context: List[str]): + """ + Adds 'context' to XML structures. For each :class:`sfdata_stream_parser.events.StartElement` the tag name is + added to a 'context' tuple, and for each :class:`sfdata_stream_parser.events.EndElement` the context is popped. + + For all other events, the context tuple is set as-is. + + Provides: context + """ + if isinstance(event, events.StartElement): + context.append(event.tag) + local_context = tuple(context) + elif isinstance(event, events.EndElement): + local_context = tuple(context) + context.pop() + else: + local_context = tuple(context) + return event.from_event(event, context=local_context) + + +@streamfilter(check=type_check(events.TextNode), fail_function=pass_event) +def strip_text(event): + """ + Strips surrounding whitespaces from :class:`sfdata_stream_parser.events.TextNode`. 
If the event does + not have a text property then this filter fails silently. + """ + if not hasattr(event, "text"): + return event + + if event.text is None: + return event + + text = event.text.strip() + return event.from_event(event, text=text) + + +def _create_category_dict(field: str, file: str): + """ + Create a dictionary containing the different categorical values of a given field to conform categories + e.g. {'category': [{'code': '0', 'name': 'Not an Agency Worker'}, {'code': '1', 'name': 'Agency Worker'}]} + + :param field: Name of the categorical field you want to find the values for + :param file: Path to the .xsd schema containing possible categories + :return: Dictionary of categorical values and potential alternatives + """ + category_dict = {"category": []} + + xsd_xml = ET.parse(file) + search_elem = f".//{{http://www.w3.org/2001/XMLSchema}}simpleType[@name='{field}']" + element = xsd_xml.find(search_elem) + + if element is not None: + search_value = f".//{{http://www.w3.org/2001/XMLSchema}}enumeration" + value = element.findall(search_value) + if value: + for v in value: + code_dict = {"code": v.get("value")} + category_dict["category"].append(code_dict) + + search_doc = f".//{{http://www.w3.org/2001/XMLSchema}}documentation" + documentation = element.findall(search_doc) + for i, d in enumerate(documentation): + name_dict = {"name": d.text} + category_dict["category"][i] = {**category_dict["category"][i], **name_dict} + + return category_dict + + else: + return + + +def _create_float_dict(field: str, file: str): + float_dict = None + + xsd_xml = ET.parse(file) + search_elem = f".//{{http://www.w3.org/2001/XMLSchema}}simpleType[@name='{field}']" + element = xsd_xml.find(search_elem) + + search_restriction = f".//{{http://www.w3.org/2001/XMLSchema}}restriction" + restriction = element.findall(search_restriction) + for r in restriction: + code_dict = {"numeric": r.get("base")[3:]} # Remove the "xs:" from the start of the base string + if code_dict["numeric"] == "decimal": + float_dict = code_dict + + search_fraction_digits = f".//{{http://www.w3.org/2001/XMLSchema}}fractionDigits" + fraction_digits = element.findall(search_fraction_digits) + for f in fraction_digits: + fraction_digits_dict = {"fixed": f.get("fixed"), "decimal": f.get("value")} + float_dict = {**float_dict, **fraction_digits_dict} + + search_min_inclusive = f".//{{http://www.w3.org/2001/XMLSchema}}minInclusive" + min_inclusive = element.findall(search_min_inclusive) + for m in min_inclusive: + min_dict = {"min_inclusive": m.get("value")} + float_dict = {**float_dict, **min_dict} + + search_max_inclusive = f".//{{http://www.w3.org/2001/XMLSchema}}maxInclusive" + max_inclusive = element.findall(search_max_inclusive) + for m in max_inclusive: + max_dict = {"max_inclusive": m.get("value")} + float_dict = {**float_dict, **max_dict} + + return float_dict + + +def _create_regex_dict(field: str, file: str): + regex_dict = None + + xsd_xml = ET.parse(file) + search_elem = f".//{{http://www.w3.org/2001/XMLSchema}}simpleType[@name='{field}']" + element = xsd_xml.find(search_elem) + + search_restriction = f".//{{http://www.w3.org/2001/XMLSchema}}restriction" + restriction = element.findall(search_restriction) + for r in restriction: + if r.get("base") == "xs:string": + regex_dict = {"regex_string": None} + + search_pattern = f".//{{http://www.w3.org/2001/XMLSchema}}pattern" + pattern = element.findall(search_pattern) + for p in pattern: + regex_dict["regex_string"] = p.get("value") + + return regex_dict + + 
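Taken together, the three _create_*_dict helpers above condense an xs:simpleType restriction into the small schema_dict the cleaners consume downstream. A minimal self-check sketch follows, under assumed schema shapes rather than the verbatim workforce .xsd: the type names twodecimalplaces and swetype are the ones add_schema_dict routes on, the SWE pattern is the one the unit tests later in this series exercise, and the decimal bounds are invented for illustration.

    # Self-check for the helpers above (assumed schema shapes, not the real .xsd):
    if __name__ == "__main__":
        import tempfile

        _SAMPLE_XSD = r"""<?xml version="1.0"?>
    <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
      <xs:simpleType name="twodecimalplaces">
        <xs:restriction base="xs:decimal">
          <xs:fractionDigits value="2" fixed="true"/>
          <xs:minInclusive value="0"/>
          <xs:maxInclusive value="500"/>
        </xs:restriction>
      </xs:simpleType>
      <xs:simpleType name="swetype">
        <xs:restriction base="xs:string">
          <xs:pattern value="[A-Za-z]{2}\d{10}"/>
        </xs:restriction>
      </xs:simpleType>
    </xs:schema>"""

        # Write the sample schema to a temporary file so the helpers can parse it
        with tempfile.NamedTemporaryFile("w", suffix=".xsd", delete=False) as f:
            f.write(_SAMPLE_XSD)

        # The "xs:" prefix is stripped from the restriction base, leaving the
        # numeric kind plus the fraction-digit and inclusive-bound facets:
        print(_create_float_dict("twodecimalplaces", f.name))
        # {'numeric': 'decimal', 'fixed': 'true', 'decimal': '2',
        #  'min_inclusive': '0', 'max_inclusive': '500'}

        # A string restriction with a pattern facet yields just the raw regex:
        print(_create_regex_dict("swetype", f.name))
        # {'regex_string': '[A-Za-z]{2}\\d{10}'}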
+@streamfilter() +def add_schema(event, schema: xmlschema.XMLSchema): + """ + Requires each event to have event.context as set by :func:`add_context` + + Based on the context (a tuple of element tags) it will set path which is the + derived path (based on the context tags) joined by '/' and schema holding the + corresponding schema element, if found. + + Provides: path, schema + """ + assert ( + event.context + ), "This filter required event.context to be set - see add_context" + path = "/".join(event.context) + tag = event.context[-1] + el = schema.get_element(tag, path) + + return event.from_event(event, path=path, schema=el) + + +@streamfilter(check=type_check(events.TextNode), fail_function=pass_event) +def add_schema_dict(event, schema_path: str): + schema_dict = None + + config_type = event.schema.type.name + if config_type is not None: + if config_type[-4:] == "type": + schema_dict = _create_category_dict(config_type, schema_path) + if config_type in ["onedecimalplace", "twodecimalplaces", "ftetype"]: + schema_dict = _create_float_dict(config_type, schema_path) + if config_type in ["swetype"]: + schema_dict = _create_regex_dict(config_type, schema_path) + if config_type == "{http://www.w3.org/2001/XMLSchema}date": + schema_dict = {"date": "%Y/%m/%d"} + if config_type == "{http://www.w3.org/2001/XMLSchema}integer": + schema_dict = {"numeric": "integer"} + if config_type == "{http://www.w3.org/2001/XMLSchema}string": + schema_dict = {"string": "alphanumeric"} + + if schema_dict is not None: + if event.schema.occurs[0] == 0: + schema_dict = {**schema_dict, **{"canbeblank": True}} + elif event.schema.occurs[0] == 1: + schema_dict = {**schema_dict, **{"canbeblank": False}} + + return event.from_event(event, schema_dict=schema_dict) + + +def _get_validation_error(schema, node) -> XMLSchemaValidatorError: + try: + schema.validate(node) + return None + except XMLSchemaValidatorError as e: + return e + + +@streamfilter(check=type_check(events.StartElement), fail_function=pass_event) +def validate_elements(event): + """ + Validates each element, and if not valid, sets the properties: + + * valid - (always False) + * validation_message - a descriptive validation message + """ + validation_error = _get_validation_error(event.schema, event.node) + if validation_error is None: + return event + + message = ( + validation_error.reason + if hasattr(validation_error, "reason") + else validation_error.message + ) + return events.StartElement.from_event( + event, valid=False, validation_message=message + ) + + +@streamfilter(check=type_check(events.StartElement), fail_function=pass_event) +def prop_to_attribute(event, prop_name): + """ + Elevates an event property to an XML attribute. 
+ """ + if hasattr(event, prop_name): + attrs = getattr(event, "attrs", {}) + attrs[prop_name] = getattr(event, prop_name) + return events.StartElement.from_event(event, attrs=attrs) + else: + return event + + +@collector(check=block_check(events.StartElement), receive_stream=True) +def remove_invalid(stream, tag_name): + """ + Filters out events with the given tag name if they are not valid + """ + stream = list(stream) + first = stream[0] + last = stream[-1] + stream = stream[1:-1] + + if first.tag == tag_name and not getattr(first, "valid", True): + yield from [] + else: + yield first + + if len(stream) > 0: + yield from remove_invalid(stream, tag_name=tag_name) + + yield last + + +@streamfilter(check=lambda x: True) +def counter(event, counter_check, context): + if counter_check(event): + context["pass"] += 1 + else: + context["fail"] += 1 + return event diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/logger.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/logger.py new file mode 100644 index 00000000..1fc9cf88 --- /dev/null +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/logger.py @@ -0,0 +1,225 @@ +from collections import Counter +from datetime import datetime +import logging +import os + +from sfdata_stream_parser import events +from sfdata_stream_parser.filters.generic import streamfilter, pass_event +from sfdata_stream_parser.checks import type_check + +log = logging.getLogger(__name__) + + +class ErrorTable(events.ParseEvent): + pass + + +def create_formatting_error_count(stream): + """ + Create a list of the column headers for cells with formatting errors (event.error = 1) for each table + + :param stream: A filtered list of event objects + :return: An updated list of event objects with error counts + """ + formatting_error_count = None + for event in stream: + if isinstance(event, events.StartElement) and event.tag == "LALevelVacancies": + formatting_error_count = [] + elif isinstance(event, events.EndElement) and event.tag == "Message": + yield ErrorTable.from_event( + event, + formatting_error_count=formatting_error_count, + ) + formatting_error_count = None + elif ( + formatting_error_count is not None + and isinstance(event, events.TextNode) + ): + try: + if event.error == "1": + formatting_error_count.append(event.schema.name) + except AttributeError: # Raised in case there is no event.error + pass + yield event + + +@streamfilter( + check=type_check(events.TextNode), fail_function=pass_event, error_function=pass_event +) +def blank_error_check(event): + """ + Check all the values against the config to see if they are allowed to be blank + if they are blank but should not be, record this as event.blank_error = 1 + + :param event: A filtered list of event objects of type Cell + :return: An updated list of event objects + """ + try: + allowed_blank = event.schema_dict["canbeblank"] + error = getattr(event, "error", "0") + if not allowed_blank and not event.text and error != "1": + return event.from_event(event, blank_error="1") + else: + return event + except AttributeError: # Raised in case there is no config item for the given cell + pass + + +def create_blank_error_count(stream): + """ + Create a list of the column headers for cells with blank fields that should not be blank (event.blank_error = 1) + for each table + + :param stream: A filtered list of event objects + :return: An updated list of event objects + """ + blank_error_count = None + for event in stream: + if isinstance(event, events.StartTable): + blank_error_count 
= [] + elif isinstance(event, events.EndTable): + blank_error_count = None + elif isinstance(event, ErrorTable): + yield ErrorTable.from_event(event, blank_error_count=blank_error_count) + blank_error_count = None + elif blank_error_count is not None and isinstance(event, events.Cell): + try: + if event.blank_error == "1": + blank_error_count.append(event.header) + except AttributeError: + pass + yield event + + +@streamfilter( + check=type_check(events.StartTable), + fail_function=pass_event, + error_function=pass_event, +) +def create_file_match_error(event): + """ + Add a match_error to StartTables that do not have an event.sheet_name so these errors can be written to the log.txt + file. If there is no event.sheet_name for a given StartTable that means its headers did not match any of those + in the config file + + :param event: A filtered list of event objects of type StartTable + :return: An updated list of event objects + """ + try: + if event.table_name: + return event + except AttributeError: + return event.from_event( + event, + match_error=f"Failed to find a set of matching columns headers for file titled " + f"'{event.filename}' which contains column headers {event.headers} so no output has been produced", + ) + return event + + +@streamfilter( + check=type_check(events.StartTable), + fail_function=pass_event, + error_function=pass_event, +) +def create_extra_column_error(event): + """ + Add a extra_column_error to StartTables that have more columns than the set of expected columns so these can be written to the log.txt + + :param event: A filtered list of event objects of type StartTable + :return: An updated list of event objects + """ + extra_columns = [ + item for item in event.headers if item not in event.expected_columns + ] + if len(extra_columns) == 0: + return event + else: + return event.from_event( + event, + extra_column_error=f"Additional columns were found in file titled " + f"'{event.filename}' than those expected from schema for filetype = {event.table_name}, so these columns have been removed: {extra_columns}", + ) + + +def save_errors_la(stream, la_log_dir): + """ + Count the error events and save them as a text file in the Local Authority Logs directory + only save the error events if there is at least one error in said event + + :param stream: A filtered list of event objects + :param la_log_dir: Location to save the gathered error logs + :return: An updated list of event objects + """ + start_time = f"{datetime.now():%Y-%m-%dT%H%M%SZ}" + for event in stream: + try: + if isinstance(event, ErrorTable) and ( + event.formatting_error_count is not None + and event.blank_error_count is not None + and event.table_name is not None + ): + if event.formatting_error_count or event.blank_error_count: + with open( + f"{os.path.join(la_log_dir, event.filename)}_error_log_{start_time}.txt", + "a", + ) as f: + f.write(event.table_name) + f.write("\n") + if event.formatting_error_count: + f.write( + "Number of cells that have been made blank " + "because they could not be formatted correctly" + ) + f.write("\n") + counter_dict = Counter(event.formatting_error_count) + f.write( + str(counter_dict)[9:-2] + ) # Remove "Counter({" and "})" from string + f.write("\n") + if event.blank_error_count: + f.write( + "Number of blank cells that should have contained data" + ) + f.write("\n") + blank_counter_dict = Counter(event.blank_error_count) + f.write( + str(blank_counter_dict)[9:-2] + ) # Remove "Counter({" and "})" from string + f.write("\n") + except AttributeError: + pass + + 
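+        # The ErrorTable branch above flushes each table's formatting and
+        # blank-cell error counts to a timestamped log file; the StartTable
+        # checks below surface table-level problems (headers that matched no
+        # known table, unexpected extra columns) recorded earlier in the stream.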
if isinstance(event, events.StartTable): + match_error = getattr(event, "match_error", None) + if match_error: + with open( + f"{os.path.join(la_log_dir, event.filename)}_error_log_{start_time}.txt", + "a", + ) as f: + f.write(match_error) + f.write("\n") + column_error = getattr(event, "extra_column_error", None) + if column_error: + with open( + f"{os.path.join(la_log_dir, event.filename)}_error_log_{start_time}.txt", + "a", + ) as f: + f.write(column_error) + f.write("\n") + yield event + + +def log_errors(stream): + """ + Compile the log error functions + + :param stream: A filtered list of event objects + :return: An updated list of event objects + """ + stream = blank_error_check(stream) + # stream = create_formatting_error_count(stream) + # stream = create_blank_error_count(stream) + # stream = create_file_match_error(stream) + # stream = create_extra_column_error(stream) + return stream diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/xml.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/xml.py new file mode 100644 index 00000000..58bc03fc --- /dev/null +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/xml.py @@ -0,0 +1,49 @@ +from sfdata_stream_parser.events import ( + StartElement, + EndElement, + TextNode, + CommentNode, + ProcessingInstructionNode, +) + +try: + from lxml import etree +except ImportError: + pass + + +def dom_parse(source, **kwargs): + """ + Equivalent of the xml parse included in the sfdata_stream_parser package, but uses the ET DOM + and allows direct DOM manipulation. + """ + parser = etree.iterparse(source, events=("start", "end", "comment", "pi"), **kwargs) + for action, elem in parser: + if action == "start": + yield StartElement(tag=elem.tag, attrib=elem.attrib, node=elem) + yield TextNode(text=elem.text) + elif action == "end": + yield EndElement(tag=elem.tag, node=elem) + if elem.tail: + yield TextNode(text=elem.tail) + elif action == "comment": + yield CommentNode(text=elem.text, node=elem) + elif action == "pi": + yield ProcessingInstructionNode(name=elem.target, text=elem.text, node=elem) + else: + raise ValueError(f"Unknown event: {action}") + + +def to_xml(stream, builder: etree.TreeBuilder): + for ev in stream: + if isinstance(ev, StartElement): + builder.start(ev.tag, getattr(ev, "attrs", {})) + elif isinstance(ev, EndElement): + builder.end(ev.tag) + elif isinstance(ev, TextNode): + builder.data(ev.text) + elif isinstance(ev, CommentNode): + builder.comment(ev.text) + elif isinstance(ev, ProcessingInstructionNode): + builder.pi(ev.name, ev.text) + yield ev From 028e589fc2dddc7da2a5cafa70f25862d08adc3c Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Fri, 7 Jul 2023 16:20:25 +0000 Subject: [PATCH 24/40] Work in progress --- .../csdatatools/datasets/cincensus/filters.py | 2 +- .../csww_main_functions.py | 28 +++++---- .../lds_csww_clean/cleaner.py | 48 +++++++++++++-- .../lds_csww_clean/converters.py | 60 +++++++++++++++++++ 4 files changed, 120 insertions(+), 18 deletions(-) create mode 100644 liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py diff --git a/liiatools/csdatatools/datasets/cincensus/filters.py b/liiatools/csdatatools/datasets/cincensus/filters.py index 87bedf33..fe3d48e5 100644 --- a/liiatools/csdatatools/datasets/cincensus/filters.py +++ b/liiatools/csdatatools/datasets/cincensus/filters.py @@ -179,7 +179,7 @@ def add_schema_dict(event, schema_path: str): if config_type in ["swetype"]: schema_dict = 
_create_regex_dict(config_type, schema_path) if config_type == "{http://www.w3.org/2001/XMLSchema}date": - schema_dict = {"date": "%d/%m/%Y"} + schema_dict = {"date": "%Y-%m-%d"} if config_type == "{http://www.w3.org/2001/XMLSchema}integer": schema_dict = {"numeric": "integer"} if config_type == "{http://www.w3.org/2001/XMLSchema}string": diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py index 4f2e3e93..11189a99 100644 --- a/liiatools/datasets/social_work_workforce/csww_main_functions.py +++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py @@ -130,9 +130,13 @@ def cleanfile(input, la_code, la_log_dir, output): stream = filters.add_schema(stream, schema=Schema(input_year).schema) stream = filters.add_schema_dict(stream, schema_path=FilePath(input_year).path) + # for e in stream: + # print(e.get('schema_dict')) # Clean stream stream = cleaner.clean_categories(stream) stream = cleaner.clean_dates(stream) + stream = cleaner.clean_numeric(stream) # TODO: implement function for decimal (works for integer) + stream = cleaner.clean_regex_string(stream) # TODO: implement function # Output results stream = csww_record.message_collector(stream) @@ -214,24 +218,24 @@ def pan_agg(input, la_code, output): # Run in Visual Studio Code |> -# cleanfile( -# "/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml", -# "BAD", -# "/workspaces/liia_tools/liiatools/datasets/social_work_workforce/lds_csww_clean", -# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", -# ) +cleanfile( + "/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml", + "BAD", + "/workspaces/liia_tools/liiatools/datasets/social_work_workforce/lds_csww_clean", + "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", +) # la_agg( # "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_worker_clean.csv", # "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", # ) -cleanfile( - r"C:\Users\patrick.troy\Downloads\LIIA tests\social_work_workforce_2022.xml", - "NEW", - r"C:\Users\patrick.troy\Downloads\LIIA tests", - r"C:\Users\patrick.troy\Downloads\LIIA tests", -) +# cleanfile( +# r"C:\Users\patrick.troy\Downloads\LIIA tests\social_work_workforce_2022.xml", +# "NEW", +# r"C:\Users\patrick.troy\Downloads\LIIA tests", +# r"C:\Users\patrick.troy\Downloads\LIIA tests", +# ) # la_agg( # "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_lalevel_clean.csv", diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py index b85b5da1..1dfc7bae 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py @@ -4,7 +4,7 @@ from sfdata_stream_parser import events from sfdata_stream_parser.filters.generic import streamfilter, pass_event -from liiatools.datasets.s903.lds_ssda903_clean.converters import to_category, to_integer +from liiatools.datasets.social_work_workforce.lds_csww_clean.converters import to_category, to_integer, to_decimal from liiatools.datasets.shared_functions.converters import to_date #from liiatools.datasets.shared_functions.common import check_postcode @@ 
-22,9 +22,9 @@ def clean_dates(event): :param event: A filtered list of event objects of type text :return: An updated list of event objects """ - date = event.schema_dict["date"] + dateformat = event.schema_dict["date"] try: - newtext = to_date(event.text, date) + newtext = to_date(event.text, dateformat) return event.from_event(event, text=newtext, error="0") except (AttributeError, TypeError, ValueError): return event.from_event(event, text="", error="1") @@ -44,7 +44,45 @@ def clean_categories(event): try: newtext = to_category(event.text, category) if newtext != "error": - return event.from_event(event, text=newtext, error='0') + return event.from_event(event, text=f"*cat*{newtext}", error='0') return event.from_event(event, text="", error="1") except (AttributeError, TypeError, ValueError): - return event.from_event(event, text="", error="1") \ No newline at end of file + return event.from_event(event, text="", error="1") + + +@streamfilter( + check=type_check(events.TextNode), fail_function=pass_event, error_function=pass_event +) +def clean_numeric(event): + """ + Convert all values that should be integers to integers based on the schema xsd file + + :param event: A filtered list of event objects of type integer + :return: An updated list of event objects + """ + numeric = event.schema_dict["numeric"] + #print(integer) + try: + if numeric == "integer": + newtext = to_integer(event.text, numeric) + elif numeric == "decimal": + newtext = to_decimal(event.text, numeric) + if newtext != "error": + return event.from_event(event, text=f"*{numeric[:3]}*{newtext}", error='0') + return event.from_event(event, text="", error="1") + except (AttributeError, TypeError, ValueError): + return event.from_event(event, text="", error="1") + + +@streamfilter( + check=type_check(events.TextNode), fail_function=pass_event, error_function=pass_event +) +def clean_regex_string(event): + """ + Convert all values that should be regex strings to regex strings based on the schema xsd file + + :param event: A filtered list of event objects of type regex string + :return: An updated list of event objects + """ + pattern = event.schema_dict["regex_string"] + print(pattern) \ No newline at end of file diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py new file mode 100644 index 00000000..c09c6bd5 --- /dev/null +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py @@ -0,0 +1,60 @@ +import logging + +log = logging.getLogger(__name__) + + +def to_category(string, categories): + """ + Matches a string to a category based on categories given in a config file + the config file should contain a dictionary for each category for this function to loop through + return blank if no categories found + + :param string: Some string to convert into a category value + :param categories: A list of dictionaries containing different category:value pairs + :return: Either a category value, "error" or blank string + """ + for code in categories: + if str(string).lower() == str(code["code"]).lower(): + return code["code"] + if ( + str(string).lower() == str(code["code"]).lower() + ".0" + ): # In case integers are read as floats + return code["code"] + elif "name" in code: + if str(code["name"]).lower() in str(string).lower(): + return code["code"] + elif not string: + return "" + elif not string: + return "" + return "error" + + +def to_integer(value, config): + """ + Convert any strings that should be integers 
based on the config into integers + + :param value: Some value to convert to an integer + :param config: The loaded configuration + :return: Either an integer value or a blank string + """ + if config == "integer": + if isinstance(value, str) and value[-2:] == ".0": + return int(float(value)) + elif value or value == 0: + return int(value) + else: + return "" + else: + return value + + +def to_decimal(value, config): + """ + Convert any strings that should be decimal based on the config into decimals + + :param value: Some value to convert to a decimal + :param config: The loaded configuration + :return: Either a decimal value or a blank string + """ + pass From 4de8ea4bb0fadc1f6b131d2d6c0d62bb5080f46f Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Mon, 10 Jul 2023 12:07:39 +0000 Subject: [PATCH 25/40] Implement cleaner and unit test --- .../csww_main_functions.py | 4 +- .../lds_csww_clean/cleaner.py | 31 +++++---- .../lds_csww_clean/converters.py | 52 ++++++++++++--- .../lds_csww_clean/file_creator.py | 2 +- .../lds_csww_clean/filters.py | 2 +- .../social_work_workforce/test_converters.py | 65 +++++++++++++++++++ 6 files changed, 131 insertions(+), 25 deletions(-) create mode 100644 tests/social_work_workforce/test_converters.py diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py index b614074c..438574e4 100644 --- a/liiatools/datasets/social_work_workforce/csww_main_functions.py +++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py @@ -138,8 +138,8 @@ def cleanfile(input, la_code, la_log_dir, output): # Clean stream stream = cleaner.clean_categories(stream) stream = cleaner.clean_dates(stream) - stream = cleaner.clean_numeric(stream) # TODO: implement function for decimal (works for integer) - stream = cleaner.clean_regex_string(stream) # TODO: implement function + stream = cleaner.clean_numeric(stream) + stream = cleaner.clean_regex_string(stream) stream = logger.log_errors(stream) # Output results diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py index 1dfc7bae..1948265d 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py @@ -4,7 +4,7 @@ from sfdata_stream_parser import events from sfdata_stream_parser.filters.generic import streamfilter, pass_event -from liiatools.datasets.social_work_workforce.lds_csww_clean.converters import to_category, to_integer, to_decimal +from liiatools.datasets.social_work_workforce.lds_csww_clean.converters import to_category, to_integer, to_decimal, to_regex from liiatools.datasets.shared_functions.converters import to_date #from liiatools.datasets.shared_functions.common import check_postcode @@ -19,7 +19,7 @@ def clean_dates(event): """ Convert all values that should be dates to dates based on the schema xsd file - :param event: A filtered list of event objects of type text + :param event: A filtered list of event objects of type TextNode :return: An updated list of event objects """ dateformat = event.schema_dict["date"] @@ -37,14 +37,14 @@ def clean_categories(event): """ Convert all values that should be categories to categories based on the schema xsd file - :param event: A filtered list of event objects of type text + :param event: A filtered list of event objects of type TextNode :return: An 
updated list of event objects """ category = event.schema_dict["category"] try: newtext = to_category(event.text, category) if newtext != "error": - return event.from_event(event, text=f"*cat*{newtext}", error='0') + return event.from_event(event, text=newtext, error='0') return event.from_event(event, text="", error="1") except (AttributeError, TypeError, ValueError): return event.from_event(event, text="", error="1") @@ -55,20 +55,21 @@ def clean_categories(event): ) def clean_numeric(event): """ - Convert all values that should be integers to integers based on the schema xsd file + Convert all values that should be integers or decimals to integers or decimals based on the schema xsd file - :param event: A filtered list of event objects of type integer + :param event: A filtered list of event objects of type TextNode :return: An updated list of event objects """ numeric = event.schema_dict["numeric"] - #print(integer) try: if numeric == "integer": newtext = to_integer(event.text, numeric) elif numeric == "decimal": - newtext = to_decimal(event.text, numeric) + #print(event.schema_dict["fixed"], event.schema_dict["decimal"]) + decimalplaces = int(event.schema_dict["decimal"]) + newtext = to_decimal(event.text, numeric, decimalplaces) if newtext != "error": - return event.from_event(event, text=f"*{numeric[:3]}*{newtext}", error='0') + return event.from_event(event, text=newtext, error='0') return event.from_event(event, text="", error="1") except (AttributeError, TypeError, ValueError): return event.from_event(event, text="", error="1") @@ -76,13 +77,19 @@ def clean_numeric(event): @streamfilter( check=type_check(events.TextNode), fail_function=pass_event, error_function=pass_event -) +) def clean_regex_string(event): """ Convert all values that should be regex strings to regex strings based on the schema xsd file - :param event: A filtered list of event objects of type regex string + :param event: A filtered list of event objects of type TextNode :return: An updated list of event objects """ pattern = event.schema_dict["regex_string"] - print(pattern) \ No newline at end of file + try: + newtext = to_regex(event.text, pattern) + if newtext != "error": + return event.from_event(event, text=newtext, error="0") + return event.from_event(event, text="", error="1") + except (AttributeError, TypeError, ValueError): + return event.from_event(event, text="", error="1") \ No newline at end of file diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py index c09c6bd5..4cd72810 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py @@ -1,12 +1,13 @@ import logging +import re log = logging.getLogger(__name__) def to_category(string, categories): """ - Matches a string to a category based on categories given in a config file - the config file should contain a dictionary for each category for this function to loop through + Matches a string to a category based on categories given in a schema file + the schema file should contain a dictionary for each category for this function to loop through return blank if no categories found :param string: Some string to convert into a category value @@ -18,12 +19,12 @@ def to_category(string, categories): return code["code"] if ( str(string).lower() == str(code["code"]).lower() + ".0" - ): # In case integers are read as floats + ): # In case integers are read as 
floats return code["code"] - elif "name" in code: + if "name" in code: if str(code["name"]).lower() in str(string).lower(): return code["code"] - elif not string: + if not string: return "" elif not string: return "" @@ -47,14 +48,47 @@ def to_integer(value, config): return "" else: return value - -def to_decimal(value, config): + +def to_decimal(value, config, decplaces=0): """ Convert any strings that should be decimal based on the config into decimals :param value: Some value to convert to a decimal :param config: The loaded configuration - :return: Either a decimal value or a blank string + :param decplaces: The number of decimal places + :return: Either a decimal value formatted to number of decimal places or a blank string + """ + dpdisplayformat= f".{decplaces}f" + if config == "decimal": + try: + float(value) + roundtodp = round(float(value), decplaces) + return f"{roundtodp: {dpdisplayformat}}".strip() + except (ValueError, TypeError): + return "" + return value + + +def to_regex(value, pattern): """ - pass + Convert any strings that should conform to regex pattern based on the schema into regex string + + :param value: Some value to convert to a regex string + :param pattern: The regex pattern to compare + :return: Either a regex string or a blank string + """ + if pattern: + if value: + try: + isfullmatch = re.fullmatch(pattern, value) + if isfullmatch: + return value + else: + return "" + except (ValueError, TypeError): + return "" + else: + return "" + else: + return value \ No newline at end of file diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py index 66fe79a0..38da3243 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py @@ -48,7 +48,7 @@ def degrade_SWENo(data): """ if "SWENo" in data: if data["SWENo"] is not None: - data["SWENo"] = data["SWENo"].apply(lambda row: swe_hash(row)) + data["SWENo"] = data["SWENo"].apply(lambda row: swe_hash(row) if row else row ) return data diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/filters.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/filters.py index 06fb8798..eafb9297 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/filters.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/filters.py @@ -174,7 +174,7 @@ def add_schema_dict(event, schema_path: str): if config_type in ["swetype"]: schema_dict = _create_regex_dict(config_type, schema_path) if config_type == "{http://www.w3.org/2001/XMLSchema}date": - schema_dict = {"date": "%Y/%m/%d"} + schema_dict = {"date": "%Y-%m-%d"} if config_type == "{http://www.w3.org/2001/XMLSchema}integer": schema_dict = {"numeric": "integer"} if config_type == "{http://www.w3.org/2001/XMLSchema}string": diff --git a/tests/social_work_workforce/test_converters.py b/tests/social_work_workforce/test_converters.py new file mode 100644 index 00000000..d5d2bce2 --- /dev/null +++ b/tests/social_work_workforce/test_converters.py @@ -0,0 +1,65 @@ +from liiatools.datasets.social_work_workforce.lds_csww_clean import converters + + +def test_to_category(): + category_dict = [ + {"code": "M1"}, + {"code": "F1"}, + {"code": "MM"}, + {"code": "FF"}, + {"code": "MF"}, + ] + assert converters.to_category("M1", category_dict) == "M1" + assert converters.to_category("M2", category_dict) == "error" + assert converters.to_category("MF", 
category_dict) == "MF" + assert converters.to_category("", category_dict) == "" + assert converters.to_category(None, category_dict) == "" + + category_dict = [{"code": "0", "name": "False"}, {"code": "1", "name": "True"}] + assert converters.to_category(0, category_dict) == "0" + assert converters.to_category("false", category_dict) == "0" + assert converters.to_category(1.0, category_dict) == "1" + assert converters.to_category("true", category_dict) == "1" + assert converters.to_category("string", category_dict) == "error" + assert converters.to_category(123, category_dict) == "error" + assert converters.to_category("", category_dict) == "" + assert converters.to_category(None, category_dict) == "" + + +def test_to_integer(): + assert converters.to_integer("3000", "integer") == 3000 + assert converters.to_integer(123, "integer") == 123 + assert converters.to_integer("1.0", "integer") == 1 + assert converters.to_integer("date", "") == "date" + assert converters.to_integer(0, "integer") == 0 + assert converters.to_integer("", "integer") == "" + assert converters.to_integer(None, "integer") == "" + + +def test_to_decimal(): + decimalplaces = 3 + assert converters.to_decimal("12.345", "decimal", decimalplaces) == "12.345" + assert converters.to_decimal("12.3456", "decimal", decimalplaces) == "12.346" + assert converters.to_decimal("12.3", "decimal", decimalplaces) == "12.300" + assert converters.to_decimal(12.3456, "decimal", decimalplaces) == "12.346" + assert converters.to_decimal("1.0", "decimal", decimalplaces) == "1.000" + assert converters.to_decimal(0, "decimal", decimalplaces) == "0.000" + assert converters.to_decimal("date", "") == "date" + assert converters.to_decimal("", "decimal", decimalplaces) == "" + assert converters.to_decimal(None, "decimal", decimalplaces) == "" + + +def test_to_regex(): + pattern=r"[A-Za-z]{2}\d{10}" + assert converters.to_regex("AB1234567890",pattern) == "AB1234567890" # match + assert converters.to_regex("AB1234567890123456",pattern) == "" # too long + assert converters.to_regex("AB12345",pattern) == "" # too short + assert converters.to_regex("date", "") == "date" # no pattern + assert converters.to_regex("", pattern) == "" # no value + assert converters.to_regex(None, pattern) == "" # no value + + +# test_to_category() +# test_to_integer() +test_to_decimal() +#test_to_regex() From be180f9cf23c7279f7d83520d5aebb52f5eefcf6 Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Mon, 10 Jul 2023 12:11:42 +0000 Subject: [PATCH 26/40] Update test_converters --- tests/social_work_workforce/test_converters.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/social_work_workforce/test_converters.py b/tests/social_work_workforce/test_converters.py index d5d2bce2..4796cf90 100644 --- a/tests/social_work_workforce/test_converters.py +++ b/tests/social_work_workforce/test_converters.py @@ -61,5 +61,5 @@ def test_to_regex(): # test_to_category() # test_to_integer() -test_to_decimal() -#test_to_regex() +# test_to_decimal() +# test_to_regex() From 119d4ab75f80a33da51e95130ed365494e7d36df Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Wed, 12 Jul 2023 13:47:21 +0000 Subject: [PATCH 27/40] Implement logger, write unit tests --- .../csww_main_functions.py | 9 +- .../lds_csww_clean/cleaner.py | 27 +- .../lds_csww_clean/converters.py | 52 ++-- .../lds_csww_clean/logger.py | 68 ++--- tests/social_work_workforce/test_cleaner.py | 268 
++++++++++++++++++ .../social_work_workforce/test_converters.py | 25 +- tests/social_work_workforce/test_logger.py | 144 ++++++++++ 7 files changed, 506 insertions(+), 87 deletions(-) create mode 100644 tests/social_work_workforce/test_cleaner.py create mode 100644 tests/social_work_workforce/test_logger.py diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py index 438574e4..ac0f3d5e 100644 --- a/liiatools/datasets/social_work_workforce/csww_main_functions.py +++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py @@ -108,8 +108,8 @@ def cleanfile(input, la_code, la_log_dir, output): stream = dom_parse(input) # Get year from input file + filename = str(Path(input).resolve().stem) try: - filename = str(Path(input).resolve().stem) input_year = check_year(filename) except (AttributeError, ValueError): save_year_error(input, la_log_dir) @@ -132,17 +132,16 @@ def cleanfile(input, la_code, la_log_dir, output): stream = filters.add_context(stream) stream = filters.add_schema(stream, schema=Schema(input_year).schema) stream = filters.add_schema_dict(stream, schema_path=FilePath(input_year).path) - - # for e in stream: - # print(e.get('schema_dict')) # Clean stream stream = cleaner.clean_categories(stream) stream = cleaner.clean_dates(stream) stream = cleaner.clean_numeric(stream) stream = cleaner.clean_regex_string(stream) + stream = logger.log_errors(stream) # Output results + stream = logger.save_errors_la(stream, la_log_dir=la_log_dir, filename=filename) stream = csww_record.message_collector(stream) data_worker, data_lalevel = csww_record.export_table(stream) @@ -225,7 +224,7 @@ def pan_agg(input, la_code, output): cleanfile( "/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml", "BAD", - "/workspaces/liia_tools/liiatools/datasets/social_work_workforce/lds_csww_clean", + "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", ) diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py index 1948265d..22287af8 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py @@ -24,8 +24,8 @@ def clean_dates(event): """ dateformat = event.schema_dict["date"] try: - newtext = to_date(event.text, dateformat) - return event.from_event(event, text=newtext, error="0") + clean_text = to_date(event.text, dateformat) + return event.from_event(event, text=clean_text, error="0") except (AttributeError, TypeError, ValueError): return event.from_event(event, text="", error="1") @@ -42,9 +42,9 @@ def clean_categories(event): """ category = event.schema_dict["category"] try: - newtext = to_category(event.text, category) - if newtext != "error": - return event.from_event(event, text=newtext, error='0') + clean_text = to_category(event.text, category) + if clean_text != "error": + return event.from_event(event, text=clean_text, error='0') return event.from_event(event, text="", error="1") except (AttributeError, TypeError, ValueError): return event.from_event(event, text="", error="1") @@ -63,13 +63,12 @@ def clean_numeric(event): numeric = event.schema_dict["numeric"] try: if numeric == "integer": - newtext = to_integer(event.text, numeric) + clean_text = 
to_integer(event.text, numeric) elif numeric == "decimal": - #print(event.schema_dict["fixed"], event.schema_dict["decimal"]) - decimalplaces = int(event.schema_dict["decimal"]) - newtext = to_decimal(event.text, numeric, decimalplaces) - if newtext != "error": - return event.from_event(event, text=newtext, error='0') + decimal_places = int(event.schema_dict["decimal"]) + clean_text = to_decimal(event.text, numeric, decimal_places) + if clean_text != "error": + return event.from_event(event, text=clean_text, error='0') return event.from_event(event, text="", error="1") except (AttributeError, TypeError, ValueError): return event.from_event(event, text="", error="1") @@ -87,9 +86,9 @@ def clean_regex_string(event): """ pattern = event.schema_dict["regex_string"] try: - newtext = to_regex(event.text, pattern) - if newtext != "error": - return event.from_event(event, text=newtext, error="0") + clean_text = to_regex(event.text, pattern) + if clean_text != "error": + return event.from_event(event, text=clean_text, error="0") return event.from_event(event, text="", error="1") except (AttributeError, TypeError, ValueError): return event.from_event(event, text="", error="1") \ No newline at end of file diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py index 4cd72810..f3ba9ddc 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py @@ -12,7 +12,7 @@ def to_category(string, categories): :param string: Some string to convert into a category value :param categories: A list of dictionaries containing different category:value pairs - :return: Either a category value, "error" or blank string + :return: Either a category value, "error" if category is invalid or blank string """ for code in categories: if str(string).lower() == str(code["code"]).lower(): @@ -37,36 +37,39 @@ def to_integer(value, config): :param value: Some value to convert to an integer :param config: The loaded configuration - :return: Either an integer value or a blank string + :return: Either an integer value or an "error" string if value could not be formatted as integer or a blank string if no value provided """ if config == "integer": - if isinstance(value, str) and value[-2:] == ".0": - return int(float(value)) - elif value or value == 0: - return int(value) - else: - return "" + if value or value==0: + if isinstance(value, str) and value[-2:] == ".0": + return int(float(value)) + elif value or value == 0: + return int(value) + else: + return "error" # value incorrectly formatted + return "" # no value provided else: return value -def to_decimal(value, config, decplaces=0): +def to_decimal(value, config, decimal_places=0): """ Convert any strings that should be decimal based on the config into decimals :param value: Some value to convert to a decimal :param config: The loaded configuration - :param decplaces: The number of decimal places - :return: Either a decimal value formatted to number of decimal places or a blank string + :param dec_places: The number of decimal places to apply (default 0) + :return: Either a decimal value formatted to number of decimal places or an "error" string if value could not be formatted as decimal or a blank string if no value provided """ - dpdisplayformat= f".{decplaces}f" if config == "decimal": - try: - float(value) - roundtodp = round(float(value), decplaces) - return f"{roundtodp: 
{dpdisplayformat}}".strip() - except (ValueError, TypeError): - return "" + if value or value == 0: + try: + float(value) + round_to_dp = round(float(value), decimal_places) + return round_to_dp + except (ValueError, TypeError): + return "error" # value incorrectly formatted + return "" # no value provided return value @@ -76,19 +79,20 @@ def to_regex(value, pattern): :param value: Some value to convert to a regex string :param pattern: The regex pattern to compare - :return: Either a regex string or a blank string + :return: Either a string matching the regex pattern or an "error" string if value does not match pattern or a blank string if no value provided """ if pattern: if value: + stripped_value = value.strip() try: - isfullmatch = re.fullmatch(pattern, value) + isfullmatch = re.fullmatch(pattern, stripped_value) if isfullmatch: - return value + return stripped_value else: - return "" + return "error" # value does not match regex pattern except (ValueError, TypeError): - return "" + return "error" # value incorrectly formatted else: - return "" + return "" # no value provided else: return value \ No newline at end of file diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/logger.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/logger.py index 1fc9cf88..f9532d14 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/logger.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/logger.py @@ -14,7 +14,7 @@ class ErrorTable(events.ParseEvent): pass -def create_formatting_error_count(stream): +def create_formatting_error_list(stream): """ Create a list of the column headers for cells with formatting errors (event.error = 1) for each table @@ -56,16 +56,16 @@ def blank_error_check(event): """ try: allowed_blank = event.schema_dict["canbeblank"] - error = getattr(event, "error", "0") - if not allowed_blank and not event.text and error != "1": + format_error = getattr(event, "error", "0") + if not allowed_blank and not event.text and format_error != "1": return event.from_event(event, blank_error="1") else: return event - except AttributeError: # Raised in case there is no config item for the given cell + except AttributeError: # Raised in case there is no schema dict for the given cell pass -def create_blank_error_count(stream): +def create_blank_error_list(stream): """ Create a list of the column headers for cells with blank fields that should not be blank (event.blank_error = 1) for each table @@ -75,18 +75,21 @@ def create_blank_error_count(stream): """ blank_error_count = None for event in stream: - if isinstance(event, events.StartTable): + if isinstance(event, events.StartElement) and event.tag == "LALevelVacancies": blank_error_count = [] - elif isinstance(event, events.EndTable): + elif isinstance(event, events.EndElement) and event.tag == "Message": blank_error_count = None elif isinstance(event, ErrorTable): yield ErrorTable.from_event(event, blank_error_count=blank_error_count) blank_error_count = None - elif blank_error_count is not None and isinstance(event, events.Cell): + elif ( + blank_error_count is not None + and isinstance(event, events.TextNode) + ): try: if event.blank_error == "1": - blank_error_count.append(event.header) - except AttributeError: + blank_error_count.append(event.schema.name) + except AttributeError: # Raised in case there is no event.blank_error pass yield event @@ -142,13 +145,14 @@ def create_extra_column_error(event): ) -def save_errors_la(stream, la_log_dir): +def save_errors_la(stream, 
la_log_dir, filename): """ Count the error events and save them as a text file in the Local Authority Logs directory only save the error events if there is at least one error in said event :param stream: A filtered list of event objects :param la_log_dir: Location to save the gathered error logs + :param filename: Filename to use :return: An updated list of event objects """ start_time = f"{datetime.now():%Y-%m-%dT%H%M%SZ}" @@ -157,14 +161,12 @@ def save_errors_la(stream, la_log_dir): if isinstance(event, ErrorTable) and ( event.formatting_error_count is not None and event.blank_error_count is not None - and event.table_name is not None ): if event.formatting_error_count or event.blank_error_count: with open( - f"{os.path.join(la_log_dir, event.filename)}_error_log_{start_time}.txt", + f"{os.path.join(la_log_dir, filename)}_error_log_{start_time}.txt", "a", ) as f: - f.write(event.table_name) f.write("\n") if event.formatting_error_count: f.write( @@ -190,23 +192,23 @@ def save_errors_la(stream, la_log_dir): except AttributeError: pass - if isinstance(event, events.StartTable): - match_error = getattr(event, "match_error", None) - if match_error: - with open( - f"{os.path.join(la_log_dir, event.filename)}_error_log_{start_time}.txt", - "a", - ) as f: - f.write(match_error) - f.write("\n") - column_error = getattr(event, "extra_column_error", None) - if column_error: - with open( - f"{os.path.join(la_log_dir, event.filename)}_error_log_{start_time}.txt", - "a", - ) as f: - f.write(column_error) - f.write("\n") + # if isinstance(event, events.StartTable): + # match_error = getattr(event, "match_error", None) + # if match_error: + # with open( + # f"{os.path.join(la_log_dir, event.filename)}_error_log_{start_time}.txt", + # "a", + # ) as f: + # f.write(match_error) + # f.write("\n") + # column_error = getattr(event, "extra_column_error", None) + # if column_error: + # with open( + # f"{os.path.join(la_log_dir, event.filename)}_error_log_{start_time}.txt", + # "a", + # ) as f: + # f.write(column_error) + # f.write("\n") yield event @@ -218,8 +220,8 @@ def log_errors(stream): :return: An updated list of event objects """ stream = blank_error_check(stream) - # stream = create_formatting_error_count(stream) - # stream = create_blank_error_count(stream) + stream = create_formatting_error_list(stream) + stream = create_blank_error_list(stream) # stream = create_file_match_error(stream) # stream = create_extra_column_error(stream) return stream diff --git a/tests/social_work_workforce/test_cleaner.py b/tests/social_work_workforce/test_cleaner.py new file mode 100644 index 00000000..9da0d6ea --- /dev/null +++ b/tests/social_work_workforce/test_cleaner.py @@ -0,0 +1,268 @@ +from datetime import datetime +from sfdata_stream_parser import events +from liiatools.datasets.social_work_workforce.lds_csww_clean import cleaner + + +def test_clean_dates(): + event = events.TextNode(text=datetime(2019, 1, 15), schema_dict={"date": "%d/%m/%Y"}) + cleaned_event = list(cleaner.clean_dates(event))[0] + assert cleaned_event.text == datetime(2019, 1, 15).date() + assert cleaned_event.error == "0" + + event = events.TextNode(text="2019/1/15", schema_dict={"date": "%d/%m/%Y"}) + cleaned_event = list(cleaner.clean_dates(event))[0] + assert cleaned_event.text == "" + assert cleaned_event.error == "1" + + event = events.TextNode( + text=datetime(2019, 1, 15), schema_dict={"not_date": "%d/%m/%Y"} + ) + cleaned_event = list(cleaner.clean_dates(event))[0] + assert cleaned_event.text == datetime(2019, 1, 15) + + event = 
events.TextNode(text="string", schema_dict={"not_date": "%d/%m/%Y"}) + cleaned_event = list(cleaner.clean_dates(event))[0] + assert cleaned_event.text == "string" + + event = events.TextNode(text=None, schema_dict={"date": "%d/%m/%Y"}) + cleaned_event = list(cleaner.clean_dates(event))[0] + assert cleaned_event.text == "" + assert cleaned_event.error == "0" + + event = events.TextNode(text="", schema_dict={"date": "%d/%m/%Y"}) + cleaned_event = list(cleaner.clean_dates(event))[0] + assert cleaned_event.text == "" + assert cleaned_event.error == "0" + + +def test_clean_categories(): + event = events.TextNode( + text="0", + schema_dict={ + "category": [{"code": "0", "name": "False"}, {"code": "1", "name": "True"}] + }, + ) + cleaned_event = list(cleaner.clean_categories(event))[0] + assert cleaned_event.text == "0" + assert cleaned_event.error == "0" + + event = events.TextNode( + text="0.0", + schema_dict={ + "category": [{"code": "0", "name": "False"}, {"code": "1", "name": "True"}] + }, + ) + cleaned_event = list(cleaner.clean_categories(event))[0] + assert cleaned_event.text == "0" + assert cleaned_event.error == "0" + + event = events.TextNode( + text=0, + schema_dict={ + "category": [{"code": "0", "name": "False"}, {"code": "1", "name": "True"}] + }, + ) + cleaned_event = list(cleaner.clean_categories(event))[0] + assert cleaned_event.text == "0" + assert cleaned_event.error == "0" + + event = events.TextNode( + text="true", + schema_dict={ + "category": [{"code": "0", "name": "False"}, {"code": "1", "name": "True"}] + }, + ) + cleaned_event = list(cleaner.clean_categories(event))[0] + assert cleaned_event.text == "1" + assert cleaned_event.error == "0" + + event = events.TextNode( + text=123, + schema_dict={ + "category": [{"code": "0", "name": "False"}, {"code": "1", "name": "True"}] + }, + ) + cleaned_event = list(cleaner.clean_categories(event))[0] + assert cleaned_event.text == "" + assert cleaned_event.error == "1" + + event = events.TextNode( + text="string", + schema_dict={ + "category": [{"code": "0", "name": "False"}, {"code": "1", "name": "True"}] + }, + ) + cleaned_event = list(cleaner.clean_categories(event))[0] + assert cleaned_event.text == "" + assert cleaned_event.error == "1" + + event = events.TextNode( + text="string", + schema_dict={ + "not_category": [ + {"code": "0", "name": "False"}, + {"code": "1", "name": "True"}, + ] + }, + ) + cleaned_event = list(cleaner.clean_categories(event))[0] + assert cleaned_event.text == "string" + + event = events.TextNode( + text=None, + schema_dict={ + "category": [{"code": "0", "name": "False"}, {"code": "1", "name": "True"}] + }, + ) + cleaned_event = list(cleaner.clean_categories(event))[0] + assert cleaned_event.text == "" + assert cleaned_event.error == "0" + + event = events.TextNode( + text="", + schema_dict={ + "category": [{"code": "0", "name": "False"}, {"code": "1", "name": "True"}] + }, + ) + cleaned_event = list(cleaner.clean_categories(event))[0] + assert cleaned_event.text == "" + assert cleaned_event.error == "0" + + +def test_clean_numeric_integer(): + event = events.TextNode(text=123, schema_dict={"numeric": "integer"}) + cleaned_event = list(cleaner.clean_numeric(event))[0] + assert cleaned_event.text == 123 + assert cleaned_event.error == "0" + + event = events.TextNode(text="", schema_dict={"numeric": "integer"}) + cleaned_event = list(cleaner.clean_numeric(event))[0] + assert cleaned_event.text == "" + assert cleaned_event.error == "0" + + event = events.TextNode(text=None, schema_dict={"numeric": 
"integer"}) + cleaned_event = list(cleaner.clean_numeric(event))[0] + assert cleaned_event.text == "" + assert cleaned_event.error == "0" + + event = events.TextNode(text="123", schema_dict={"numeric": "integer"}) + cleaned_event = list(cleaner.clean_numeric(event))[0] + assert cleaned_event.text == 123 + assert cleaned_event.error == "0" + + event = events.TextNode(text="string", schema_dict={"numeric": "integer"}) + cleaned_event = list(cleaner.clean_numeric(event))[0] + assert cleaned_event.text == "" + assert cleaned_event.error == "1" + + event = events.TextNode(text=datetime(2017, 3, 17), schema_dict={"numeric": "integer"}) + cleaned_event = list(cleaner.clean_numeric(event))[0] + assert cleaned_event.text == "" + assert cleaned_event.error == "1" + + event = events.TextNode( + text=datetime(2017, 3, 17), schema_dict={"not_numeric": "integer"} + ) + cleaned_event = list(cleaner.clean_numeric(event))[0] + assert cleaned_event.text == datetime(2017, 3, 17) + + +def test_clean_numeric_decimal(): + event = events.TextNode(text=123.45, schema_dict={"numeric": "decimal", "decimal": 2}) + cleaned_event = list(cleaner.clean_numeric(event))[0] + assert cleaned_event.text == 123.45 + assert cleaned_event.error == "0" + + event = events.TextNode(text=123.4567, schema_dict={"numeric": "decimal", "decimal": 2}) + cleaned_event = list(cleaner.clean_numeric(event))[0] + assert cleaned_event.text == 123.46 + assert cleaned_event.error == "0" + + event = events.TextNode(text=123.45, schema_dict={"numeric": "decimal", "decimal": 0}) + cleaned_event = list(cleaner.clean_numeric(event))[0] + assert cleaned_event.text == 123 + assert cleaned_event.error == "0" + + event = events.TextNode(text=123.456, schema_dict={"numeric": "decimal", "decimal": 6}) + cleaned_event = list(cleaner.clean_numeric(event))[0] + assert cleaned_event.text == 123.456 + assert cleaned_event.error == "0" + + event = events.TextNode(text="", schema_dict={"numeric": "decimal", "decimal": 2}) + cleaned_event = list(cleaner.clean_numeric(event))[0] + assert cleaned_event.text == "" + assert cleaned_event.error == "0" + + event = events.TextNode(text=None, schema_dict={"numeric": "decimal", "decimal": 2}) + cleaned_event = list(cleaner.clean_numeric(event))[0] + assert cleaned_event.text == "" + assert cleaned_event.error == "0" + + event = events.TextNode(text="123.4567", schema_dict={"numeric": "decimal", "decimal": 2}) + cleaned_event = list(cleaner.clean_numeric(event))[0] + assert cleaned_event.text == 123.46 + assert cleaned_event.error == "0" + + event = events.TextNode(text="string", schema_dict={"numeric": "decimal", "decimal": 2}) + cleaned_event = list(cleaner.clean_numeric(event))[0] + assert cleaned_event.text == "" + assert cleaned_event.error == "1" + + event = events.TextNode(text=datetime(2017, 3, 17), schema_dict={"numeric": "decimal", "decimal": 2}) + cleaned_event = list(cleaner.clean_numeric(event))[0] + assert cleaned_event.text == "" + assert cleaned_event.error == "1" + + event = events.TextNode( + text=datetime(2017, 3, 17), schema_dict={"not_numeric": "decimal"} + ) + cleaned_event = list(cleaner.clean_numeric(event))[0] + assert cleaned_event.text == datetime(2017, 3, 17) + +def test_clean_regex_string(): + event = events.TextNode(text="AB1234567890", schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"}) + cleaned_event = list(cleaner.clean_regex_string(event))[0] + assert cleaned_event.text == "AB1234567890" + assert cleaned_event.error == "0" + + event = events.TextNode(text="", schema_dict={"regex_string": 
r"[A-Za-z]{2}\d{10}"}) + cleaned_event = list(cleaner.clean_regex_string(event))[0] + assert cleaned_event.text == "" + assert cleaned_event.error == "0" + + event = events.TextNode(text=None, schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"}) + cleaned_event = list(cleaner.clean_regex_string(event))[0] + assert cleaned_event.text == "" + assert cleaned_event.error == "0" + + event = events.TextNode(text=" AB1234567890 ", schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"}) + cleaned_event = list(cleaner.clean_regex_string(event))[0] + assert cleaned_event.text == "AB1234567890" + assert cleaned_event.error == "0" + + event = events.TextNode(text="AB123456", schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"}) + cleaned_event = list(cleaner.clean_regex_string(event))[0] + assert cleaned_event.text == "" + assert cleaned_event.error == "1" + + event = events.TextNode(text="AB1234567890123456", schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"}) + cleaned_event = list(cleaner.clean_regex_string(event))[0] + assert cleaned_event.text == "" + assert cleaned_event.error == "1" + + event = events.TextNode(text="AB12345 67890", schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"}) + cleaned_event = list(cleaner.clean_regex_string(event))[0] + assert cleaned_event.text == "" + assert cleaned_event.error == "1" + + event = events.TextNode(text="string", schema_dict={"not_regex_string": r"[A-Za-z]{2}\d{10}"}) + cleaned_event = list(cleaner.clean_regex_string(event))[0] + assert cleaned_event.text == "string" + + +# test_clean_dates() +# test_clean_categories() +# test_clean_numeric_integer() +# test_clean_numeric_decimal() +# test_clean_regex_string() + diff --git a/tests/social_work_workforce/test_converters.py b/tests/social_work_workforce/test_converters.py index 4796cf90..88b8ff1d 100644 --- a/tests/social_work_workforce/test_converters.py +++ b/tests/social_work_workforce/test_converters.py @@ -31,29 +31,32 @@ def test_to_integer(): assert converters.to_integer(123, "integer") == 123 assert converters.to_integer("1.0", "integer") == 1 assert converters.to_integer("date", "") == "date" + assert converters.to_integer(123.456, "integer") == 123 assert converters.to_integer(0, "integer") == 0 assert converters.to_integer("", "integer") == "" assert converters.to_integer(None, "integer") == "" def test_to_decimal(): - decimalplaces = 3 - assert converters.to_decimal("12.345", "decimal", decimalplaces) == "12.345" - assert converters.to_decimal("12.3456", "decimal", decimalplaces) == "12.346" - assert converters.to_decimal("12.3", "decimal", decimalplaces) == "12.300" - assert converters.to_decimal(12.3456, "decimal", decimalplaces) == "12.346" - assert converters.to_decimal("1.0", "decimal", decimalplaces) == "1.000" - assert converters.to_decimal(0, "decimal", decimalplaces) == "0.000" + decimal_places = 3 + assert converters.to_decimal("12.345", "decimal", decimal_places) == 12.345 + assert converters.to_decimal("12.3456", "decimal", decimal_places) == 12.346 + assert converters.to_decimal("12.3", "decimal", decimal_places) == 12.3 + assert converters.to_decimal(12.3456, "decimal", decimal_places) == 12.346 + assert converters.to_decimal("1.0", "decimal", decimal_places) == 1 + assert converters.to_decimal(0, "decimal", decimal_places) == 0 assert converters.to_decimal("date", "") == "date" - assert converters.to_decimal("", "decimal", decimalplaces) == "" - assert converters.to_decimal(None, "decimal", decimalplaces) == "" + assert converters.to_decimal("", "decimal", decimal_places) == "" + assert 
converters.to_decimal(None, "decimal", decimal_places) == "" def test_to_regex(): pattern=r"[A-Za-z]{2}\d{10}" assert converters.to_regex("AB1234567890",pattern) == "AB1234567890" # match - assert converters.to_regex("AB1234567890123456",pattern) == "" # too long - assert converters.to_regex("AB12345",pattern) == "" # too short + assert converters.to_regex(" AB1234567890 ",pattern) == "AB1234567890" # match + assert converters.to_regex("AB1234567890123456",pattern) == "error" # too long + assert converters.to_regex("AB12345",pattern) == "error" # too short + assert converters.to_regex("xxxxOz2054309383",pattern) == "error" # invalid format assert converters.to_regex("date", "") == "date" # no pattern assert converters.to_regex("", pattern) == "" # no value assert converters.to_regex(None, pattern) == "" # no value diff --git a/tests/social_work_workforce/test_logger.py b/tests/social_work_workforce/test_logger.py new file mode 100644 index 00000000..550887f0 --- /dev/null +++ b/tests/social_work_workforce/test_logger.py @@ -0,0 +1,144 @@ +import tempfile as tmp +from unittest.mock import patch +from pathlib import Path +from datetime import datetime + +from liiatools.datasets.social_work_workforce.lds_csww_clean import logger + +from sfdata_stream_parser import events + + +def test_create_formatting_error_list(): + stream = ( + events.StartTable(table_name="AD1"), + events.Cell(header="some_header", error="1"), + events.Cell(header="some_header", error="1"), + events.Cell(header="some_header", error="0"), + events.EndTable(), + ) + events_with_formatting_error_count = list( + logger.create_formatting_error_list(stream) + ) + for event in events_with_formatting_error_count: + if isinstance(event, logger.ErrorTable): + assert event.formatting_error_count == [ + "some_header", + "some_header", + ] + + stream = ( + events.StartTable(table_name="AD1"), + events.Cell(header="some_header", error="1"), + events.Cell(header="some_other_header", error="1"), + events.Cell(header="some_header"), + events.EndTable(), + ) + events_with_formatting_error_count = list( + logger.create_formatting_error_list(stream) + ) + for event in events_with_formatting_error_count: + if isinstance(event, logger.ErrorTable): + assert event.formatting_error_count == [ + "some_header", + "some_other_header", + ] + + stream = ( + events.StartTable(table_name="AD1"), + events.Cell(header="some_header", error="1"), + events.Cell(header="some_header_2", error=None), + events.Cell(header="some_header_3", error=""), + events.Cell(), + events.EndTable(), + ) + events_with_formatting_error_count = list( + logger.create_formatting_error_list(stream) + ) + for event in events_with_formatting_error_count: + if isinstance(event, logger.ErrorTable): + assert event.formatting_error_count == ["some_header"] + + +def test_blank_error_check(): + stream = logger.blank_error_check( + [ + events.TextNode(schema_dict={"canbeblank": False}, text="", error="0"), + events.TextNode(schema_dict={"canbeblank": False}, text=None, error="0"), + events.TextNode(schema_dict={"canbeblank": False}, text="", error="1"), + events.TextNode(schema_dict={"canbeblank": False}, text="string", error="0"), + events.TextNode(schema_dict={"canbeblank": True}, text="", error="0"), + ] + ) + # for e in stream: + # print(e.as_dict()) + stream = list(stream) + # print(stream[1].as_dict()) + assert stream[0].blank_error == "1" + assert stream[1].blank_error == "1" + assert "blank_error" not in stream[2].as_dict() + assert "blank_error" not in stream[3].as_dict() + assert 
"blank_error" not in stream[4].as_dict() + + +def test_create_blank_error_list(): + stream = ( + events.StartTable(), + events.Cell(header="some_header", blank_error="1"), + events.Cell(header="some_header_2", blank_error=None), + events.Cell(header="some_header_3", blank_error=""), + events.Cell(), + logger.ErrorTable(), + events.EndTable(), + ) + events_with_blank_error_count = list(logger.create_blank_error_list(stream)) + for event in events_with_blank_error_count: + if isinstance(event, logger.ErrorTable) and event.as_dict() != {}: + assert event.blank_error_count == ["some_header"] + + stream = ( + events.StartTable(), + events.Cell(header="some_header", blank_error="1"), + events.Cell(header="some_header_2", blank_error="1"), + events.Cell(header="some_header_3", blank_error=""), + events.Cell(), + logger.ErrorTable(), + ) + events_with_blank_error_count = list(logger.create_blank_error_list(stream)) + for event in events_with_blank_error_count: + if isinstance(event, logger.ErrorTable) and event.as_dict() != {}: + assert event.blank_error_count == [ + "some_header", + "some_header_2", + ] + + +@patch("builtins.open", create=True) +def test_save_errors_la(mock_save): + la_log_dir = tmp.gettempdir() + start_time = f"{datetime.now():%Y-%m-%dT%H%M%SZ}" + + stream = logger.save_errors_la( + [ + logger.ErrorTable( + filename="test_file", + formatting_error_count=["CHILD", "CHILD", "AGE"], + blank_error_count=["POSTCODE", "POSTCODE", "DATE"], + table_name="List 1", + extra_column_error=["list", "of", "headers"], + ), + ], + la_log_dir, + ) + stream = list(stream) + + mock_save.assert_called_once_with( + f"{Path(la_log_dir, 'test_file')}_error_log_{start_time}.txt", "a" + ) + # mock_save.write.assert_called_once_with(f"test_file_{start_time}") + +# test_create_formatting_error_list() +# test_blank_error_check() +# test_create_blank_error_list() +# test_save_errors_la() + + From 094faf41af0d1d24fc2bab638d85f5f975ca5650 Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Mon, 17 Jul 2023 17:39:18 +0000 Subject: [PATCH 28/40] Fix formatting_error name; run Black --- .../csww_main_functions.py | 9 +- .../lds_csww_clean/cleaner.py | 60 ++++++++--- .../lds_csww_clean/logger.py | 48 ++++----- .../spec/social_work_workforce/pan-agg.yml | 2 +- tests/social_work_workforce/test_logger.py | 101 +++++++++--------- 5 files changed, 123 insertions(+), 97 deletions(-) diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py index ac0f3d5e..1c65b7cc 100644 --- a/liiatools/datasets/social_work_workforce/csww_main_functions.py +++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py @@ -11,7 +11,7 @@ from liiatools.datasets.social_work_workforce.lds_csww_clean.xml import ( etree, to_xml, - dom_parse + dom_parse, ) from liiatools.datasets.social_work_workforce.lds_csww_clean.schema import ( Schema, @@ -132,12 +132,9 @@ def cleanfile(input, la_code, la_log_dir, output): stream = filters.add_context(stream) stream = filters.add_schema(stream, schema=Schema(input_year).schema) stream = filters.add_schema_dict(stream, schema_path=FilePath(input_year).path) - # Clean stream - stream = cleaner.clean_categories(stream) - stream = cleaner.clean_dates(stream) - stream = cleaner.clean_numeric(stream) - stream = cleaner.clean_regex_string(stream) + # Clean stream + stream = cleaner.clean(stream) stream = logger.log_errors(stream) # Output results diff --git 
a/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py index 22287af8..fb4e857f 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py @@ -4,16 +4,22 @@ from sfdata_stream_parser import events from sfdata_stream_parser.filters.generic import streamfilter, pass_event -from liiatools.datasets.social_work_workforce.lds_csww_clean.converters import to_category, to_integer, to_decimal, to_regex +from liiatools.datasets.social_work_workforce.lds_csww_clean.converters import ( + to_category, + to_integer, + to_decimal, + to_regex, +) from liiatools.datasets.shared_functions.converters import to_date -#from liiatools.datasets.shared_functions.common import check_postcode log = logging.getLogger(__name__) @streamfilter( - check=type_check(events.TextNode), fail_function=pass_event, error_function=pass_event + check=type_check(events.TextNode), + fail_function=pass_event, + error_function=pass_event, ) def clean_dates(event): """ @@ -25,13 +31,15 @@ def clean_dates(event): dateformat = event.schema_dict["date"] try: clean_text = to_date(event.text, dateformat) - return event.from_event(event, text=clean_text, error="0") + return event.from_event(event, text=clean_text, formatting_error="0") except (AttributeError, TypeError, ValueError): - return event.from_event(event, text="", error="1") + return event.from_event(event, text="", formatting_error="1") @streamfilter( - check=type_check(events.TextNode), fail_function=pass_event, error_function=pass_event + check=type_check(events.TextNode), + fail_function=pass_event, + error_function=pass_event, ) def clean_categories(event): """ @@ -44,14 +52,16 @@ def clean_categories(event): try: clean_text = to_category(event.text, category) if clean_text != "error": - return event.from_event(event, text=clean_text, error='0') - return event.from_event(event, text="", error="1") + return event.from_event(event, text=clean_text, formatting_error="0") + return event.from_event(event, text="", formatting_error="1") except (AttributeError, TypeError, ValueError): - return event.from_event(event, text="", error="1") + return event.from_event(event, text="", formatting_error="1") @streamfilter( - check=type_check(events.TextNode), fail_function=pass_event, error_function=pass_event + check=type_check(events.TextNode), + fail_function=pass_event, + error_function=pass_event, ) def clean_numeric(event): """ @@ -68,14 +78,16 @@ def clean_numeric(event): decimal_places = int(event.schema_dict["decimal"]) clean_text = to_decimal(event.text, numeric, decimal_places) if clean_text != "error": - return event.from_event(event, text=clean_text, error='0') - return event.from_event(event, text="", error="1") + return event.from_event(event, text=clean_text, formatting_error="0") + return event.from_event(event, text="", formatting_error="1") except (AttributeError, TypeError, ValueError): - return event.from_event(event, text="", error="1") + return event.from_event(event, text="", formatting_error="1") @streamfilter( - check=type_check(events.TextNode), fail_function=pass_event, error_function=pass_event + check=type_check(events.TextNode), + fail_function=pass_event, + error_function=pass_event, ) def clean_regex_string(event): """ @@ -88,7 +100,21 @@ def clean_regex_string(event): try: clean_text = to_regex(event.text, pattern) if clean_text != "error": - return event.from_event(event, 
text=clean_text, error="0") - return event.from_event(event, text="", error="1") + return event.from_event(event, text=clean_text, formatting_error="0") + return event.from_event(event, text="", formatting_error="1") except (AttributeError, TypeError, ValueError): - return event.from_event(event, text="", error="1") \ No newline at end of file + return event.from_event(event, text="", formatting_error="1") + + +def clean(stream): + """ + Compile the cleaning functions + + :param event: A list of event objects + :return: An updated list of event objects + """ + stream = clean_dates(stream) + stream = clean_categories(stream) + stream = clean_numeric(stream) + stream = clean_regex_string(stream) + return stream diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/logger.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/logger.py index f9532d14..0765bfcd 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/logger.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/logger.py @@ -21,24 +21,24 @@ def create_formatting_error_list(stream): :param stream: A filtered list of event objects :return: An updated list of event objects with error counts """ - formatting_error_count = None + formatting_error_list = None for event in stream: if isinstance(event, events.StartElement) and event.tag == "LALevelVacancies": - formatting_error_count = [] + formatting_error_list = [] elif isinstance(event, events.EndElement) and event.tag == "Message": yield ErrorTable.from_event( event, - formatting_error_count=formatting_error_count, + formatting_error_list=formatting_error_list, ) - formatting_error_count = None + formatting_error_list = None elif ( - formatting_error_count is not None + formatting_error_list is not None and isinstance(event, events.TextNode) ): try: - if event.error == "1": - formatting_error_count.append(event.schema.name) - except AttributeError: # Raised in case there is no event.error + if event.formatting_error == "1": + formatting_error_list.append(event.schema.name) + except AttributeError: # Raised in case there is no event.formatting_error pass yield event @@ -56,8 +56,8 @@ def blank_error_check(event): """ try: allowed_blank = event.schema_dict["canbeblank"] - format_error = getattr(event, "error", "0") - if not allowed_blank and not event.text and format_error != "1": + formatting_error = getattr(event, "formatting_error", "0") + if not allowed_blank and not event.text and formatting_error != "1": return event.from_event(event, blank_error="1") else: return event @@ -73,22 +73,22 @@ def create_blank_error_list(stream): :param stream: A filtered list of event objects :return: An updated list of event objects """ - blank_error_count = None + blank_error_list = None for event in stream: if isinstance(event, events.StartElement) and event.tag == "LALevelVacancies": - blank_error_count = [] + blank_error_list = [] elif isinstance(event, events.EndElement) and event.tag == "Message": - blank_error_count = None + blank_error_list = None elif isinstance(event, ErrorTable): - yield ErrorTable.from_event(event, blank_error_count=blank_error_count) - blank_error_count = None + yield ErrorTable.from_event(event, blank_error_list=blank_error_list) + blank_error_list = None elif ( - blank_error_count is not None + blank_error_list is not None and isinstance(event, events.TextNode) ): try: if event.blank_error == "1": - blank_error_count.append(event.schema.name) + blank_error_list.append(event.schema.name) except AttributeError: # Raised in 
case there is no event.blank_error pass yield event @@ -159,32 +159,32 @@ def save_errors_la(stream, la_log_dir, filename): for event in stream: try: if isinstance(event, ErrorTable) and ( - event.formatting_error_count is not None - and event.blank_error_count is not None + event.formatting_error_list is not None + and event.blank_error_list is not None ): - if event.formatting_error_count or event.blank_error_count: + if event.formatting_error_list or event.blank_error_list: with open( f"{os.path.join(la_log_dir, filename)}_error_log_{start_time}.txt", "a", ) as f: f.write("\n") - if event.formatting_error_count: + if event.formatting_error_list: f.write( "Number of cells that have been made blank " "because they could not be formatted correctly" ) f.write("\n") - counter_dict = Counter(event.formatting_error_count) + counter_dict = Counter(event.formatting_error_list) f.write( str(counter_dict)[9:-2] ) # Remove "Counter({" and "})" from string f.write("\n") - if event.blank_error_count: + if event.blank_error_list: f.write( "Number of blank cells that should have contained data" ) f.write("\n") - blank_counter_dict = Counter(event.blank_error_count) + blank_counter_dict = Counter(event.blank_error_list) f.write( str(blank_counter_dict)[9:-2] ) # Remove "Counter({" and "})" from string diff --git a/liiatools/spec/social_work_workforce/pan-agg.yml b/liiatools/spec/social_work_workforce/pan-agg.yml index c6bdad4e..ecf3b9b6 100644 --- a/liiatools/spec/social_work_workforce/pan-agg.yml +++ b/liiatools/spec/social_work_workforce/pan-agg.yml @@ -28,4 +28,4 @@ column_names: pan_data_kept: - CSWWWorker - - LALevelVacanciespisodes \ No newline at end of file + - LALevelVacancies \ No newline at end of file diff --git a/tests/social_work_workforce/test_logger.py b/tests/social_work_workforce/test_logger.py index 550887f0..aa291052 100644 --- a/tests/social_work_workforce/test_logger.py +++ b/tests/social_work_workforce/test_logger.py @@ -1,4 +1,5 @@ import tempfile as tmp +import xmlschema from unittest.mock import patch from pathlib import Path from datetime import datetime @@ -11,9 +12,9 @@ def test_create_formatting_error_list(): stream = ( events.StartTable(table_name="AD1"), - events.Cell(header="some_header", error="1"), - events.Cell(header="some_header", error="1"), - events.Cell(header="some_header", error="0"), + events.Cell(header="some_header", formatting_error="1"), + events.Cell(header="some_header", formatting_error="1"), + events.Cell(header="some_header", formatting_error="0"), events.EndTable(), ) events_with_formatting_error_count = list( @@ -28,8 +29,8 @@ def test_create_formatting_error_list(): stream = ( events.StartTable(table_name="AD1"), - events.Cell(header="some_header", error="1"), - events.Cell(header="some_other_header", error="1"), + events.Cell(header="some_header", formatting_error="1"), + events.Cell(header="some_other_header", formatting_error="1"), events.Cell(header="some_header"), events.EndTable(), ) @@ -45,9 +46,9 @@ def test_create_formatting_error_list(): stream = ( events.StartTable(table_name="AD1"), - events.Cell(header="some_header", error="1"), - events.Cell(header="some_header_2", error=None), - events.Cell(header="some_header_3", error=""), + events.Cell(header="some_header", formatting_error="1"), + events.Cell(header="some_header_2", formatting_error=None), + events.Cell(header="some_header_3", formatting_error=""), events.Cell(), events.EndTable(), ) @@ -62,17 +63,14 @@ def test_create_formatting_error_list(): def test_blank_error_check(): stream 
= logger.blank_error_check( [ - events.TextNode(schema_dict={"canbeblank": False}, text="", error="0"), - events.TextNode(schema_dict={"canbeblank": False}, text=None, error="0"), - events.TextNode(schema_dict={"canbeblank": False}, text="", error="1"), - events.TextNode(schema_dict={"canbeblank": False}, text="string", error="0"), - events.TextNode(schema_dict={"canbeblank": True}, text="", error="0"), + events.TextNode(schema_dict={"canbeblank": False}, text="", formatting_error="0"), + events.TextNode(schema_dict={"canbeblank": False}, text=None, formatting_error="0"), + events.TextNode(schema_dict={"canbeblank": False}, text="", formatting_error="1"), + events.TextNode(schema_dict={"canbeblank": False}, text="string", formatting_error="0"), + events.TextNode(schema_dict={"canbeblank": True}, text="", formatting_error="0"), ] ) - # for e in stream: - # print(e.as_dict()) stream = list(stream) - # print(stream[1].as_dict()) assert stream[0].blank_error == "1" assert stream[1].blank_error == "1" assert "blank_error" not in stream[2].as_dict() @@ -80,36 +78,41 @@ def test_blank_error_check(): assert "blank_error" not in stream[4].as_dict() -def test_create_blank_error_list(): - stream = ( - events.StartTable(), - events.Cell(header="some_header", blank_error="1"), - events.Cell(header="some_header_2", blank_error=None), - events.Cell(header="some_header_3", blank_error=""), - events.Cell(), - logger.ErrorTable(), - events.EndTable(), - ) - events_with_blank_error_count = list(logger.create_blank_error_list(stream)) - for event in events_with_blank_error_count: - if isinstance(event, logger.ErrorTable) and event.as_dict() != {}: - assert event.blank_error_count == ["some_header"] - - stream = ( - events.StartTable(), - events.Cell(header="some_header", blank_error="1"), - events.Cell(header="some_header_2", blank_error="1"), - events.Cell(header="some_header_3", blank_error=""), - events.Cell(), - logger.ErrorTable(), - ) - events_with_blank_error_count = list(logger.create_blank_error_list(stream)) - for event in events_with_blank_error_count: - if isinstance(event, logger.ErrorTable) and event.as_dict() != {}: - assert event.blank_error_count == [ - "some_header", - "some_header_2", - ] +# def test_create_blank_error_list(): +# schema = xmlschema.XsdElement('some_header',"xxx",None,True) +# stream = ( +# events.StartElement(tag="LALevelVacancies"), +# events.TextNode(text="text_1", schema=schema, blank_error="1"), +# # events.TextNode(text="text_2", schema={'name': 'some_header_2'}, blank_error=None), +# # events.TextNode(text="text_3", schema={'name': 'some_header_3'}, blank_error=""), +# # events.TextNode(text="text_4", schema="header {'name': 'some_header_4'}')"), +# events.EndElement(tag="Message"), +# logger.ErrorTable(), +# ) +# events_with_blank_error_count = list(logger.create_blank_error_list(stream)) +# print(f"blank error headers = {events_with_blank_error_count}") +# for event in events_with_blank_error_count: +# print(event.schema.name) +# if isinstance(event, logger.ErrorTable) and event.as_dict() != {}: +# assert event.blank_error_count == ["some_header"] + +# stream = ( +# events.StartElement(tag="LALevelVacancies"), +# events.TextNode(text="some_header", blank_error="1"), +# events.TextNode(text="some_header_2", blank_error="1"), +# events.TextNode(text="some_header_3", blank_error=""), +# events.TextNode(text="some_header_4"), +# events.EndElement(tag="Message"), +# logger.ErrorTable(), +# ) +# events_with_blank_error_list = list(logger.create_blank_error_list(stream)) 
+# for event in events_with_blank_error_list: +# if isinstance(event, logger.ErrorTable) and event.as_dict() != {}: +# print(event.blank_error_count) +# assert event.blank_error_count == [ +# "some_header", +# "some_header_2", +# ] @patch("builtins.open", create=True) @@ -136,9 +139,9 @@ def test_save_errors_la(mock_save): ) # mock_save.write.assert_called_once_with(f"test_file_{start_time}") -# test_create_formatting_error_list() -# test_blank_error_check() -# test_create_blank_error_list() +test_create_formatting_error_list() +test_blank_error_check() +test_create_blank_error_list() # test_save_errors_la() From 635b472045213ec85340235957ddb67dcd24a610 Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Mon, 31 Jul 2023 14:13:15 +0000 Subject: [PATCH 29/40] Update logger, create tests, run black --- .../csww_main_functions.py | 12 +-- .../lds_csww_clean/filters.py | 23 +++++- .../lds_csww_clean/logger.py | 70 ---------------- tests/social_work_workforce/test_logger.py | 82 ++++--------------- 4 files changed, 45 insertions(+), 142 deletions(-) diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py index 1c65b7cc..62ea8bb1 100644 --- a/liiatools/datasets/social_work_workforce/csww_main_functions.py +++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py @@ -218,12 +218,12 @@ def pan_agg(input, la_code, output): # Run in Visual Studio Code |> -cleanfile( - "/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml", - "BAD", - "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", - "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", -) +# cleanfile( +# "/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022_sc.xml", +# "BAD", +# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", +# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", +# ) # la_agg( # "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_worker_clean.csv", diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/filters.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/filters.py index eafb9297..3073a5c8 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/filters.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/filters.py @@ -76,7 +76,10 @@ def _create_category_dict(field: str, file: str): documentation = element.findall(search_doc) for i, d in enumerate(documentation): name_dict = {"name": d.text} - category_dict["category"][i] = {**category_dict["category"][i], **name_dict} + category_dict["category"][i] = { + **category_dict["category"][i], + **name_dict, + } return category_dict @@ -94,7 +97,9 @@ def _create_float_dict(field: str, file: str): search_restriction = f".//{{http://www.w3.org/2001/XMLSchema}}restriction" restriction = element.findall(search_restriction) for r in restriction: - code_dict = {"numeric": r.get("base")[3:]} # Remove the "xs:" from the start of the base string + code_dict = { + "numeric": r.get("base")[3:] + } # Remove the "xs:" from the start of the base string if code_dict["numeric"] == "decimal": float_dict = code_dict @@ -120,6 +125,13 @@ def _create_float_dict(field: str, file: str): def _create_regex_dict(field: str, file: 
str): + """ + Parse an XML file and extract the regex pattern for a given field name + + :param field: The name of the field to look for in the XML file + :param file: The path to the XML file + :return: A dictionary with the key "regex_string" and the value as the regex pattern, or None if no pattern is found + """ regex_dict = None xsd_xml = ET.parse(file) @@ -163,6 +175,13 @@ def add_schema(event, schema: xmlschema.XMLSchema): @streamfilter(check=type_check(events.TextNode), fail_function=pass_event) def add_schema_dict(event, schema_path: str): + """ + Add a dictionary of schema attributes to an event object based on its type and occurrence + + :param event: An event object with a schema attribute + :param schema_path: The path to the schema file + :return: A new event object with a schema_dict attribute, or the original event object if no schema_dict is found + """ schema_dict = None config_type = event.schema.type.name diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/logger.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/logger.py index 0765bfcd..8c7e715a 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/logger.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/logger.py @@ -94,57 +94,6 @@ def create_blank_error_list(stream): yield event -@streamfilter( - check=type_check(events.StartTable), - fail_function=pass_event, - error_function=pass_event, -) -def create_file_match_error(event): - """ - Add a match_error to StartTables that do not have an event.sheet_name so these errors can be written to the log.txt - file. If there is no event.sheet_name for a given StartTable that means its headers did not match any of those - in the config file - - :param event: A filtered list of event objects of type StartTable - :return: An updated list of event objects - """ - try: - if event.table_name: - return event - except AttributeError: - return event.from_event( - event, - match_error=f"Failed to find a set of matching columns headers for file titled " - f"'{event.filename}' which contains column headers {event.headers} so no output has been produced", - ) - return event - - -@streamfilter( - check=type_check(events.StartTable), - fail_function=pass_event, - error_function=pass_event, -) -def create_extra_column_error(event): - """ - Add a extra_column_error to StartTables that have more columns than the set of expected columns so these can be written to the log.txt - - :param event: A filtered list of event objects of type StartTable - :return: An updated list of event objects - """ - extra_columns = [ - item for item in event.headers if item not in event.expected_columns - ] - if len(extra_columns) == 0: - return event - else: - return event.from_event( - event, - extra_column_error=f"Additional columns were found in file titled " - f"'{event.filename}' than those expected from schema for filetype = {event.table_name}, so these columns have been removed: {extra_columns}", - ) - - def save_errors_la(stream, la_log_dir, filename): """ Count the error events and save them as a text file in the Local Authority Logs directory @@ -192,23 +141,6 @@ def save_errors_la(stream, la_log_dir, filename): except AttributeError: pass - # if isinstance(event, events.StartTable): - # match_error = getattr(event, "match_error", None) - # if match_error: - # with open( - # f"{os.path.join(la_log_dir, event.filename)}_error_log_{start_time}.txt", - # "a", - # ) as f: - # f.write(match_error) - # f.write("\n") - # column_error = getattr(event, 
"extra_column_error", None) - # if column_error: - # with open( - # f"{os.path.join(la_log_dir, event.filename)}_error_log_{start_time}.txt", - # "a", - # ) as f: - # f.write(column_error) - # f.write("\n") yield event @@ -222,6 +154,4 @@ def log_errors(stream): stream = blank_error_check(stream) stream = create_formatting_error_list(stream) stream = create_blank_error_list(stream) - # stream = create_file_match_error(stream) - # stream = create_extra_column_error(stream) return stream diff --git a/tests/social_work_workforce/test_logger.py b/tests/social_work_workforce/test_logger.py index aa291052..ad00a80b 100644 --- a/tests/social_work_workforce/test_logger.py +++ b/tests/social_work_workforce/test_logger.py @@ -1,5 +1,4 @@ import tempfile as tmp -import xmlschema from unittest.mock import patch from pathlib import Path from datetime import datetime @@ -61,7 +60,7 @@ def test_create_formatting_error_list(): def test_blank_error_check(): - stream = logger.blank_error_check( + mock_stream = logger.blank_error_check( [ events.TextNode(schema_dict={"canbeblank": False}, text="", formatting_error="0"), events.TextNode(schema_dict={"canbeblank": False}, text=None, formatting_error="0"), @@ -70,7 +69,7 @@ def test_blank_error_check(): events.TextNode(schema_dict={"canbeblank": True}, text="", formatting_error="0"), ] ) - stream = list(stream) + stream = list(mock_stream) assert stream[0].blank_error == "1" assert stream[1].blank_error == "1" assert "blank_error" not in stream[2].as_dict() @@ -78,70 +77,25 @@ def test_blank_error_check(): assert "blank_error" not in stream[4].as_dict() -# def test_create_blank_error_list(): -# schema = xmlschema.XsdElement('some_header',"xxx",None,True) -# stream = ( -# events.StartElement(tag="LALevelVacancies"), -# events.TextNode(text="text_1", schema=schema, blank_error="1"), -# # events.TextNode(text="text_2", schema={'name': 'some_header_2'}, blank_error=None), -# # events.TextNode(text="text_3", schema={'name': 'some_header_3'}, blank_error=""), -# # events.TextNode(text="text_4", schema="header {'name': 'some_header_4'}')"), -# events.EndElement(tag="Message"), -# logger.ErrorTable(), -# ) -# events_with_blank_error_count = list(logger.create_blank_error_list(stream)) -# print(f"blank error headers = {events_with_blank_error_count}") -# for event in events_with_blank_error_count: -# print(event.schema.name) -# if isinstance(event, logger.ErrorTable) and event.as_dict() != {}: -# assert event.blank_error_count == ["some_header"] - -# stream = ( -# events.StartElement(tag="LALevelVacancies"), -# events.TextNode(text="some_header", blank_error="1"), -# events.TextNode(text="some_header_2", blank_error="1"), -# events.TextNode(text="some_header_3", blank_error=""), -# events.TextNode(text="some_header_4"), -# events.EndElement(tag="Message"), -# logger.ErrorTable(), -# ) -# events_with_blank_error_list = list(logger.create_blank_error_list(stream)) -# for event in events_with_blank_error_list: -# if isinstance(event, logger.ErrorTable) and event.as_dict() != {}: -# print(event.blank_error_count) -# assert event.blank_error_count == [ -# "some_header", -# "some_header_2", -# ] - - -@patch("builtins.open", create=True) -def test_save_errors_la(mock_save): - la_log_dir = tmp.gettempdir() - start_time = f"{datetime.now():%Y-%m-%dT%H%M%SZ}" - - stream = logger.save_errors_la( - [ - logger.ErrorTable( - filename="test_file", - formatting_error_count=["CHILD", "CHILD", "AGE"], - blank_error_count=["POSTCODE", "POSTCODE", "DATE"], - table_name="List 1", - 
extra_column_error=["list", "of", "headers"], - ), - ], - la_log_dir, +def test_create_blank_error_list(): + mock_stream = ( + events.StartElement(tag="LALevelVacancies"), + events.TextNode(text="some_header", blank_error="1"), + events.TextNode(text="some_header_2", blank_error="1"), + events.TextNode(text="some_header_3", blank_error=""), + events.TextNode(text="some_header_4"), + events.EndElement(tag="Message"), ) - stream = list(stream) + events_with_blank_error_list = list(logger.create_blank_error_list(mock_stream)) + for event in events_with_blank_error_list: + if isinstance(event, logger.ErrorTable) and event.as_dict() != {}: + print(event.blank_error_list) + assert event.blank_error_list == [ + "some_header", + "some_header_2", + ] - mock_save.assert_called_once_with( - f"{Path(la_log_dir, 'test_file')}_error_log_{start_time}.txt", "a" - ) - # mock_save.write.assert_called_once_with(f"test_file_{start_time}") test_create_formatting_error_list() test_blank_error_check() test_create_blank_error_list() -# test_save_errors_la() - - From e627801b6c3428646e118122bb73d31aa2b4f2eb Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Tue, 1 Aug 2023 09:44:21 +0000 Subject: [PATCH 30/40] Add docstrings in SWFtools modules --- .../SWFtools/analysis/FTESum.py | 14 +++++++++++++ .../SWFtools/analysis/growth_tables.py | 4 ++++ .../SWFtools/analysis/seniority.py | 21 +++++++++++++++++++ .../lds_csww_clean/csww_record.py | 6 ++++++ 4 files changed, 45 insertions(+) diff --git a/liiatools/datasets/social_work_workforce/SWFtools/analysis/FTESum.py b/liiatools/datasets/social_work_workforce/SWFtools/analysis/FTESum.py index 39c69088..3275bb2e 100644 --- a/liiatools/datasets/social_work_workforce/SWFtools/analysis/FTESum.py +++ b/liiatools/datasets/social_work_workforce/SWFtools/analysis/FTESum.py @@ -4,6 +4,13 @@ def FTESum(): + """ + Calculate the sum of FTE by LEAName, YearCensus, SeniorityCode and SeniorityName from + the input csv file + + :return: Excel file with the name FTESum_5d.xlsx and the same path as the input file + """ + # ===== Read file ===== # file = "CompMergSen.csv" requestPath = work_path.request @@ -26,6 +33,13 @@ def FTESum(): def FTESum_2020(): + """ + Read a CSV file and calculate the sum of FTE by LEAName, YearCensus, SeniorityCode and + SeniorityName for the year 2020 + + :return: Excel file with the name FTESum_2020.xlsx and the same path as the input file + """ + # ===== Read file ===== # file = "CompMergSen.csv" requestPath = work_path.request diff --git a/liiatools/datasets/social_work_workforce/SWFtools/analysis/growth_tables.py b/liiatools/datasets/social_work_workforce/SWFtools/analysis/growth_tables.py index eb7dd8d6..4ad4bab2 100644 --- a/liiatools/datasets/social_work_workforce/SWFtools/analysis/growth_tables.py +++ b/liiatools/datasets/social_work_workforce/SWFtools/analysis/growth_tables.py @@ -8,6 +8,10 @@ def growth_tables(): + """ + Create two Excel files with tables of growth rates and population growth for six LEAs + """ + growth_rate_df = { "LEAName": [ "Havering", diff --git a/liiatools/datasets/social_work_workforce/SWFtools/analysis/seniority.py b/liiatools/datasets/social_work_workforce/SWFtools/analysis/seniority.py index b2d91a5a..0d94afe6 100644 --- a/liiatools/datasets/social_work_workforce/SWFtools/analysis/seniority.py +++ b/liiatools/datasets/social_work_workforce/SWFtools/analysis/seniority.py @@ -9,6 +9,12 @@ def seniority(): + """ + Assign a seniority code to each row in the input CSV file + 
based on the role start date, agency worker status and org role. + + :return: CSV file with column SeniorityCode added. + """ # ===== Read file ===== # file = "merged_modified.csv" path = work_path.flatfile_folder @@ -109,6 +115,15 @@ def seniority(): def seniority_forecast_04(): + """ + Calculate the seniority forecast for six LEAs from 2020 to 2025. + + Reads two Excel files. The seniority forecast is calculated by multiplying the FTESum from the first file + by the population growth rate for each year and LEA from the second file + + :return: Excel file with the name seniority_forecast_04_clean.xlsx + """ + # ===== Read file ===== # file = "FTESum_2020.xlsx" requestPath = work_path.request @@ -236,6 +251,12 @@ def seniority_forecast_5c(): def progressed(): + """ + Determine whether an employee has progressed in their seniority code from the previous year + + :return: The input csv file with column called Progress added + """ + # ===== Read file ===== # file = "Seniority.csv" requestPath = work_path.request diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py index 7eea634b..85d9acbb 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py @@ -42,6 +42,12 @@ def text_collector(stream): @xml_collector def message_collector(stream): + """ + Collect messages from XML elements and yield events + + :param stream: An iterator of events from an XML parser + :yield: Events of type HeaderEvent, CSWWEvent or LALevelEvent + """ stream = peekable(stream) assert stream.peek().tag == "Message", "Expected Message, got {}".format( stream.peek().tag From 8ca8f2b99f7c0144c83151e7a41984ab9592d56b Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Tue, 1 Aug 2023 09:55:43 +0000 Subject: [PATCH 31/40] Amend docstring for seniority.py --- .../SWFtools/analysis/seniority.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/liiatools/datasets/social_work_workforce/SWFtools/analysis/seniority.py b/liiatools/datasets/social_work_workforce/SWFtools/analysis/seniority.py index 0d94afe6..cd5d787e 100644 --- a/liiatools/datasets/social_work_workforce/SWFtools/analysis/seniority.py +++ b/liiatools/datasets/social_work_workforce/SWFtools/analysis/seniority.py @@ -10,10 +10,12 @@ def seniority(): """ - Assign a seniority code to each row in the input CSV file - based on the role start date, agency worker status and org role. - - :return: CSV file with column SeniorityCode added. + Assign a seniority code to each worker in the input CSV file based on + the role start date, agency worker status and org role. + + Also add two columns indicating whether they are new and whether they left in the census year. + + :return: CSV file with columns SeniorityCode, NewOrNot and LeftOrNot added. 
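# The docstring above compresses a decision table. A toy illustration of the
# shape of that logic only - the real mapping lives in SENIORITY_CODE_DICT and
# the body of seniority(), and every threshold and code below is a made-up
# example, not the production rule set:
def toy_seniority_code(years_in_role: float, is_agency_worker: bool) -> int:
    if is_agency_worker:
        return 5  # agency staff coded separately
    if years_in_role < 2:
        return 1  # early career
    return 2  # established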
""" # ===== Read file ===== # file = "merged_modified.csv" @@ -256,7 +258,7 @@ def progressed(): :return: The input csv file with column called Progress added """ - + # ===== Read file ===== # file = "Seniority.csv" requestPath = work_path.request From 9d64595a7a3d998074076677807bdacce4fddf7e Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Wed, 2 Aug 2023 08:33:21 +0000 Subject: [PATCH 32/40] Fix test_cleaner for incorrectly named variable --- .../SWFtools/util/work_path.py | 1 - tests/social_work_workforce/test_cleaner.py | 68 +++++++++---------- 2 files changed, 34 insertions(+), 35 deletions(-) diff --git a/liiatools/datasets/social_work_workforce/SWFtools/util/work_path.py b/liiatools/datasets/social_work_workforce/SWFtools/util/work_path.py index c33db52c..ffa82e95 100644 --- a/liiatools/datasets/social_work_workforce/SWFtools/util/work_path.py +++ b/liiatools/datasets/social_work_workforce/SWFtools/util/work_path.py @@ -9,7 +9,6 @@ # CSWW files must be in one "LA" folder per LA, in the cssw_folder csww_folder = os.path.join(main_folder, "samples/csww") -# print (f"csww_folder is {csww_folder}") # Flat files flatfile_folder = os.path.join(main_folder, "samples/flatfiles") diff --git a/tests/social_work_workforce/test_cleaner.py b/tests/social_work_workforce/test_cleaner.py index 9da0d6ea..e334c9b8 100644 --- a/tests/social_work_workforce/test_cleaner.py +++ b/tests/social_work_workforce/test_cleaner.py @@ -7,12 +7,12 @@ def test_clean_dates(): event = events.TextNode(text=datetime(2019, 1, 15), schema_dict={"date": "%d/%m/%Y"}) cleaned_event = list(cleaner.clean_dates(event))[0] assert cleaned_event.text == datetime(2019, 1, 15).date() - assert cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" event = events.TextNode(text="2019/1/15", schema_dict={"date": "%d/%m/%Y"}) cleaned_event = list(cleaner.clean_dates(event))[0] assert cleaned_event.text == "" - assert cleaned_event.error == "1" + assert cleaned_event.formatting_error == "1" event = events.TextNode( text=datetime(2019, 1, 15), schema_dict={"not_date": "%d/%m/%Y"} @@ -27,12 +27,12 @@ def test_clean_dates(): event = events.TextNode(text=None, schema_dict={"date": "%d/%m/%Y"}) cleaned_event = list(cleaner.clean_dates(event))[0] assert cleaned_event.text == "" - assert cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" event = events.TextNode(text="", schema_dict={"date": "%d/%m/%Y"}) cleaned_event = list(cleaner.clean_dates(event))[0] assert cleaned_event.text == "" - assert cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" def test_clean_categories(): @@ -44,7 +44,7 @@ def test_clean_categories(): ) cleaned_event = list(cleaner.clean_categories(event))[0] assert cleaned_event.text == "0" - assert cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" event = events.TextNode( text="0.0", @@ -54,7 +54,7 @@ def test_clean_categories(): ) cleaned_event = list(cleaner.clean_categories(event))[0] assert cleaned_event.text == "0" - assert cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" event = events.TextNode( text=0, @@ -64,7 +64,7 @@ def test_clean_categories(): ) cleaned_event = list(cleaner.clean_categories(event))[0] assert cleaned_event.text == "0" - assert cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" event = events.TextNode( text="true", @@ -74,7 +74,7 @@ def test_clean_categories(): ) cleaned_event = 
list(cleaner.clean_categories(event))[0] assert cleaned_event.text == "1" - assert cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" event = events.TextNode( text=123, @@ -84,7 +84,7 @@ def test_clean_categories(): ) cleaned_event = list(cleaner.clean_categories(event))[0] assert cleaned_event.text == "" - assert cleaned_event.error == "1" + assert cleaned_event.formatting_error == "1" event = events.TextNode( text="string", @@ -94,7 +94,7 @@ def test_clean_categories(): ) cleaned_event = list(cleaner.clean_categories(event))[0] assert cleaned_event.text == "" - assert cleaned_event.error == "1" + assert cleaned_event.formatting_error == "1" event = events.TextNode( text="string", @@ -116,7 +116,7 @@ def test_clean_categories(): ) cleaned_event = list(cleaner.clean_categories(event))[0] assert cleaned_event.text == "" - assert cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" event = events.TextNode( text="", @@ -126,39 +126,39 @@ def test_clean_categories(): ) cleaned_event = list(cleaner.clean_categories(event))[0] assert cleaned_event.text == "" - assert cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" def test_clean_numeric_integer(): event = events.TextNode(text=123, schema_dict={"numeric": "integer"}) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == 123 - assert cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" event = events.TextNode(text="", schema_dict={"numeric": "integer"}) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == "" - assert cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" event = events.TextNode(text=None, schema_dict={"numeric": "integer"}) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == "" - assert cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" event = events.TextNode(text="123", schema_dict={"numeric": "integer"}) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == 123 - assert cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" event = events.TextNode(text="string", schema_dict={"numeric": "integer"}) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == "" - assert cleaned_event.error == "1" + assert cleaned_event.formatting_error == "1" event = events.TextNode(text=datetime(2017, 3, 17), schema_dict={"numeric": "integer"}) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == "" - assert cleaned_event.error == "1" + assert cleaned_event.formatting_error == "1" event = events.TextNode( text=datetime(2017, 3, 17), schema_dict={"not_numeric": "integer"} @@ -171,47 +171,47 @@ def test_clean_numeric_decimal(): event = events.TextNode(text=123.45, schema_dict={"numeric": "decimal", "decimal": 2}) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == 123.45 - assert cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" event = events.TextNode(text=123.4567, schema_dict={"numeric": "decimal", "decimal": 2}) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == 123.46 - assert cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" event = events.TextNode(text=123.45, schema_dict={"numeric": "decimal", "decimal": 0}) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == 123 - assert 
cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" event = events.TextNode(text=123.456, schema_dict={"numeric": "decimal", "decimal": 6}) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == 123.456 - assert cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" event = events.TextNode(text="", schema_dict={"numeric": "decimal", "decimal": 2}) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == "" - assert cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" event = events.TextNode(text=None, schema_dict={"numeric": "decimal", "decimal": 2}) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == "" - assert cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" event = events.TextNode(text="123.4567", schema_dict={"numeric": "decimal", "decimal": 2}) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == 123.46 - assert cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" event = events.TextNode(text="string", schema_dict={"numeric": "decimal", "decimal": 2}) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == "" - assert cleaned_event.error == "1" + assert cleaned_event.formatting_error == "1" event = events.TextNode(text=datetime(2017, 3, 17), schema_dict={"numeric": "decimal", "decimal": 2}) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == "" - assert cleaned_event.error == "1" + assert cleaned_event.formatting_error == "1" event = events.TextNode( text=datetime(2017, 3, 17), schema_dict={"not_numeric": "decimal"} @@ -223,37 +223,37 @@ def test_clean_regex_string(): event = events.TextNode(text="AB1234567890", schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"}) cleaned_event = list(cleaner.clean_regex_string(event))[0] assert cleaned_event.text == "AB1234567890" - assert cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" event = events.TextNode(text="", schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"}) cleaned_event = list(cleaner.clean_regex_string(event))[0] assert cleaned_event.text == "" - assert cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" event = events.TextNode(text=None, schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"}) cleaned_event = list(cleaner.clean_regex_string(event))[0] assert cleaned_event.text == "" - assert cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" event = events.TextNode(text=" AB1234567890 ", schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"}) cleaned_event = list(cleaner.clean_regex_string(event))[0] assert cleaned_event.text == "AB1234567890" - assert cleaned_event.error == "0" + assert cleaned_event.formatting_error == "0" event = events.TextNode(text="AB123456", schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"}) cleaned_event = list(cleaner.clean_regex_string(event))[0] assert cleaned_event.text == "" - assert cleaned_event.error == "1" + assert cleaned_event.formatting_error == "1" event = events.TextNode(text="AB1234567890123456", schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"}) cleaned_event = list(cleaner.clean_regex_string(event))[0] assert cleaned_event.text == "" - assert cleaned_event.error == "1" + assert cleaned_event.formatting_error == "1" event = events.TextNode(text="AB12345 67890", schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"}) cleaned_event = 
list(cleaner.clean_regex_string(event))[0] assert cleaned_event.text == "" - assert cleaned_event.error == "1" + assert cleaned_event.formatting_error == "1" event = events.TextNode(text="string", schema_dict={"not_regex_string": r"[A-Za-z]{2}\d{10}"}) cleaned_event = list(cleaner.clean_regex_string(event))[0] From df050ccd1c5437ae61fe3340879879403cd32600 Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Wed, 2 Aug 2023 10:19:37 +0000 Subject: [PATCH 33/40] Fix SWFTools output errors --- .../SWFtools/analysis/FTESum.py | 24 ++-- .../SWFtools/analysis/growth_tables.py | 5 +- .../SWFtools/analysis/seniority.py | 119 ++++++++++-------- .../csww/NEW/social_work_workforce_2022.xml | 2 +- 4 files changed, 82 insertions(+), 68 deletions(-) diff --git a/liiatools/datasets/social_work_workforce/SWFtools/analysis/FTESum.py b/liiatools/datasets/social_work_workforce/SWFtools/analysis/FTESum.py index 3275bb2e..6bc8e1b8 100644 --- a/liiatools/datasets/social_work_workforce/SWFtools/analysis/FTESum.py +++ b/liiatools/datasets/social_work_workforce/SWFtools/analysis/FTESum.py @@ -1,6 +1,7 @@ import os import pandas as pd import liiatools.datasets.social_work_workforce.SWFtools.util.work_path as work_path +import liiatools.datasets.social_work_workforce.SWFtools.util.AppLogs as AppLogs def FTESum(): @@ -47,15 +48,18 @@ def FTESum_2020(): df = pd.read_csv(pathFile) df2020 = df[df["YearCensus"] == 2020] + + if df2020.empty: + AppLogs.log("FTESum_2020 error: No data for year 2020", console_output=True) + else: + df5D = df2020[["LEAName", "YearCensus", "SeniorityCode", "SeniorityName", "FTE"]] - df5D = df2020[["LEAName", "YearCensus", "SeniorityCode", "SeniorityName", "FTE"]] - - df5D = df2020.groupby( - ["LEAName", "YearCensus", "SeniorityCode", "SeniorityName"] - ).agg(FTESum=("FTE", "sum")) + df5D = df2020.groupby( + ["LEAName", "YearCensus", "SeniorityCode", "SeniorityName"] + ).agg(FTESum=("FTE", "sum")) - # ===== Save and export file ===== # - fileOutN = "FTESum_2020.xlsx" - requestPath = work_path.request - fileOut = os.path.join(requestPath, fileOutN) - df5D.to_excel(fileOut, merge_cells=False) + # ===== Save and export file ===== # + fileOutN = "FTESum_2020.xlsx" + requestPath = work_path.request + fileOut = os.path.join(requestPath, fileOutN) + df5D.to_excel(fileOut, merge_cells=False) diff --git a/liiatools/datasets/social_work_workforce/SWFtools/analysis/growth_tables.py b/liiatools/datasets/social_work_workforce/SWFtools/analysis/growth_tables.py index 4ad4bab2..2147de37 100644 --- a/liiatools/datasets/social_work_workforce/SWFtools/analysis/growth_tables.py +++ b/liiatools/datasets/social_work_workforce/SWFtools/analysis/growth_tables.py @@ -5,6 +5,7 @@ import os import pandas as pd import liiatools.datasets.social_work_workforce.SWFtools.util.work_path as work_path +import liiatools.datasets.social_work_workforce.SWFtools.util.AppLogs as AppLogs def growth_tables(): @@ -38,7 +39,7 @@ def growth_tables(): fileOut = os.path.join(requestPath, fileOutN) growth_rate_table.to_excel(fileOut, index=False) - print("Auxiliary table: ", fileOutN, " Created") + AppLogs.log(f"Auxiliary table: {fileOutN} created", console_output=True) """ Population growth table: 2020 to 2026 @@ -70,4 +71,4 @@ def growth_tables(): fileOut = os.path.join(requestPath, fileOutN) population_growth_table.to_excel(fileOut, index=False) - print("Auxiliary table: ", fileOutN, " Created") + AppLogs.log(f"Auxiliary table: {fileOutN} created", console_output=True) diff --git 
a/liiatools/datasets/social_work_workforce/SWFtools/analysis/seniority.py b/liiatools/datasets/social_work_workforce/SWFtools/analysis/seniority.py index cd5d787e..df4b2677 100644 --- a/liiatools/datasets/social_work_workforce/SWFtools/analysis/seniority.py +++ b/liiatools/datasets/social_work_workforce/SWFtools/analysis/seniority.py @@ -6,6 +6,7 @@ SENIORITY_CODE_DICT, ) import liiatools.datasets.social_work_workforce.SWFtools.util.work_path as work_path +import liiatools.datasets.social_work_workforce.SWFtools.util.AppLogs as AppLogs def seniority(): @@ -130,61 +131,69 @@ def seniority_forecast_04(): file = "FTESum_2020.xlsx" requestPath = work_path.request pathFile = os.path.join(requestPath, file) - dfSen = pd.read_excel(pathFile) - - # ===== Rename column ===== # - dfSen.rename(columns={"FTESum": "2020"}, inplace=True) - - # ===== Read file ===== # - file = "population_growth_table.xlsx" - requestPath = work_path.request - pathFile = os.path.join(requestPath, file) - p_df = pd.read_excel(pathFile) - - countYearBefore = 2019 - countYearNext = 2020 - for count in range(5): - countYearBefore = countYearBefore + 1 - countYearNext = countYearNext + 1 - # Havering - dfSen.loc[dfSen["LEAName"] == "Havering", str(countYearNext)] = ( - dfSen[str(countYearBefore)] / p_df.loc[0, str(countYearBefore)] - ) * p_df.loc[0, str(countYearNext)] - # Barking and Dagenham - dfSen.loc[dfSen["LEAName"] == "Barking and Dagenham", str(countYearNext)] = ( - dfSen[str(countYearBefore)] / p_df.loc[1, str(countYearBefore)] - ) * p_df.loc[1, str(countYearNext)] - # Redbridge - dfSen.loc[dfSen["LEAName"] == "Redbridge", str(countYearNext)] = ( - dfSen[str(countYearBefore)] / p_df.loc[2, str(countYearBefore)] - ) * p_df.loc[2, str(countYearNext)] - # Newham - dfSen.loc[dfSen["LEAName"] == "Newham", str(countYearNext)] = ( - dfSen[str(countYearBefore)] / p_df.loc[3, str(countYearBefore)] - ) * p_df.loc[3, str(countYearNext)] - # Tower Hamlets - dfSen.loc[dfSen["LEAName"] == "Tower Hamlets", str(countYearNext)] = ( - dfSen[str(countYearBefore)] / p_df.loc[4, str(countYearBefore)] - ) * p_df.loc[4, str(countYearNext)] - # Waltham Forest - dfSen.loc[dfSen["LEAName"] == "Waltham Forest", str(countYearNext)] = ( - dfSen[str(countYearBefore)] / p_df.loc[5, str(countYearBefore)] - ) * p_df.loc[5, str(countYearNext)] - - dfSen["2020"] = dfSen["2020"].round(3) - dfSen["2021"] = dfSen["2021"].round(3) - dfSen["2022"] = dfSen["2022"].round(3) - dfSen["2023"] = dfSen["2023"].round(3) - dfSen["2024"] = dfSen["2024"].round(3) - dfSen["2025"] = dfSen["2025"].round(3) - - dfSen = dfSen.drop(["YearCensus"], axis=1) - - # ===== Save and export file ===== # - fileOutN = "seniority_forecast_04_clean.xlsx" - requestPath = work_path.request - fileOut = os.path.join(requestPath, fileOutN) - dfSen.to_excel(fileOut, index=False, merge_cells=False) + try: + dfSen = pd.read_excel(pathFile) + except FileNotFoundError as e: + AppLogs.log(f"FileNotFoundError: {e.filename}", console_output=True) + return + + + if dfSen.empty: + AppLogs.log("seniority_forecast_04 error: No data in FTESum_2020.xlsx", console_output=True) + else: + # ===== Rename column ===== # + dfSen.rename(columns={"FTESum": "2020"}, inplace=True) + + # ===== Read file ===== # + file = "population_growth_table.xlsx" + requestPath = work_path.request + pathFile = os.path.join(requestPath, file) + p_df = pd.read_excel(pathFile) + + countYearBefore = 2019 + countYearNext = 2020 + for count in range(5): + countYearBefore = countYearBefore + 1 + countYearNext = countYearNext + 1 + # 
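# The per-LEA loop below applies one forecasting step per year:
# FTE[y + 1] = FTE[y] / population[y] * population[y + 1], i.e. last year's
# FTE scaled by the LEA's population growth ratio. Worked example with
# invented numbers:
fte_2020 = 120.0
population = {"2020": 260_000, "2021": 263_900}
fte_2021 = fte_2020 / population["2020"] * population["2021"]
assert round(fte_2021, 1) == 121.8  # a 1.5% population rise lifts FTE by 1.5%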
Havering + dfSen.loc[dfSen["LEAName"] == "Havering", str(countYearNext)] = ( + dfSen[str(countYearBefore)] / p_df.loc[0, str(countYearBefore)] + ) * p_df.loc[0, str(countYearNext)] + # Barking and Dagenham + dfSen.loc[dfSen["LEAName"] == "Barking and Dagenham", str(countYearNext)] = ( + dfSen[str(countYearBefore)] / p_df.loc[1, str(countYearBefore)] + ) * p_df.loc[1, str(countYearNext)] + # Redbridge + dfSen.loc[dfSen["LEAName"] == "Redbridge", str(countYearNext)] = ( + dfSen[str(countYearBefore)] / p_df.loc[2, str(countYearBefore)] + ) * p_df.loc[2, str(countYearNext)] + # Newham + dfSen.loc[dfSen["LEAName"] == "Newham", str(countYearNext)] = ( + dfSen[str(countYearBefore)] / p_df.loc[3, str(countYearBefore)] + ) * p_df.loc[3, str(countYearNext)] + # Tower Hamlets + dfSen.loc[dfSen["LEAName"] == "Tower Hamlets", str(countYearNext)] = ( + dfSen[str(countYearBefore)] / p_df.loc[4, str(countYearBefore)] + ) * p_df.loc[4, str(countYearNext)] + # Waltham Forest + dfSen.loc[dfSen["LEAName"] == "Waltham Forest", str(countYearNext)] = ( + dfSen[str(countYearBefore)] / p_df.loc[5, str(countYearBefore)] + ) * p_df.loc[5, str(countYearNext)] + + dfSen["2020"] = dfSen["2020"].round(3) + dfSen["2021"] = dfSen["2021"].round(3) + dfSen["2022"] = dfSen["2022"].round(3) + dfSen["2023"] = dfSen["2023"].round(3) + dfSen["2024"] = dfSen["2024"].round(3) + dfSen["2025"] = dfSen["2025"].round(3) + + dfSen = dfSen.drop(["YearCensus"], axis=1) + + # ===== Save and export file ===== # + fileOutN = "seniority_forecast_04_clean.xlsx" + requestPath = work_path.request + fileOut = os.path.join(requestPath, fileOutN) + dfSen.to_excel(fileOut, index=False, merge_cells=False) def seniority_forecast_5c(): diff --git a/liiatools/spec/social_work_workforce/samples/csww/NEW/social_work_workforce_2022.xml b/liiatools/spec/social_work_workforce/samples/csww/NEW/social_work_workforce_2022.xml index 3cb567fa..8c3fc4a5 100644 --- a/liiatools/spec/social_work_workforce/samples/csww/NEW/social_work_workforce_2022.xml +++ b/liiatools/spec/social_work_workforce/samples/csww/NEW/social_work_workforce_2022.xml @@ -7,7 +7,7 @@ L - 314 + 316 liiatools.datasets.social_work_workforce.sample_data 2023-03-28T14:54:55Z From 0014c13cc09cc3fbca5b9d1cb0d2f37272c99c2c Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Thu, 3 Aug 2023 08:48:23 +0000 Subject: [PATCH 34/40] Add unit tests for csww_record --- .../lds_csww_clean/csww_record.py | 10 ++- .../social_work_workforce/test_csww_record.py | 80 +++++++++++++++++++ 2 files changed, 87 insertions(+), 3 deletions(-) create mode 100644 tests/social_work_workforce/test_csww_record.py diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py index 85d9acbb..85d058ff 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py @@ -30,6 +30,12 @@ def _reduce_dict(dict_instance): @xml_collector def text_collector(stream): + """ + Create a dictionary of text values for each element + + :param stream: An iterator of events from an XML parser + :return: Dictionary containing element name and text values + """ data_dict = {} current_element = None for event in stream: @@ -49,9 +55,7 @@ def message_collector(stream): :yield: Events of type HeaderEvent, CSWWEvent or LALevelEvent """ stream = peekable(stream) - assert stream.peek().tag == "Message", "Expected 
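# For orientation: message_collector walks a document of this shape (the same
# structure the new unit tests assemble event-by-event; the element values are
# the test fixtures, not real data):
SAMPLE_MESSAGE = """
<Message>
  <Header><Version>1</Version></Header>
  <LALevelVacancies><NumberOfVacancies>100</NumberOfVacancies></LALevelVacancies>
  <CSWWWorker><ID>100</ID><SWENo>AB123456789</SWENo><Agency>0</Agency></CSWWWorker>
</Message>
"""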
Message, got {}".format( - stream.peek().tag - ) + assert stream.peek().tag == "Message", f"Expected Message, got {stream.peek().tag}" while stream: event = stream.peek() if event.get("tag") == "Header": diff --git a/tests/social_work_workforce/test_csww_record.py b/tests/social_work_workforce/test_csww_record.py new file mode 100644 index 00000000..88cd3890 --- /dev/null +++ b/tests/social_work_workforce/test_csww_record.py @@ -0,0 +1,80 @@ +# Import the unittest module and the code to be tested +import unittest +from sfdata_stream_parser.events import StartElement, EndElement, TextNode +from liiatools.datasets.social_work_workforce.lds_csww_clean.csww_record import ( + text_collector, + message_collector, + CSWWEvent, + LALevelEvent, + HeaderEvent, +) +from liiatools.datasets.social_work_workforce.lds_csww_clean.xml import dom_parse + + +class TestRecord(unittest.TestCase): + def generate_text_element(self, tag: str, text): + """ + Create a complete TextNode sandwiched between a StartElement and EndElement + + :param tag: XML tag + :param text: text to be stored in the given XML tag, could be a string, integer, float etc. + :return: StartElement and EndElement with given tags and TextNode with given text + """ + yield StartElement(tag=tag) + yield TextNode(text=str(text)) + yield EndElement(tag=tag) + + def generate_test_csww_file(self): + """ + Generate a sample children's social work workforce census file + + :return: stream of generators containing information required to create an XML file + """ + yield StartElement(tag="Message") + yield StartElement(tag="Header") + yield from self.generate_text_element(tag="Version", text=1) + yield EndElement(tag="Header") + yield StartElement(tag="LALevelVacancies") + yield from self.generate_text_element(tag="NumberOfVacancies", text=100) + yield EndElement(tag="LALevelVacancies") + yield StartElement(tag="CSWWWorker") + yield from self.generate_text_element(tag="ID", text=100) + yield from self.generate_text_element(tag="SWENo", text="AB123456789") + yield from self.generate_text_element(tag="Agency", text=0) + yield EndElement(tag="CSWWWorker") + yield EndElement(tag="Message") + + def test_text_collector(self): + # test that the text_collector returns a dictionary of events and their text values from the stream + test_stream = self.generate_test_csww_file() + test_record = text_collector(test_stream) + self.assertEqual(len(test_record), 5) + self.assertEqual( + test_record, + { + "Version": "1", + "NumberOfVacancies": "100", + "ID": "100", + "SWENo": "AB123456789", + "Agency": "0", + }, + ) + + def test_message_collector(self): + # test that the message_collector yields events of the correct type from the stream + test_stream = self.generate_test_csww_file() + test_events = list(message_collector(test_stream)) + self.assertEqual(len(test_events), 3) + self.assertIsInstance(test_events[0], HeaderEvent) + self.assertEqual(test_events[0].record, {"Version": "1"}) + self.assertIsInstance(test_events[1], LALevelEvent) + self.assertEqual(test_events[1].record, {"NumberOfVacancies": "100"}) + self.assertIsInstance(test_events[2], CSWWEvent) + self.assertEqual( + test_events[2].record, {"ID": "100", "SWENo": "AB123456789", "Agency": "0"} + ) + + +# Run the tests +if __name__ == "__main__": + unittest.main() From e63c1051a3c6447e7bbb44a5a3432997133bea89 Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Fri, 25 Aug 2023 14:32:43 +0000 Subject: [PATCH 35/40] Rewrite to_numeric in converters and cleaner 
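
Folding to_integer and to_decimal into a single to_numeric gives the cleaner
one call site for both numeric shapes. Expected behaviour, as exercised by the
tests in this patch (usage sketch only, not part of the diff):

    from liiatools.datasets.social_work_workforce.lds_csww_clean.converters import to_numeric

    to_numeric("12.3456", "decimal", decimal_places=3)  # -> 12.346
    to_numeric("1.0", "integer")                        # -> 1
    to_numeric("string", "decimal", decimal_places=2)   # -> "error"
    to_numeric("", "integer")                           # -> ""
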
--- .../lds_csww_clean/cleaner.py | 11 ++-- .../lds_csww_clean/converters.py | 62 +++++++------------ tests/social_work_workforce/test_cleaner.py | 7 +-- .../social_work_workforce/test_converters.py | 40 ++++++------ 4 files changed, 50 insertions(+), 70 deletions(-) diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py index fb4e857f..5e1e51b2 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py @@ -6,8 +6,7 @@ from liiatools.datasets.social_work_workforce.lds_csww_clean.converters import ( to_category, - to_integer, - to_decimal, + to_numeric, to_regex, ) @@ -73,10 +72,14 @@ def clean_numeric(event): numeric = event.schema_dict["numeric"] try: if numeric == "integer": - clean_text = to_integer(event.text, numeric) + clean_text = to_numeric(value=event.text, config=numeric) elif numeric == "decimal": decimal_places = int(event.schema_dict["decimal"]) - clean_text = to_decimal(event.text, numeric, decimal_places) + # min_inclusive = event.schema_dict["min_inclusive"] + # print(f"min_inclusive = {min_inclusive}") + # max_inclusive = event.schema_dict["max_inclusive"] + # print(f"max_inclusive = {max_inclusive}") + clean_text = to_numeric(value=event.text, config=numeric, decimal_places=decimal_places) # min_inclusive=min_inclusive, max_inclusive=max_inclusive if clean_text != "error": return event.from_event(event, text=clean_text, formatting_error="0") return event.from_event(event, text="", formatting_error="1") diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py index f3ba9ddc..f2009770 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py @@ -31,14 +31,26 @@ def to_category(string, categories): return "error" -def to_integer(value, config): +def to_numeric(value, config, decimal_places=0): # min_inclusive=None, max_inclusive=None """ - Convert any strings that should be integers based on the config into integers + Convert any strings that should be integer or decimal based on the config into integer or decimal - :param value: Some value to convert to an integer + :param value: Some value to convert to an integer or decimal :param config: The loaded configuration - :return: Either an integer value or an "error" string if value could not be formatted as integer or a blank string if no value provided + :param dec_places: The number of decimal places to apply (default 0) + :param min_inclusive: Minimum value allowed (default none) + :param max_inclusive: Maximum value allowed (default none) + :return: Either an integer, a decimal value formatted to number of decimal places or an "error" string if value could not be formatted as decimal or a blank string if no value provided """ + if config == "decimal": + if value or value == 0: + try: + float(value) + round_to_dp = round(float(value), decimal_places) + return round_to_dp + except (ValueError, TypeError): + return "error" # value incorrectly formatted + return "" # no value provided if config == "integer": if value or value==0: if isinstance(value, str) and value[-2:] == ".0": @@ -52,27 +64,6 @@ def to_integer(value, config): return value -def to_decimal(value, config, decimal_places=0): - """ - Convert any strings that should be decimal based 
on the config into decimals - - :param value: Some value to convert to a decimal - :param config: The loaded configuration - :param dec_places: The number of decimal places to apply (default 0) - :return: Either a decimal value formatted to number of decimal places or an "error" string if value could not be formatted as decimal or a blank string if no value provided - """ - if config == "decimal": - if value or value == 0: - try: - float(value) - round_to_dp = round(float(value), decimal_places) - return round_to_dp - except (ValueError, TypeError): - return "error" # value incorrectly formatted - return "" # no value provided - return value - - def to_regex(value, pattern): """ Convert any strings that should conform to regex pattern based on the schema into regex string @@ -81,18 +72,11 @@ def to_regex(value, pattern): :param pattern: The regex pattern to compare :return: Either a string matching the regex pattern or an "error" string if value does not match pattern or a blank string if no value provided """ - if pattern: - if value: - stripped_value = value.strip() - try: - isfullmatch = re.fullmatch(pattern, stripped_value) - if isfullmatch: - return stripped_value - else: - return "error" # value does not match regex pattern - except (ValueError, TypeError): - return "error" # value incorrectly formatted - else: - return "" # no value provided + if value: + stripped_value = value.strip() + isfullmatch = re.fullmatch(pattern, stripped_value) + if isfullmatch: + return stripped_value + return "error" # value does not match regex pattern else: - return value \ No newline at end of file + return "" # no value provided \ No newline at end of file diff --git a/tests/social_work_workforce/test_cleaner.py b/tests/social_work_workforce/test_cleaner.py index e334c9b8..6f4992c1 100644 --- a/tests/social_work_workforce/test_cleaner.py +++ b/tests/social_work_workforce/test_cleaner.py @@ -129,7 +129,7 @@ def test_clean_categories(): assert cleaned_event.formatting_error == "0" -def test_clean_numeric_integer(): +def test_clean_numeric(): event = events.TextNode(text=123, schema_dict={"numeric": "integer"}) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == 123 @@ -166,8 +166,6 @@ def test_clean_numeric_integer(): cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == datetime(2017, 3, 17) - -def test_clean_numeric_decimal(): event = events.TextNode(text=123.45, schema_dict={"numeric": "decimal", "decimal": 2}) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == 123.45 @@ -262,7 +260,6 @@ def test_clean_regex_string(): # test_clean_dates() # test_clean_categories() -# test_clean_numeric_integer() -# test_clean_numeric_decimal() +# test_clean_numeric() # test_clean_regex_string() diff --git a/tests/social_work_workforce/test_converters.py b/tests/social_work_workforce/test_converters.py index 88b8ff1d..36aed7ce 100644 --- a/tests/social_work_workforce/test_converters.py +++ b/tests/social_work_workforce/test_converters.py @@ -26,28 +26,25 @@ def test_to_category(): assert converters.to_category(None, category_dict) == "" -def test_to_integer(): - assert converters.to_integer("3000", "integer") == 3000 - assert converters.to_integer(123, "integer") == 123 - assert converters.to_integer("1.0", "integer") == 1 - assert converters.to_integer("date", "") == "date" - assert converters.to_integer(123.456, "integer") == 123 - assert converters.to_integer(0, "integer") == 0 - assert converters.to_integer("", "integer") 
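# The rewritten to_regex above leans on re.fullmatch, which only succeeds when
# the whole (stripped) string fits the pattern - unlike re.match, which would
# happily accept a matching prefix. Quick standalone check:
import re

pattern = r"[A-Za-z]{2}\d{10}"
assert re.fullmatch(pattern, "AB1234567890")                # whole string matches
assert re.fullmatch(pattern, "AB1234567890123456") is None  # extra digits reject it
assert re.match(pattern, "AB1234567890123456")              # match alone would pass a prefix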
== "" - assert converters.to_integer(None, "integer") == "" - - -def test_to_decimal(): +def test_to_numeric(): decimal_places = 3 - assert converters.to_decimal("12.345", "decimal", decimal_places) == 12.345 - assert converters.to_decimal("12.3456", "decimal", decimal_places) == 12.346 - assert converters.to_decimal("12.3", "decimal", decimal_places) == 12.3 - assert converters.to_decimal(12.3456, "decimal", decimal_places) == 12.346 - assert converters.to_decimal("1.0", "decimal", decimal_places) == 1 - assert converters.to_decimal(0, "decimal", decimal_places) == 0 - assert converters.to_decimal("date", "") == "date" - assert converters.to_decimal("", "decimal", decimal_places) == "" - assert converters.to_decimal(None, "decimal", decimal_places) == "" + assert converters.to_numeric("12.345", "decimal", decimal_places) == 12.345 + assert converters.to_numeric("12.3456", "decimal", decimal_places) == 12.346 + assert converters.to_numeric("12.3", "decimal", decimal_places) == 12.3 + assert converters.to_numeric(12.3456, "decimal", decimal_places) == 12.346 + assert converters.to_numeric("1.0", "decimal", decimal_places) == 1 + assert converters.to_numeric(0, "decimal", decimal_places) == 0 + assert converters.to_numeric("date", "") == "date" + assert converters.to_numeric("", "decimal", decimal_places) == "" + assert converters.to_numeric(None, "decimal", decimal_places) == "" + assert converters.to_numeric("3000", "integer") == 3000 + assert converters.to_numeric(123, "integer") == 123 + assert converters.to_numeric("1.0", "integer") == 1 + assert converters.to_numeric("date", "") == "date" + assert converters.to_numeric(123.456, "integer") == 123 + assert converters.to_numeric(0, "integer") == 0 + assert converters.to_numeric("", "integer") == "" + assert converters.to_numeric(None, "integer") == "" def test_to_regex(): @@ -57,7 +54,6 @@ def test_to_regex(): assert converters.to_regex("AB1234567890123456",pattern) == "error" # too long assert converters.to_regex("AB12345",pattern) == "error" # too short assert converters.to_regex("xxxxOz2054309383",pattern) == "error" # invalid format - assert converters.to_regex("date", "") == "date" # no pattern assert converters.to_regex("", pattern) == "" # no value assert converters.to_regex(None, pattern) == "" # no value From 3f3313d9e6af74a8015c8995031c04972014a142 Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Wed, 30 Aug 2023 09:43:36 +0000 Subject: [PATCH 36/40] Implement min/max in to_numeric --- .../lds_csww_clean/cleaner.py | 8 +++----- .../lds_csww_clean/converters.py | 10 ++++++---- tests/social_work_workforce/test_cleaner.py | 19 +++++++++++++++++-- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py index 5e1e51b2..57acf184 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py @@ -75,11 +75,9 @@ def clean_numeric(event): clean_text = to_numeric(value=event.text, config=numeric) elif numeric == "decimal": decimal_places = int(event.schema_dict["decimal"]) - # min_inclusive = event.schema_dict["min_inclusive"] - # print(f"min_inclusive = {min_inclusive}") - # max_inclusive = event.schema_dict["max_inclusive"] - # print(f"max_inclusive = {max_inclusive}") - clean_text = to_numeric(value=event.text, config=numeric, 
decimal_places=decimal_places) # min_inclusive=min_inclusive, max_inclusive=max_inclusive + min_inclusive = event.schema_dict.get("min_inclusive", None) + max_inclusive = event.schema_dict.get("max_inclusive", None) + clean_text = to_numeric(value=event.text, config=numeric, decimal_places=decimal_places, min_inclusive=min_inclusive, max_inclusive=max_inclusive) if clean_text != "error": return event.from_event(event, text=clean_text, formatting_error="0") return event.from_event(event, text="", formatting_error="1") diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py index f2009770..9c813a7d 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py @@ -31,7 +31,7 @@ def to_category(string, categories): return "error" -def to_numeric(value, config, decimal_places=0): # min_inclusive=None, max_inclusive=None +def to_numeric(value, config, decimal_places=0, min_inclusive=None, max_inclusive=None): """ Convert any strings that should be integer or decimal based on the config into integer or decimal @@ -46,10 +46,12 @@ def to_numeric(value, config, decimal_places=0): # min_inclusive=None, max_inclu if value or value == 0: try: float(value) - round_to_dp = round(float(value), decimal_places) - return round_to_dp except (ValueError, TypeError): - return "error" # value incorrectly formatted + return "error" # value is not a float + round_to_dp = round(float(value), decimal_places) + if (min_inclusive is None or round_to_dp >= min_inclusive) and (max_inclusive is None or round_to_dp <= max_inclusive): + return round_to_dp + return "error" # min/max error return "" # no value provided if config == "integer": if value or value==0: diff --git a/tests/social_work_workforce/test_cleaner.py b/tests/social_work_workforce/test_cleaner.py index 6f4992c1..8bd6c22d 100644 --- a/tests/social_work_workforce/test_cleaner.py +++ b/tests/social_work_workforce/test_cleaner.py @@ -201,15 +201,30 @@ def test_clean_numeric(): assert cleaned_event.text == 123.46 assert cleaned_event.formatting_error == "0" + event = events.TextNode(text="0.45", schema_dict={"numeric": "decimal", "decimal": 2, "min_inclusive": 0, "max_inclusive": 1}) + cleaned_event = list(cleaner.clean_numeric(event))[0] + assert cleaned_event.text == 0.45 + assert cleaned_event.formatting_error == "0" + + event = events.TextNode(text="1.99", schema_dict={"numeric": "decimal", "decimal": 2, "min_inclusive": 0, "max_inclusive": 1}) + cleaned_event = list(cleaner.clean_numeric(event))[0] + assert cleaned_event.text == "" + assert cleaned_event.formatting_error == "1" # exceeds maximum value + + event = events.TextNode(text="0.50", schema_dict={"numeric": "decimal", "decimal": 2, "min_inclusive": 1, "max_inclusive": 9}) + cleaned_event = list(cleaner.clean_numeric(event))[0] + assert cleaned_event.text == "" + assert cleaned_event.formatting_error == "1" # less than minimum value + event = events.TextNode(text="string", schema_dict={"numeric": "decimal", "decimal": 2}) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == "" - assert cleaned_event.formatting_error == "1" + assert cleaned_event.formatting_error == "1" # not a decimal event = events.TextNode(text=datetime(2017, 3, 17), schema_dict={"numeric": "decimal", "decimal": 2}) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == "" - 
assert cleaned_event.formatting_error == "1" + assert cleaned_event.formatting_error == "1" # not a decimal event = events.TextNode( text=datetime(2017, 3, 17), schema_dict={"not_numeric": "decimal"} From 23b511b06def2be74037d900a8646cee3c761020 Mon Sep 17 00:00:00 2001 From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com> Date: Wed, 30 Aug 2023 10:02:32 +0000 Subject: [PATCH 37/40] Add tests for min/max to_numeric --- tests/social_work_workforce/test_converters.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/social_work_workforce/test_converters.py b/tests/social_work_workforce/test_converters.py index 36aed7ce..a4526ebe 100644 --- a/tests/social_work_workforce/test_converters.py +++ b/tests/social_work_workforce/test_converters.py @@ -37,6 +37,11 @@ def test_to_numeric(): assert converters.to_numeric("date", "") == "date" assert converters.to_numeric("", "decimal", decimal_places) == "" assert converters.to_numeric(None, "decimal", decimal_places) == "" + assert converters.to_numeric("0.3", "decimal", decimal_places, min_inclusive=0, max_inclusive=1) == 0.3 + assert converters.to_numeric("0.3", "decimal", decimal_places, min_inclusive=0) == 0.3 + assert converters.to_numeric("0.3", "decimal", decimal_places, max_inclusive=1) == 0.3 + assert converters.to_numeric("1.99", "decimal", decimal_places, min_inclusive=0, max_inclusive=1) == "error" + assert converters.to_numeric("0.3", "decimal", decimal_places, min_inclusive=1, max_inclusive=99) == "error" assert converters.to_numeric("3000", "integer") == 3000 assert converters.to_numeric(123, "integer") == 123 assert converters.to_numeric("1.0", "integer") == 1 From 72f649da506694d8ef57652d283ef900211205d3 Mon Sep 17 00:00:00 2001 From: patrick-troy <58770937+patrick-troy@users.noreply.github.com> Date: Fri, 1 Sep 2023 15:51:06 +0100 Subject: [PATCH 38/40] add validation, fix minor errors, run python black --- liiatools/datasets/cin_census/cin_cli.py | 9 +- .../cin_census/lds_cin_clean/filters.py | 9 +- .../cin_census/lds_cin_clean/logger.py | 12 +- .../cin_census/lds_cin_la_agg/process.py | 2 +- .../cin_census/lds_cin_pan_agg/process.py | 2 +- liiatools/datasets/shared_functions/common.py | 20 ++- .../SWFtools/analysis/FTESum.py | 8 +- .../SWFtools/analysis/growth_tables.py | 2 +- .../SWFtools/analysis/seniority.py | 14 +- .../social_work_workforce/csww_cli.py | 3 +- .../csww_main_functions.py | 37 ++---- .../lds_csww_clean/cleaner.py | 19 +-- .../lds_csww_clean/converters.py | 39 +++--- .../lds_csww_clean/csww_record.py | 8 +- .../lds_csww_clean/file_creator.py | 10 +- .../lds_csww_clean/filters.py | 89 ++----------- .../lds_csww_clean/logger.py | 51 +++++-- .../lds_csww_clean/schema.py | 5 +- .../lds_csww_clean/validator.py | 63 +++++++++ .../lds_csww_clean/xml.py | 49 ------- .../lds_csww_data_generator/stream.py | 2 +- .../spec/social_work_workforce/la-agg.yml | 6 + .../spec/social_work_workforce/pan-agg.yml | 6 + tests/cin_census/test_converter.py | 4 +- tests/cin_census/test_file_creator.py | 1 - tests/cin_census/test_schema.py | 4 +- tests/common/test_common.py | 12 +- tests/s903/test_file_creator.py | 102 ++++++++++---- tests/s903/test_populate.py | 4 +- tests/social_work_workforce/test_cleaner.py | 124 +++++++++++++----- .../social_work_workforce/test_converters.py | 51 ++++--- .../social_work_workforce/test_csww_record.py | 7 - tests/social_work_workforce/test_logger.py | 45 +++++-- 33 files changed, 471 insertions(+), 348 deletions(-) create mode 100644 
liiatools/datasets/social_work_workforce/lds_csww_clean/validator.py delete mode 100644 liiatools/datasets/social_work_workforce/lds_csww_clean/xml.py diff --git a/liiatools/datasets/cin_census/cin_cli.py b/liiatools/datasets/cin_census/cin_cli.py index 4648bcef..36d36400 100644 --- a/liiatools/datasets/cin_census/cin_cli.py +++ b/liiatools/datasets/cin_census/cin_cli.py @@ -28,7 +28,7 @@ check_year, check_year_within_range, save_year_error, - save_incorrect_year_error + save_incorrect_year_error, ) # Dependencies for la_agg() @@ -118,7 +118,12 @@ def cleanfile(input, la_code, la_log_dir, output): years_to_go_back = 6 year_start_month = 6 reference_date = datetime.now() - if check_year_within_range(input_year, years_to_go_back, year_start_month, reference_date) is False: + if ( + check_year_within_range( + input_year, years_to_go_back, year_start_month, reference_date + ) + is False + ): save_incorrect_year_error(input, la_log_dir) return diff --git a/liiatools/datasets/cin_census/lds_cin_clean/filters.py b/liiatools/datasets/cin_census/lds_cin_clean/filters.py index fe3d48e5..93909677 100644 --- a/liiatools/datasets/cin_census/lds_cin_clean/filters.py +++ b/liiatools/datasets/cin_census/lds_cin_clean/filters.py @@ -81,7 +81,10 @@ def _create_category_dict(field: str, file: str): documentation = element.findall(search_doc) for i, d in enumerate(documentation): name_dict = {"name": d.text} - category_dict["category"][i] = {**category_dict["category"][i], **name_dict} + category_dict["category"][i] = { + **category_dict["category"][i], + **name_dict, + } return category_dict @@ -99,7 +102,9 @@ def _create_float_dict(field: str, file: str): search_restriction = f".//{{http://www.w3.org/2001/XMLSchema}}restriction" restriction = element.findall(search_restriction) for r in restriction: - code_dict = {"numeric": r.get("base")[3:]} # Remove the "xs:" from the start of the base string + code_dict = { + "numeric": r.get("base")[3:] + } # Remove the "xs:" from the start of the base string if code_dict["numeric"] == "decimal": float_dict = code_dict diff --git a/liiatools/datasets/cin_census/lds_cin_clean/logger.py b/liiatools/datasets/cin_census/lds_cin_clean/logger.py index 92198036..d37268ef 100644 --- a/liiatools/datasets/cin_census/lds_cin_clean/logger.py +++ b/liiatools/datasets/cin_census/lds_cin_clean/logger.py @@ -66,9 +66,7 @@ def counter(event, counter_check, value_error, structural_error, blank_error): ) else: if hasattr(event, "validation_message"): - blank_error.append( - f"LAchildID: blank, Node: {event.schema.name}" - ) + blank_error.append(f"LAchildID: blank, Node: {event.schema.name}") elif hasattr(event.schema, "name"): value_error.append(f"LAchildID: blank, Node: {event.schema.name}") else: @@ -100,13 +98,7 @@ def save_errors_la( """ filename = str(Path(input).resolve().stem) start_time = f"{datetime.now():%Y-%m-%dT%H%M%SZ}" - if ( - value_error - or structural_error - or field_error - or blank_error - or LAchildID_error - ): + if value_error or structural_error or field_error or blank_error or LAchildID_error: with open( f"{Path(la_log_dir, filename)}_error_log_{start_time}.txt", "a", diff --git a/liiatools/datasets/cin_census/lds_cin_la_agg/process.py b/liiatools/datasets/cin_census/lds_cin_la_agg/process.py index fd328637..370e96c1 100644 --- a/liiatools/datasets/cin_census/lds_cin_la_agg/process.py +++ b/liiatools/datasets/cin_census/lds_cin_la_agg/process.py @@ -109,7 +109,7 @@ def _time_between_date_series(later_date_series, earlier_date_series, years=0, d elif years == 1: 
years_series = (days_series / 365).apply(np.floor) - years_series = years_series.astype('Int32') + years_series = years_series.astype("Int32") return years_series diff --git a/liiatools/datasets/cin_census/lds_cin_pan_agg/process.py b/liiatools/datasets/cin_census/lds_cin_pan_agg/process.py index ea692096..e8879f10 100644 --- a/liiatools/datasets/cin_census/lds_cin_pan_agg/process.py +++ b/liiatools/datasets/cin_census/lds_cin_pan_agg/process.py @@ -94,7 +94,7 @@ def _time_between_date_series(later_date_series, earlier_date_series, years=0, d elif years == 1: years_series = (days_series / 365).apply(np.floor) - years_series = years_series.astype('Int32') + years_series = years_series.astype("Int32") return years_series diff --git a/liiatools/datasets/shared_functions/common.py b/liiatools/datasets/shared_functions/common.py index be019df9..3095d2e9 100644 --- a/liiatools/datasets/shared_functions/common.py +++ b/liiatools/datasets/shared_functions/common.py @@ -83,7 +83,7 @@ def save_year_error(input, la_log_dir): :param la_log_dir: Path to the local authority's log folder :return: Text file containing the error information """ - + filename = Path(input).resolve().stem start_time = f"{datetime.now():%d-%m-%Y %Hh-%Mm-%Ss}" with open( @@ -93,8 +93,8 @@ def save_year_error(input, la_log_dir): f.write( f"Could not process '{filename}' because no year was found in the name of the file" ) - - + + def check_year_within_range(year, num_of_years, new_year_start_month, as_at_date): """ Check that year is within permitted range of data retention policy @@ -113,13 +113,13 @@ def check_year_within_range(year, num_of_years, new_year_start_month, as_at_date current_month = as_at_date.month if current_month < new_year_start_month: earliest_allowed_year = current_year - num_of_years - latest_allowed_year = current_year + latest_allowed_year = current_year else: earliest_allowed_year = current_year - num_of_years + 1 # roll forward one year latest_allowed_year = current_year + 1 return earliest_allowed_year <= year_to_check <= latest_allowed_year - + def save_incorrect_year_error(input, la_log_dir): """ @@ -177,10 +177,16 @@ def check_year(filename): fy_match = re.search(r"(\d{2})(.{0,3}\d{2})(.*)(\d*)", filename) if fy_match: - if len(fy_match.group(2)) == 2 and int(fy_match.group(2)) == int(fy_match.group(1)) + 1: + if ( + len(fy_match.group(2)) == 2 + and int(fy_match.group(2)) == int(fy_match.group(1)) + 1 + ): year = "20" + fy_match.group(2) return year - if len(fy_match.group(2)) == 3 and int(fy_match.group(2)[-2:]) == int(fy_match.group(1)) + 1: + if ( + len(fy_match.group(2)) == 3 + and int(fy_match.group(2)[-2:]) == int(fy_match.group(1)) + 1 + ): year = "20" + fy_match.group(2)[-2:] return year if int(fy_match.group(3)[1:3]) == int(fy_match.group(2)[-2:]) + 1: diff --git a/liiatools/datasets/social_work_workforce/SWFtools/analysis/FTESum.py b/liiatools/datasets/social_work_workforce/SWFtools/analysis/FTESum.py index 6bc8e1b8..ecafe1b5 100644 --- a/liiatools/datasets/social_work_workforce/SWFtools/analysis/FTESum.py +++ b/liiatools/datasets/social_work_workforce/SWFtools/analysis/FTESum.py @@ -40,7 +40,7 @@ def FTESum_2020(): :return: Excel file with the name FTESum_2020.xlsx and the same path as the input file """ - + # ===== Read file ===== # file = "CompMergSen.csv" requestPath = work_path.request @@ -48,11 +48,13 @@ def FTESum_2020(): df = pd.read_csv(pathFile) df2020 = df[df["YearCensus"] == 2020] - + if df2020.empty: AppLogs.log("FTESum_2020 error: No data for year 2020", console_output=True) 
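# check_year_within_range (reformatted in the common.py hunk above) implements
# a rolling retention window. Standalone illustration with the values the
# pipelines pass (years_to_go_back=6, year_start_month=6):
from datetime import datetime

from liiatools.datasets.shared_functions.common import check_year_within_range

assert check_year_within_range(2017, 6, 6, datetime(2023, 5, 31)) is True  # window 2017..2023
assert check_year_within_range(2017, 6, 6, datetime(2023, 6, 1)) is False  # rolls to 2018..2024
assert check_year_within_range(2024, 6, 6, datetime(2023, 6, 1)) is True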
else: - df5D = df2020[["LEAName", "YearCensus", "SeniorityCode", "SeniorityName", "FTE"]] + df5D = df2020[ + ["LEAName", "YearCensus", "SeniorityCode", "SeniorityName", "FTE"] + ] df5D = df2020.groupby( ["LEAName", "YearCensus", "SeniorityCode", "SeniorityName"] diff --git a/liiatools/datasets/social_work_workforce/SWFtools/analysis/growth_tables.py b/liiatools/datasets/social_work_workforce/SWFtools/analysis/growth_tables.py index 2147de37..def4cbf2 100644 --- a/liiatools/datasets/social_work_workforce/SWFtools/analysis/growth_tables.py +++ b/liiatools/datasets/social_work_workforce/SWFtools/analysis/growth_tables.py @@ -12,7 +12,7 @@ def growth_tables(): """ Create two Excel files with tables of growth rates and population growth for six LEAs """ - + growth_rate_df = { "LEAName": [ "Havering", diff --git a/liiatools/datasets/social_work_workforce/SWFtools/analysis/seniority.py b/liiatools/datasets/social_work_workforce/SWFtools/analysis/seniority.py index df4b2677..1d1d5236 100644 --- a/liiatools/datasets/social_work_workforce/SWFtools/analysis/seniority.py +++ b/liiatools/datasets/social_work_workforce/SWFtools/analysis/seniority.py @@ -137,9 +137,11 @@ def seniority_forecast_04(): AppLogs.log(f"FileNotFoundError: {e.filename}", console_output=True) return - if dfSen.empty: - AppLogs.log("seniority_forecast_04 error: No data in FTESum_2020.xlsx", console_output=True) + AppLogs.log( + "seniority_forecast_04 error: No data in FTESum_2020.xlsx", + console_output=True, + ) else: # ===== Rename column ===== # dfSen.rename(columns={"FTESum": "2020"}, inplace=True) @@ -160,9 +162,13 @@ def seniority_forecast_04(): dfSen[str(countYearBefore)] / p_df.loc[0, str(countYearBefore)] ) * p_df.loc[0, str(countYearNext)] # Barking and Dagenham - dfSen.loc[dfSen["LEAName"] == "Barking and Dagenham", str(countYearNext)] = ( + dfSen.loc[ + dfSen["LEAName"] == "Barking and Dagenham", str(countYearNext) + ] = ( dfSen[str(countYearBefore)] / p_df.loc[1, str(countYearBefore)] - ) * p_df.loc[1, str(countYearNext)] + ) * p_df.loc[ + 1, str(countYearNext) + ] # Redbridge dfSen.loc[dfSen["LEAName"] == "Redbridge", str(countYearNext)] = ( dfSen[str(countYearBefore)] / p_df.loc[2, str(countYearBefore)] diff --git a/liiatools/datasets/social_work_workforce/csww_cli.py b/liiatools/datasets/social_work_workforce/csww_cli.py index 99f7ca33..486e132c 100644 --- a/liiatools/datasets/social_work_workforce/csww_cli.py +++ b/liiatools/datasets/social_work_workforce/csww_cli.py @@ -24,6 +24,7 @@ def csww(): """ pass + @csww.command() @click.option( "--i", @@ -139,4 +140,4 @@ def pan_agg(input, la_code, output): :param output: should specify the path to the output folder :return: None """ - csww_main_functions.pan_agg(input, la_code, output) \ No newline at end of file + csww_main_functions.pan_agg(input, la_code, output) diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py index 63d599e2..6b88566f 100644 --- a/liiatools/datasets/social_work_workforce/csww_main_functions.py +++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py @@ -2,8 +2,13 @@ from datetime import datetime import yaml -from liiatools.datasets.social_work_workforce.lds_csww_data_generator.sample_data import generate_sample_csww_file -from liiatools.datasets.social_work_workforce.lds_csww_data_generator.stream import consume +# Dependencies for generate_sample() +from liiatools.datasets.social_work_workforce.lds_csww_data_generator.sample_data import ( + 
generate_sample_csww_file, +) +from liiatools.datasets.social_work_workforce.lds_csww_data_generator.stream import ( + consume, +) # Dependencies for cleanfile() from liiatools.datasets.social_work_workforce.lds_csww_clean.parse import ( @@ -23,6 +28,7 @@ cleaner, logger, filters, + validator as clean_validator, ) from liiatools.spec import common as common_asset_dir @@ -133,6 +139,7 @@ def cleanfile(input, la_code, la_log_dir, output): # Clean stream stream = cleaner.clean(stream) + stream = clean_validator.validate_elements(stream) stream = logger.log_errors(stream) # Output results @@ -212,29 +219,3 @@ def pan_agg(input, la_code, output): la_name = flip_dict(config["data_codes"])[la_code] csww_df = pan_process.merge_agg_files(output, table_name, csww_df, la_name) pan_process.export_pan_file(output, table_name, csww_df) - - -# Run in Visual Studio Code |> - -# cleanfile( -# "/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022_sc.xml", -# "BAD", -# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", -# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", -# ) - -# la_agg( -# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_worker_clean.csv", -# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", -# ) - -# la_agg( -# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/social_work_workforce_2022_lalevel_clean.csv", -# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", -# ) - -# pan_agg( -# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean/CSWW_CSWWWorker_merged.csv", -# "BAD", -# "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean", -# ) diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py index 57acf184..752ccea5 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/cleaner.py @@ -71,13 +71,16 @@ def clean_numeric(event): """ numeric = event.schema_dict["numeric"] try: - if numeric == "integer": - clean_text = to_numeric(value=event.text, config=numeric) - elif numeric == "decimal": - decimal_places = int(event.schema_dict["decimal"]) - min_inclusive = event.schema_dict.get("min_inclusive", None) - max_inclusive = event.schema_dict.get("max_inclusive", None) - clean_text = to_numeric(value=event.text, config=numeric, decimal_places=decimal_places, min_inclusive=min_inclusive, max_inclusive=max_inclusive) + decimal_places = event.schema_dict.get("decimal", None) + min_inclusive = event.schema_dict.get("min_inclusive", None) + max_inclusive = event.schema_dict.get("max_inclusive", None) + clean_text = to_numeric( + value=event.text, + config=numeric, + decimal_places=decimal_places, + min_inclusive=min_inclusive, + max_inclusive=max_inclusive, + ) if clean_text != "error": return event.from_event(event, text=clean_text, formatting_error="0") return event.from_event(event, text="", formatting_error="1") @@ -111,7 +114,7 @@ def clean(stream): """ Compile the cleaning functions - :param event: A list of event objects + :param stream: A list of event objects :return: An updated list of event objects """ stream = clean_dates(stream) diff --git 
a/liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py index 9c813a7d..4c895b04 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/converters.py @@ -19,7 +19,7 @@ def to_category(string, categories): return code["code"] if ( str(string).lower() == str(code["code"]).lower() + ".0" - ): # In case integers are read as floats + ): # In case integers are read as floats return code["code"] if "name" in code: if str(code["name"]).lower() in str(string).lower(): @@ -37,31 +37,25 @@ def to_numeric(value, config, decimal_places=0, min_inclusive=None, max_inclusiv :param value: Some value to convert to an integer or decimal :param config: The loaded configuration - :param dec_places: The number of decimal places to apply (default 0) + :param decimal_places: The number of decimal places to apply (default 0) :param min_inclusive: Minimum value allowed (default none) :param max_inclusive: Maximum value allowed (default none) - :return: Either an integer, a decimal value formatted to number of decimal places or an "error" string if value could not be formatted as decimal or a blank string if no value provided + :return: Either an integer, a decimal value formatted to number of decimal places or an "error" string if + value could not be formatted as decimal or a blank string if no value provided """ if config == "decimal": if value or value == 0: - try: - float(value) - except (ValueError, TypeError): - return "error" # value is not a float - round_to_dp = round(float(value), decimal_places) - if (min_inclusive is None or round_to_dp >= min_inclusive) and (max_inclusive is None or round_to_dp <= max_inclusive): + round_to_dp = round(float(value), int(decimal_places)) + if (min_inclusive is None or round_to_dp >= float(min_inclusive)) and ( + max_inclusive is None or round_to_dp <= float(max_inclusive) + ): return round_to_dp - return "error" # min/max error - return "" # no value provided + return "error" # min/max error + return "" # no value provided if config == "integer": - if value or value==0: - if isinstance(value, str) and value[-2:] == ".0": - return int(float(value)) - elif value or value == 0: - return int(value) - else: - return "error" # value incorrectly formatted - return "" # no value provided + if value or value == 0: + return int(float(value)) + return "" # no value provided else: return value @@ -72,13 +66,14 @@ def to_regex(value, pattern): :param value: Some value to convert to a regex string :param pattern: The regex pattern to compare - :return: Either a string matching the regex pattern or an "error" string if value does not match pattern or a blank string if no value provided + :return: Either a string matching the regex pattern or an "error" string if value does not match pattern or a + blank string if no value provided """ if value: stripped_value = value.strip() isfullmatch = re.fullmatch(pattern, stripped_value) if isfullmatch: return stripped_value - return "error" # value does not match regex pattern + return "error" # value does not match regex pattern else: - return "" # no value provided \ No newline at end of file + return "" # no value provided diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py index 85d058ff..b0adb5f4 100644 --- 
a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py @@ -34,7 +34,7 @@ def text_collector(stream): Create a dictionary of text values for each element :param stream: An iterator of events from an XML parser - :return: Dictionary containing element name and text values + :return: Dictionary containing element name and text values """ data_dict = {} current_element = None @@ -82,9 +82,15 @@ def message_collector(stream): "GenderCurrent", "Ethnicity", "QualInst", + "QualLevel", "StepUpGrad", + "OrgRole", "RoleStartDate", "StartOrigin", + "RoleEndDate", + "LeaverDestination", + "ReasonLeave", + "FTE30", "Cases30", "WorkingDaysLost", "ContractWeeks", diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py index 38da3243..c7aae5ac 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py @@ -15,13 +15,13 @@ def convert_to_dataframe(data): return data -def get_year(data, year): +def add_year(data, year): data["YEAR"] = year return data def convert_to_datetime(data): - if set(["PersonBirthDate", "RoleStartDate"]).issubset(data): + if {"PersonBirthDate", "RoleStartDate"}.issubset(data): data[["PersonBirthDate", "RoleStartDate"]] = data[ ["PersonBirthDate", "RoleStartDate"] ].apply(pd.to_datetime) @@ -48,7 +48,9 @@ def degrade_SWENo(data): """ if "SWENo" in data: if data["SWENo"] is not None: - data["SWENo"] = data["SWENo"].apply(lambda row: swe_hash(row) if row else row ) + data["SWENo"] = data["SWENo"].apply( + lambda row: swe_hash(row) if row else row + ) return data @@ -81,7 +83,7 @@ def add_fields(input_year, data, la_name): :return: Dataframe with year and LA added """ data = convert_to_dataframe(data) - data = get_year(data, input_year) + data = add_year(data, input_year) data = add_la_name(data, la_name) return data diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/filters.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/filters.py index 3073a5c8..8b81520f 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/filters.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/filters.py @@ -2,11 +2,9 @@ from typing import List import xml.etree.ElementTree as ET import xmlschema -from xmlschema import XMLSchemaValidatorError from sfdata_stream_parser.checks import type_check from sfdata_stream_parser import events -from sfdata_stream_parser.collectors import collector, block_check from sfdata_stream_parser.filters.generic import streamfilter, pass_event log = logging.getLogger(__name__) @@ -88,6 +86,14 @@ def _create_category_dict(field: str, file: str): def _create_float_dict(field: str, file: str): + """ + Create a dictionary containing the different float parameters of a given field to conform floats + e.g. 
{'numeric': 'decimal', 'fixed': 'true', 'decimal': '6', 'min_inclusive': '0', 'max_inclusive': '1'} + + :param field: Name of the float field you want to find the parameters for + :param file: Path to the .xsd schema containing possible float parameters + :return: Dictionary of float parameters + """ float_dict = None xsd_xml = ET.parse(file) @@ -173,7 +179,11 @@ def add_schema(event, schema: xmlschema.XMLSchema): return event.from_event(event, path=path, schema=el) -@streamfilter(check=type_check(events.TextNode), fail_function=pass_event) +@streamfilter( + check=type_check(events.TextNode), + fail_function=pass_event, + error_function=pass_event, +) def add_schema_dict(event, schema_path: str): """ Add a dictionary of schema attributes to an event object based on its type and occurrence @@ -206,76 +216,3 @@ def add_schema_dict(event, schema_path: str): schema_dict = {**schema_dict, **{"canbeblank": False}} return event.from_event(event, schema_dict=schema_dict) - - -def _get_validation_error(schema, node) -> XMLSchemaValidatorError: - try: - schema.validate(node) - return None - except XMLSchemaValidatorError as e: - return e - - -@streamfilter(check=type_check(events.StartElement), fail_function=pass_event) -def validate_elements(event): - """ - Validates each element, and if not valid, sets the properties: - - * valid - (always False) - * validation_message - a descriptive validation message - """ - validation_error = _get_validation_error(event.schema, event.node) - if validation_error is None: - return event - - message = ( - validation_error.reason - if hasattr(validation_error, "reason") - else validation_error.message - ) - return events.StartElement.from_event( - event, valid=False, validation_message=message - ) - - -@streamfilter(check=type_check(events.StartElement), fail_function=pass_event) -def prop_to_attribute(event, prop_name): - """ - Elevates an event property to an XML attribute. 
- """ - if hasattr(event, prop_name): - attrs = getattr(event, "attrs", {}) - attrs[prop_name] = getattr(event, prop_name) - return events.StartElement.from_event(event, attrs=attrs) - else: - return event - - -@collector(check=block_check(events.StartElement), receive_stream=True) -def remove_invalid(stream, tag_name): - """ - Filters out events with the given tag name if they are not valid - """ - stream = list(stream) - first = stream[0] - last = stream[-1] - stream = stream[1:-1] - - if first.tag == tag_name and not getattr(first, "valid", True): - yield from [] - else: - yield first - - if len(stream) > 0: - yield from remove_invalid(stream, tag_name=tag_name) - - yield last - - -@streamfilter(check=lambda x: True) -def counter(event, counter_check, context): - if counter_check(event): - context["pass"] += 1 - else: - context["fail"] += 1 - return event diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/logger.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/logger.py index 8c7e715a..b27eb03a 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/logger.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/logger.py @@ -31,10 +31,7 @@ def create_formatting_error_list(stream): formatting_error_list=formatting_error_list, ) formatting_error_list = None - elif ( - formatting_error_list is not None - and isinstance(event, events.TextNode) - ): + elif formatting_error_list is not None and isinstance(event, events.TextNode): try: if event.formatting_error == "1": formatting_error_list.append(event.schema.name) @@ -44,7 +41,9 @@ def create_formatting_error_list(stream): @streamfilter( - check=type_check(events.TextNode), fail_function=pass_event, error_function=pass_event + check=type_check(events.TextNode), + fail_function=pass_event, + error_function=pass_event, ) def blank_error_check(event): """ @@ -82,10 +81,7 @@ def create_blank_error_list(stream): elif isinstance(event, ErrorTable): yield ErrorTable.from_event(event, blank_error_list=blank_error_list) blank_error_list = None - elif ( - blank_error_list is not None - and isinstance(event, events.TextNode) - ): + elif blank_error_list is not None and isinstance(event, events.TextNode): try: if event.blank_error == "1": blank_error_list.append(event.schema.name) @@ -94,6 +90,27 @@ def create_blank_error_list(stream): yield event +def create_validation_error_list(stream): + """ + Create a list of the validation errors + + :param stream: A filtered list of event objects + :return: An updated list of event objects + """ + validation_error_list = [] + for event in stream: + if isinstance(event, ErrorTable): + yield ErrorTable.from_event( + event, validation_error_list=validation_error_list + ) + validation_error_list = None + elif isinstance(event, events.StartElement): + validation_message = getattr(event, "validation_message", None) + if validation_message is not None: + validation_error_list.append(validation_message) + yield event + + def save_errors_la(stream, la_log_dir, filename): """ Count the error events and save them as a text file in the Local Authority Logs directory @@ -110,8 +127,13 @@ def save_errors_la(stream, la_log_dir, filename): if isinstance(event, ErrorTable) and ( event.formatting_error_list is not None and event.blank_error_list is not None + and event.validation_error_list is not None ): - if event.formatting_error_list or event.blank_error_list: + if ( + event.formatting_error_list + or event.blank_error_list + or event.validation_error_list + ): with open( 
f"{os.path.join(la_log_dir, filename)}_error_log_{start_time}.txt", "a", @@ -138,6 +160,14 @@ def save_errors_la(stream, la_log_dir, filename): str(blank_counter_dict)[9:-2] ) # Remove "Counter({" and "})" from string f.write("\n") + if event.validation_error_list: + event.validation_error_list = list( + dict.fromkeys(event.validation_error_list) + ) # Remove duplicate information from list but + # keep order + for item in event.validation_error_list: + f.write(item) + f.write("\n") except AttributeError: pass @@ -154,4 +184,5 @@ def log_errors(stream): stream = blank_error_check(stream) stream = create_formatting_error_list(stream) stream = create_blank_error_list(stream) + stream = create_validation_error_list(stream) return stream diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py index c0905a73..99baeae0 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py @@ -12,7 +12,10 @@ def __init__(self, year): @cached_property def path(self): - return Path(social_work_workforce_dir.__file__).parent / f"social_work_workforce_{self.__year}.xsd" + return ( + Path(social_work_workforce_dir.__file__).parent + / f"social_work_workforce_{self.__year}.xsd" + ) class Schema: diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/validator.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/validator.py new file mode 100644 index 00000000..c2177a71 --- /dev/null +++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/validator.py @@ -0,0 +1,63 @@ +import logging +import re + +from sfdata_stream_parser.checks import type_check +from sfdata_stream_parser import events +from sfdata_stream_parser.filters.generic import streamfilter, pass_event + +log = logging.getLogger(__name__) + + +def _get_validation_error(event, schema, node): + """ + Validate an event + + :param event: A filtered list of event objects + :param schema: The xml schema attached to a given event + :param node: The node attached to a given event + :return: Event and error information + """ + try: + validation_error_iterator = schema.iter_errors(node) + for validation_error in validation_error_iterator: + if " expected" in validation_error.reason: + + reg_line = re.compile( + r"(?=\(line.*?(\w+))", re.MULTILINE + ) # Search for the number after "line" in error + missing_field_line = reg_line.search(str(validation_error)).group(1) + + reg_exp = re.compile( + r"(?=\sTag.*?(\w+))" + ) # Search for the first word after "Tag" + missing_field = reg_exp.search(validation_error.reason).group(1) + + errors = ( + f"Missing required field: '{missing_field}' which occurs in the node starting on " + f"line: {missing_field_line}" + ) + + return event.from_event(event, reason=errors) + + except AttributeError: # Raised for nodes that don't exist in the schema + reason = f"Unexpected node '{event.tag}'" + return event.from_event(event, reason=reason) + + +@streamfilter(check=type_check(events.StartElement), fail_function=pass_event) +def validate_elements(event): + """ + Validates each element, and if not valid, sets the properties: + + :param event: A filtered list of event objects + + * valid - (always False) + * validation_message - a descriptive validation message + """ + validation_error = _get_validation_error(event, event.schema, event.node) + + if validation_error is None: + return event + + message = validation_error.reason + 
return event.from_event(event, valid=False, validation_message=message) diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/xml.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/xml.py deleted file mode 100644 index 58bc03fc..00000000 --- a/liiatools/datasets/social_work_workforce/lds_csww_clean/xml.py +++ /dev/null @@ -1,49 +0,0 @@ -from sfdata_stream_parser.events import ( - StartElement, - EndElement, - TextNode, - CommentNode, - ProcessingInstructionNode, -) - -try: - from lxml import etree -except ImportError: - pass - - -def dom_parse(source, **kwargs): - """ - Equivalent of the xml parse included in the sfdata_stream_parser package, but uses the ET DOM - and allows direct DOM manipulation. - """ - parser = etree.iterparse(source, events=("start", "end", "comment", "pi"), **kwargs) - for action, elem in parser: - if action == "start": - yield StartElement(tag=elem.tag, attrib=elem.attrib, node=elem) - yield TextNode(text=elem.text) - elif action == "end": - yield EndElement(tag=elem.tag, node=elem) - if elem.tail: - yield TextNode(text=elem.tail) - elif action == "comment": - yield CommentNode(text=elem.text, node=elem) - elif action == "pi": - yield ProcessingInstructionNode(name=elem.target, text=elem.text, node=elem) - else: - raise ValueError(f"Unknown event: {action}") - - -def to_xml(stream, builder: etree.TreeBuilder): - for ev in stream: - if isinstance(ev, StartElement): - builder.start(ev.tag, getattr(ev, "attrs", {})) - elif isinstance(ev, EndElement): - builder.end(ev.tag) - elif isinstance(ev, TextNode): - builder.data(ev.text) - elif isinstance(ev, CommentNode): - builder.comment(ev.text) - elif isinstance(ev, ProcessingInstructionNode): - builder.pi(ev.name, ev.text) - yield ev diff --git a/liiatools/datasets/social_work_workforce/lds_csww_data_generator/stream.py b/liiatools/datasets/social_work_workforce/lds_csww_data_generator/stream.py index 45fc26d6..13c1bf56 100644 --- a/liiatools/datasets/social_work_workforce/lds_csww_data_generator/stream.py +++ b/liiatools/datasets/social_work_workforce/lds_csww_data_generator/stream.py @@ -3,7 +3,7 @@ def consume(stream) -> Counter: """ - Ensures the stream is consumed and returns a summary of the numbers of each event that has been encountered + Ensures the stream is consumed and returns a summary of the numbers of each event that has been encountered """ stream_types = [type(ev) for ev in stream] return Counter(stream_types) diff --git a/liiatools/spec/social_work_workforce/la-agg.yml b/liiatools/spec/social_work_workforce/la-agg.yml index 3d64fe54..781ebccd 100644 --- a/liiatools/spec/social_work_workforce/la-agg.yml +++ b/liiatools/spec/social_work_workforce/la-agg.yml @@ -7,9 +7,15 @@ column_names: - GenderCurrent - Ethnicity - QualInst + - QualLevel - StepUpGrad + - OrgRole - RoleStartDate - StartOrigin + - RoleEndDate + - LeaverDestination + - ReasonLeave + - FTE30 - Cases30 - WorkingDaysLost - ContractWeeks diff --git a/liiatools/spec/social_work_workforce/pan-agg.yml b/liiatools/spec/social_work_workforce/pan-agg.yml index ecf3b9b6..f74bdf34 100644 --- a/liiatools/spec/social_work_workforce/pan-agg.yml +++ b/liiatools/spec/social_work_workforce/pan-agg.yml @@ -7,9 +7,15 @@ column_names: - GenderCurrent - Ethnicity - QualInst + - QualLevel - StepUpGrad + - OrgRole - RoleStartDate - StartOrigin + - RoleEndDate + - LeaverDestination + - ReasonLeave + - FTE30 - Cases30 - WorkingDaysLost - ContractWeeks diff --git a/tests/cin_census/test_converter.py b/tests/cin_census/test_converter.py index 
74cd2eb5..e6f044db 100644 --- a/tests/cin_census/test_converter.py +++ b/tests/cin_census/test_converter.py @@ -15,7 +15,7 @@ def __init__(self): [ events.TextNode(text="false", schema=Schema()), events.TextNode(text="true", schema=Schema()), - events.TextNode(text="TRUE", schema=Schema()) + events.TextNode(text="TRUE", schema=Schema()), ] ) stream = list(stream) @@ -38,5 +38,3 @@ def __init__(self): assert stream[0].text == "false" assert stream[1].text == "true" assert stream[2].text == "true" - - diff --git a/tests/cin_census/test_file_creator.py b/tests/cin_census/test_file_creator.py index 0e8ac8a3..cc7d259d 100644 --- a/tests/cin_census/test_file_creator.py +++ b/tests/cin_census/test_file_creator.py @@ -2,7 +2,6 @@ import pandas as pd from datetime import datetime -import tempfile as tmp def test_get_year(): diff --git a/tests/cin_census/test_schema.py b/tests/cin_census/test_schema.py index 8a5a48fc..f19bd3c5 100644 --- a/tests/cin_census/test_schema.py +++ b/tests/cin_census/test_schema.py @@ -3,8 +3,8 @@ def test_schema(): - schema=Schema(2022).schema + schema = Schema(2022).schema assert schema.name == "CIN_schema_2022.xsd" - schema=Schema(2017).schema + schema = Schema(2017).schema assert schema.name == "CIN_schema_2017.xsd" diff --git a/tests/common/test_common.py b/tests/common/test_common.py index b1d7f0f5..318ac61f 100644 --- a/tests/common/test_common.py +++ b/tests/common/test_common.py @@ -2,7 +2,7 @@ check_postcode, flip_dict, check_year, - check_year_within_range + check_year_within_range, ) from liiatools.datasets.shared_functions.converters import ( to_short_postcode, @@ -65,11 +65,11 @@ def test_check_year(): def test_check_year_within_range(): - assert check_year_within_range(2016, 6, 6, datetime.datetime(2023,5,31)) is False - assert check_year_within_range(2023, 6, 6, datetime.datetime(2023,5,31)) is True - assert check_year_within_range(2024, 6, 6, datetime.datetime(2023,5,31)) is False - assert check_year_within_range(2024, 6, 6, datetime.datetime(2023,6,1)) is True - assert check_year_within_range(2013, 10, 2, datetime.datetime(2023,1,31)) is True + assert check_year_within_range(2016, 6, 6, datetime.datetime(2023, 5, 31)) is False + assert check_year_within_range(2023, 6, 6, datetime.datetime(2023, 5, 31)) is True + assert check_year_within_range(2024, 6, 6, datetime.datetime(2023, 5, 31)) is False + assert check_year_within_range(2024, 6, 6, datetime.datetime(2023, 6, 1)) is True + assert check_year_within_range(2013, 10, 2, datetime.datetime(2023, 1, 31)) is True class TestCheckYear(unittest.TestCase): diff --git a/tests/s903/test_file_creator.py b/tests/s903/test_file_creator.py index fb224795..c081b740 100644 --- a/tests/s903/test_file_creator.py +++ b/tests/s903/test_file_creator.py @@ -10,30 +10,54 @@ def test_coalesce_row(): stream = ( - events.StartRow(expected_columns = ["Header_1", "Header_2"]), - events.Cell(cell="value_one", header="Header_1", expected_columns = ["Header_1", "Header_2"]), - events.Cell(cell="value_two", header="Header_2", expected_columns = ["Header_1", "Header_2"]), - events.EndRow(expected_columns = ["Header_1", "Header_2"]), + events.StartRow(expected_columns=["Header_1", "Header_2"]), + events.Cell( + cell="value_one", + header="Header_1", + expected_columns=["Header_1", "Header_2"], + ), + events.Cell( + cell="value_two", + header="Header_2", + expected_columns=["Header_1", "Header_2"], + ), + events.EndRow(expected_columns=["Header_1", "Header_2"]), ) events_complete_rows = list(file_creator.coalesce_row(stream))[0] assert 
events_complete_rows.row == ["value_one", "value_two"] stream = ( - events.StartRow(expected_columns = ["Header_1", "Header_2"]), - events.Cell(cell=125, header="Header_1", expected_columns = ["Header_1", "Header_2"]), - events.Cell(cell=341, header="Header_2", expected_columns = ["Header_1", "Header_2"]), - events.EndRow(year=2019, expected_columns = ["Header_1", "Header_2"]), + events.StartRow(expected_columns=["Header_1", "Header_2"]), + events.Cell( + cell=125, header="Header_1", expected_columns=["Header_1", "Header_2"] + ), + events.Cell( + cell=341, header="Header_2", expected_columns=["Header_1", "Header_2"] + ), + events.EndRow(year=2019, expected_columns=["Header_1", "Header_2"]), ) events_complete_rows = list(file_creator.coalesce_row(stream))[0] assert events_complete_rows.row == [125, 341] assert events_complete_rows.year == 2019 stream = ( - events.StartRow(expected_columns = ["Header_1", "Header_2", "Header_3"]), - events.Cell(cell=125, header="Header_1", expected_columns = ["Header_1", "Header_2", "Header_3"]), - events.Cell(cell="string", header="Header_2", expected_columns = ["Header_1", "Header_2", "Header_3"]), - events.Cell(cell=datetime(2020, 3, 23), header="Header_3", expected_columns = ["Header_1", "Header_2", "Header_3"]), - events.EndRow(expected_columns = ["Header_1", "Header_2", "Header_3"]), + events.StartRow(expected_columns=["Header_1", "Header_2", "Header_3"]), + events.Cell( + cell=125, + header="Header_1", + expected_columns=["Header_1", "Header_2", "Header_3"], + ), + events.Cell( + cell="string", + header="Header_2", + expected_columns=["Header_1", "Header_2", "Header_3"], + ), + events.Cell( + cell=datetime(2020, 3, 23), + header="Header_3", + expected_columns=["Header_1", "Header_2", "Header_3"], + ), + events.EndRow(expected_columns=["Header_1", "Header_2", "Header_3"]), ) events_complete_rows = list(file_creator.coalesce_row(stream))[0] assert events_complete_rows.row == [ @@ -43,37 +67,53 @@ def test_coalesce_row(): ] stream = ( - events.StartRow(expected_columns = ["Header_1", "Header_2"]), - events.Cell(cell=125, header="Header_1", expected_columns = ["Header_1", "Header_2"]), - events.Cell(cell=None, header="Header_2", expected_columns = ["Header_1", "Header_2"]), - events.EndRow(expected_columns = ["Header_1", "Header_2"]), + events.StartRow(expected_columns=["Header_1", "Header_2"]), + events.Cell( + cell=125, header="Header_1", expected_columns=["Header_1", "Header_2"] + ), + events.Cell( + cell=None, header="Header_2", expected_columns=["Header_1", "Header_2"] + ), + events.EndRow(expected_columns=["Header_1", "Header_2"]), ) events_complete_rows = list(file_creator.coalesce_row(stream))[0] assert events_complete_rows.row == [125, None] stream = ( - events.StartRow(expected_columns = ["Header_1", "Header_2"]), - events.Cell(cell=125, header="Header_1", expected_columns = ["Header_1", "Header_2"]), - events.Cell(cell="", header="Header_2", expected_columns = ["Header_1", "Header_2"]), - events.EndRow(expected_columns = ["Header_1", "Header_2"]), + events.StartRow(expected_columns=["Header_1", "Header_2"]), + events.Cell( + cell=125, header="Header_1", expected_columns=["Header_1", "Header_2"] + ), + events.Cell( + cell="", header="Header_2", expected_columns=["Header_1", "Header_2"] + ), + events.EndRow(expected_columns=["Header_1", "Header_2"]), ) events_complete_rows = list(file_creator.coalesce_row(stream))[0] assert events_complete_rows.row == [125, ""] stream = ( - events.StartTable(expected_columns = ["Header_1", "Header_2"]), - 
events.StartRow(expected_columns = ["Header_1", "Header_2"]), - events.Cell(cell="value_one", header="Header_1", expected_columns = ["Header_1", "Header_2"]), - events.Cell(cell="value_two", header="Header_2", expected_columns = ["Header_1", "Header_2"]), - events.EndRow(expected_columns = ["Header_1", "Header_2"]), - events.EndTable(expected_columns = ["Header_1", "Header_2"]), + events.StartTable(expected_columns=["Header_1", "Header_2"]), + events.StartRow(expected_columns=["Header_1", "Header_2"]), + events.Cell( + cell="value_one", + header="Header_1", + expected_columns=["Header_1", "Header_2"], + ), + events.Cell( + cell="value_two", + header="Header_2", + expected_columns=["Header_1", "Header_2"], + ), + events.EndRow(expected_columns=["Header_1", "Header_2"]), + events.EndTable(expected_columns=["Header_1", "Header_2"]), ) events_complete_rows = list(file_creator.coalesce_row(stream)) for event in events_complete_rows: if isinstance(event, file_creator.RowEvent): assert event.row == ["value_one", "value_two"] else: - assert event.as_dict() == {"expected_columns":["Header_1", "Header_2"]} + assert event.as_dict() == {"expected_columns": ["Header_1", "Header_2"]} def test_create_tables(): @@ -97,7 +137,11 @@ def test_create_tables(): assert event.data[0] == data[0] stream = ( - events.StartTable(headers=["CHILD ID", "DOB"], expected_columns=expected_columns, match_error="some_error"), + events.StartTable( + headers=["CHILD ID", "DOB"], + expected_columns=expected_columns, + match_error="some_error", + ), file_creator.RowEvent(row=[12345, datetime(2019, 4, 15).date()], year=2019), events.EndTable(), ) diff --git a/tests/s903/test_populate.py b/tests/s903/test_populate.py index d7892945..bd910f16 100644 --- a/tests/s903/test_populate.py +++ b/tests/s903/test_populate.py @@ -12,13 +12,13 @@ def test_add_year_column(): events.EndRow(), events.EndTable(), ], - year = "2022" + year="2022", ) stream = list(stream) assert stream[0].year == "2022" assert stream[1].year == "2022" assert not hasattr(stream[2], "year") - + def test_create_la_child_id(): stream = populate.create_la_child_id( diff --git a/tests/social_work_workforce/test_cleaner.py b/tests/social_work_workforce/test_cleaner.py index 8bd6c22d..7f6cd54c 100644 --- a/tests/social_work_workforce/test_cleaner.py +++ b/tests/social_work_workforce/test_cleaner.py @@ -4,7 +4,9 @@ def test_clean_dates(): - event = events.TextNode(text=datetime(2019, 1, 15), schema_dict={"date": "%d/%m/%Y"}) + event = events.TextNode( + text=datetime(2019, 1, 15), schema_dict={"date": "%d/%m/%Y"} + ) cleaned_event = list(cleaner.clean_dates(event))[0] assert cleaned_event.text == datetime(2019, 1, 15).date() assert cleaned_event.formatting_error == "0" @@ -145,7 +147,7 @@ def test_clean_numeric(): assert cleaned_event.text == "" assert cleaned_event.formatting_error == "0" - event = events.TextNode(text="123", schema_dict={"numeric": "integer"}) + event = events.TextNode(text="123.0", schema_dict={"numeric": "integer"}) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == 123 assert cleaned_event.formatting_error == "0" @@ -155,7 +157,9 @@ def test_clean_numeric(): assert cleaned_event.text == "" assert cleaned_event.formatting_error == "1" - event = events.TextNode(text=datetime(2017, 3, 17), schema_dict={"numeric": "integer"}) + event = events.TextNode( + text=datetime(2017, 3, 17), schema_dict={"numeric": "integer"} + ) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == "" assert 
cleaned_event.formatting_error == "1" @@ -166,26 +170,34 @@ def test_clean_numeric(): cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == datetime(2017, 3, 17) - event = events.TextNode(text=123.45, schema_dict={"numeric": "decimal", "decimal": 2}) + event = events.TextNode( + text=123.45, schema_dict={"numeric": "decimal", "decimal": 2} + ) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == 123.45 assert cleaned_event.formatting_error == "0" - - event = events.TextNode(text=123.4567, schema_dict={"numeric": "decimal", "decimal": 2}) + + event = events.TextNode( + text=123.4567, schema_dict={"numeric": "decimal", "decimal": 2} + ) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == 123.46 assert cleaned_event.formatting_error == "0" - event = events.TextNode(text=123.45, schema_dict={"numeric": "decimal", "decimal": 0}) + event = events.TextNode( + text=123.45, schema_dict={"numeric": "decimal", "decimal": 0} + ) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == 123 assert cleaned_event.formatting_error == "0" - event = events.TextNode(text=123.456, schema_dict={"numeric": "decimal", "decimal": 6}) + event = events.TextNode( + text=123.456, schema_dict={"numeric": "decimal", "decimal": 6} + ) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == 123.456 assert cleaned_event.formatting_error == "0" - + event = events.TextNode(text="", schema_dict={"numeric": "decimal", "decimal": 2}) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == "" @@ -196,35 +208,72 @@ def test_clean_numeric(): assert cleaned_event.text == "" assert cleaned_event.formatting_error == "0" - event = events.TextNode(text="123.4567", schema_dict={"numeric": "decimal", "decimal": 2}) + event = events.TextNode( + text="123.4567", schema_dict={"numeric": "decimal", "decimal": 2} + ) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == 123.46 assert cleaned_event.formatting_error == "0" - event = events.TextNode(text="0.45", schema_dict={"numeric": "decimal", "decimal": 2, "min_inclusive": 0, "max_inclusive": 1}) + event = events.TextNode( + text="string", schema_dict={"numeric": "decimal", "decimal": 2} + ) + cleaned_event = list(cleaner.clean_numeric(event))[0] + assert cleaned_event.text == "" + assert cleaned_event.formatting_error == "1" + + event = events.TextNode( + text="0.45", + schema_dict={ + "numeric": "decimal", + "decimal": 2, + "min_inclusive": 0, + "max_inclusive": 1, + }, + ) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == 0.45 assert cleaned_event.formatting_error == "0" - event = events.TextNode(text="1.99", schema_dict={"numeric": "decimal", "decimal": 2, "min_inclusive": 0, "max_inclusive": 1}) + event = events.TextNode( + text="1.99", + schema_dict={ + "numeric": "decimal", + "decimal": 2, + "min_inclusive": 0, + "max_inclusive": 1, + }, + ) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == "" - assert cleaned_event.formatting_error == "1" # exceeds maximum value + assert cleaned_event.formatting_error == "1" # exceeds maximum value - event = events.TextNode(text="0.50", schema_dict={"numeric": "decimal", "decimal": 2, "min_inclusive": 1, "max_inclusive": 9}) + event = events.TextNode( + text="0.50", + schema_dict={ + "numeric": "decimal", + "decimal": 2, + "min_inclusive": 1, + "max_inclusive": 9, + }, + ) cleaned_event = 
list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == "" - assert cleaned_event.formatting_error == "1" # less than minimum value + assert cleaned_event.formatting_error == "1" # less than minimum value - event = events.TextNode(text="string", schema_dict={"numeric": "decimal", "decimal": 2}) + event = events.TextNode( + text="string", schema_dict={"numeric": "decimal", "decimal": 2} + ) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == "" - assert cleaned_event.formatting_error == "1" # not a decimal + assert cleaned_event.formatting_error == "1" - event = events.TextNode(text=datetime(2017, 3, 17), schema_dict={"numeric": "decimal", "decimal": 2}) + event = events.TextNode( + text=datetime(2017, 3, 17), schema_dict={"numeric": "decimal", "decimal": 2} + ) cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == "" - assert cleaned_event.formatting_error == "1" # not a decimal + assert cleaned_event.formatting_error == "1" event = events.TextNode( text=datetime(2017, 3, 17), schema_dict={"not_numeric": "decimal"} @@ -232,8 +281,11 @@ def test_clean_numeric(): cleaned_event = list(cleaner.clean_numeric(event))[0] assert cleaned_event.text == datetime(2017, 3, 17) + def test_clean_regex_string(): - event = events.TextNode(text="AB1234567890", schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"}) + event = events.TextNode( + text="AB1234567890", schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"} + ) cleaned_event = list(cleaner.clean_regex_string(event))[0] assert cleaned_event.text == "AB1234567890" assert cleaned_event.formatting_error == "0" @@ -243,38 +295,44 @@ def test_clean_regex_string(): assert cleaned_event.text == "" assert cleaned_event.formatting_error == "0" - event = events.TextNode(text=None, schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"}) + event = events.TextNode( + text=None, schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"} + ) cleaned_event = list(cleaner.clean_regex_string(event))[0] assert cleaned_event.text == "" assert cleaned_event.formatting_error == "0" - event = events.TextNode(text=" AB1234567890 ", schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"}) + event = events.TextNode( + text=" AB1234567890 ", + schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"}, + ) cleaned_event = list(cleaner.clean_regex_string(event))[0] assert cleaned_event.text == "AB1234567890" assert cleaned_event.formatting_error == "0" - event = events.TextNode(text="AB123456", schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"}) + event = events.TextNode( + text="AB123456", schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"} + ) cleaned_event = list(cleaner.clean_regex_string(event))[0] assert cleaned_event.text == "" assert cleaned_event.formatting_error == "1" - event = events.TextNode(text="AB1234567890123456", schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"}) + event = events.TextNode( + text="AB1234567890123456", schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"} + ) cleaned_event = list(cleaner.clean_regex_string(event))[0] assert cleaned_event.text == "" assert cleaned_event.formatting_error == "1" - event = events.TextNode(text="AB12345 67890", schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"}) + event = events.TextNode( + text="AB12345 67890", schema_dict={"regex_string": r"[A-Za-z]{2}\d{10}"} + ) cleaned_event = list(cleaner.clean_regex_string(event))[0] assert cleaned_event.text == "" assert cleaned_event.formatting_error == "1" - event = events.TextNode(text="string", schema_dict={"not_regex_string": 
r"[A-Za-z]{2}\d{10}"}) + event = events.TextNode( + text="string", schema_dict={"not_regex_string": r"[A-Za-z]{2}\d{10}"} + ) cleaned_event = list(cleaner.clean_regex_string(event))[0] assert cleaned_event.text == "string" - - -# test_clean_dates() -# test_clean_categories() -# test_clean_numeric() -# test_clean_regex_string() - diff --git a/tests/social_work_workforce/test_converters.py b/tests/social_work_workforce/test_converters.py index a4526ebe..203d095e 100644 --- a/tests/social_work_workforce/test_converters.py +++ b/tests/social_work_workforce/test_converters.py @@ -37,11 +37,30 @@ def test_to_numeric(): assert converters.to_numeric("date", "") == "date" assert converters.to_numeric("", "decimal", decimal_places) == "" assert converters.to_numeric(None, "decimal", decimal_places) == "" - assert converters.to_numeric("0.3", "decimal", decimal_places, min_inclusive=0, max_inclusive=1) == 0.3 - assert converters.to_numeric("0.3", "decimal", decimal_places, min_inclusive=0) == 0.3 - assert converters.to_numeric("0.3", "decimal", decimal_places, max_inclusive=1) == 0.3 - assert converters.to_numeric("1.99", "decimal", decimal_places, min_inclusive=0, max_inclusive=1) == "error" - assert converters.to_numeric("0.3", "decimal", decimal_places, min_inclusive=1, max_inclusive=99) == "error" + assert ( + converters.to_numeric( + "0.3", "decimal", decimal_places, min_inclusive=0, max_inclusive=1 + ) + == 0.3 + ) + assert ( + converters.to_numeric("0.3", "decimal", decimal_places, min_inclusive=0) == 0.3 + ) + assert ( + converters.to_numeric("0.3", "decimal", decimal_places, max_inclusive=1) == 0.3 + ) + assert ( + converters.to_numeric( + "1.99", "decimal", decimal_places, min_inclusive=0, max_inclusive=1 + ) + == "error" + ) + assert ( + converters.to_numeric( + "0.3", "decimal", decimal_places, min_inclusive=1, max_inclusive=99 + ) + == "error" + ) assert converters.to_numeric("3000", "integer") == 3000 assert converters.to_numeric(123, "integer") == 123 assert converters.to_numeric("1.0", "integer") == 1 @@ -53,17 +72,11 @@ def test_to_numeric(): def test_to_regex(): - pattern=r"[A-Za-z]{2}\d{10}" - assert converters.to_regex("AB1234567890",pattern) == "AB1234567890" # match - assert converters.to_regex(" AB1234567890 ",pattern) == "AB1234567890" # match - assert converters.to_regex("AB1234567890123456",pattern) == "error" # too long - assert converters.to_regex("AB12345",pattern) == "error" # too short - assert converters.to_regex("xxxxOz2054309383",pattern) == "error" # invalid format - assert converters.to_regex("", pattern) == "" # no value - assert converters.to_regex(None, pattern) == "" # no value - - -# test_to_category() -# test_to_integer() -# test_to_decimal() -# test_to_regex() + pattern = r"[A-Za-z]{2}\d{10}" + assert converters.to_regex("AB1234567890", pattern) == "AB1234567890" # match + assert converters.to_regex(" AB1234567890 ", pattern) == "AB1234567890" # match + assert converters.to_regex("AB1234567890123456", pattern) == "error" # too long + assert converters.to_regex("AB12345", pattern) == "error" # too short + assert converters.to_regex("xxxxOz2054309383", pattern) == "error" # invalid format + assert converters.to_regex("", pattern) == "" # no value + assert converters.to_regex(None, pattern) == "" # no value diff --git a/tests/social_work_workforce/test_csww_record.py b/tests/social_work_workforce/test_csww_record.py index 88cd3890..3b25973d 100644 --- a/tests/social_work_workforce/test_csww_record.py +++ b/tests/social_work_workforce/test_csww_record.py @@ -1,4 
+1,3 @@
-# Import the unittest module and the code to be tested
 import unittest
 from sfdata_stream_parser.events import StartElement, EndElement, TextNode
 from liiatools.datasets.social_work_workforce.lds_csww_clean.csww_record import (
@@ -8,7 +7,6 @@
     LALevelEvent,
     HeaderEvent,
 )
-from liiatools.datasets.social_work_workforce.lds_csww_clean.xml import dom_parse


 class TestRecord(unittest.TestCase):
@@ -73,8 +71,3 @@ def test_message_collector(self):
         self.assertEqual(
             test_events[2].record, {"ID": "100", "SWENo": "AB123456789", "Agency": "0"}
         )
-
-
-# Run the tests
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/social_work_workforce/test_logger.py b/tests/social_work_workforce/test_logger.py
index ad00a80b..233d0863 100644
--- a/tests/social_work_workforce/test_logger.py
+++ b/tests/social_work_workforce/test_logger.py
@@ -1,8 +1,3 @@
-import tempfile as tmp
-from unittest.mock import patch
-from pathlib import Path
-from datetime import datetime
-
 from liiatools.datasets.social_work_workforce.lds_csww_clean import logger
 from sfdata_stream_parser import events
@@ -62,11 +57,21 @@ def test_create_formatting_error_list():
 def test_blank_error_check():
     mock_stream = logger.blank_error_check(
         [
-            events.TextNode(schema_dict={"canbeblank": False}, text="", formatting_error="0"),
-            events.TextNode(schema_dict={"canbeblank": False}, text=None, formatting_error="0"),
-            events.TextNode(schema_dict={"canbeblank": False}, text="", formatting_error="1"),
-            events.TextNode(schema_dict={"canbeblank": False}, text="string", formatting_error="0"),
-            events.TextNode(schema_dict={"canbeblank": True}, text="", formatting_error="0"),
+            events.TextNode(
+                schema_dict={"canbeblank": False}, text="", formatting_error="0"
+            ),
+            events.TextNode(
+                schema_dict={"canbeblank": False}, text=None, formatting_error="0"
+            ),
+            events.TextNode(
+                schema_dict={"canbeblank": False}, text="", formatting_error="1"
+            ),
+            events.TextNode(
+                schema_dict={"canbeblank": False}, text="string", formatting_error="0"
+            ),
+            events.TextNode(
+                schema_dict={"canbeblank": True}, text="", formatting_error="0"
+            ),
         ]
     )
     stream = list(mock_stream)
@@ -89,13 +94,25 @@ def test_create_blank_error_list():
     events_with_blank_error_list = list(logger.create_blank_error_list(mock_stream))
     for event in events_with_blank_error_list:
         if isinstance(event, logger.ErrorTable) and event.as_dict() != {}:
-            print(event.blank_error_list)
             assert event.blank_error_list == [
                 "some_header",
                 "some_header_2",
             ]
-test_create_formatting_error_list()
-test_blank_error_check()
-test_create_blank_error_list()
+
+
+def test_create_validation_error_list():
+    # create_validation_error_list collects validation_message values from
+    # StartElement events and yields them on the ErrorTable event
+    mock_stream = (
+        events.StartElement(
+            tag="LALevelVacancies", validation_message="error_message"
+        ),
+        events.StartElement(
+            tag="CSWWWorker", validation_message="error_message_2"
+        ),
+        events.StartElement(tag="CSWWWorker"),
+        events.EndElement(tag="Message"),
+        logger.ErrorTable(),
+    )
+    events_with_validation_error_list = list(
+        logger.create_validation_error_list(mock_stream)
+    )
+    for event in events_with_validation_error_list:
+        if isinstance(event, logger.ErrorTable) and event.as_dict() != {}:
+            assert event.validation_error_list == [
+                "error_message",
+                "error_message_2",
+            ]
From d357595558ef229839e3949556258d2f7e7eed69 Mon Sep 17 00:00:00 2001
From: patrick-troy <58770937+patrick-troy@users.noreply.github.com>
Date: Wed, 8 Nov 2023 14:12:37 +0200
Subject: [PATCH 39/40] update parameters for
 save_incorrect_year_error call
---
 liiatools/datasets/social_work_workforce/csww_cli.py            | 2 +-
 liiatools/datasets/social_work_workforce/csww_main_functions.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/liiatools/datasets/social_work_workforce/csww_cli.py b/liiatools/datasets/social_work_workforce/csww_cli.py
index 486e132c..e50db088 100644
--- a/liiatools/datasets/social_work_workforce/csww_cli.py
+++ b/liiatools/datasets/social_work_workforce/csww_cli.py
@@ -20,7 +20,7 @@
 @click.group()
 def csww():
     """
-    Functions for creating CSWW Census sample file generator
+    Functions for cleaning, minimising and aggregating CSWW files
     """
     pass
diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py
index 6b88566f..b677fbca 100644
--- a/liiatools/datasets/social_work_workforce/csww_main_functions.py
+++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py
@@ -126,7 +126,7 @@ def cleanfile(input, la_code, la_log_dir, output):
         )
         is False
     ):
-        save_incorrect_year_error(input, la_log_dir)
+        save_incorrect_year_error(input, la_log_dir, retention_period=YEARS_TO_GO_BACK - 1)
         return

     # Configure stream
From 528ee0c59cd2609f448a029c783fa3bbd03c162e Mon Sep 17 00:00:00 2001
From: patrick-troy <58770937+patrick-troy@users.noreply.github.com>
Date: Wed, 8 Nov 2023 14:33:28 +0200
Subject: [PATCH 40/40] remove unnecessary folders
---
 .../csww/BAD/social_work_workforce_2021.xml   | 556 ------------------
 .../csww/NEW/social_work_workforce_2022.xml   | 556 ------------------
 .../samples/flatfiles/BAD/la_log/blank.txt    |   0
 .../samples/log_files/blank.txt               |   0
 .../samples/outputs/blank.txt                 |   0
 .../samples/request/blank.txt                 |   0
 .../BAD => }/social_work_workforce_2022.xml   |   0
 .../social_work_workforce_2021.xsd            | 254 --------
 8 files changed, 1366 deletions(-)
 delete mode 100644 liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2021.xml
 delete mode 100644 liiatools/spec/social_work_workforce/samples/csww/NEW/social_work_workforce_2022.xml
 delete mode 100644 liiatools/spec/social_work_workforce/samples/flatfiles/BAD/la_log/blank.txt
 delete mode 100644 liiatools/spec/social_work_workforce/samples/log_files/blank.txt
 delete mode 100644 liiatools/spec/social_work_workforce/samples/outputs/blank.txt
 delete mode 100644 liiatools/spec/social_work_workforce/samples/request/blank.txt
 rename liiatools/spec/social_work_workforce/samples/{csww/BAD => }/social_work_workforce_2022.xml (100%)
 delete mode 100644 liiatools/spec/social_work_workforce/social_work_workforce_2021.xsd

diff --git a/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2021.xml b/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2021.xml
deleted file mode 100644
index 8ef1b9ef..00000000
--- a/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2021.xml
+++ /dev/null
@@ -1,556 +0,0 @@
- - CSWW - 2021 - 2021-09-30 - - - L - 301 - liiatools.datasets.social_work_workforce.sample_data - 2023-03-28T14:54:55Z - -
- - 66.66 - 40.40 - 100 - - - 1 - Ox2054309383 - 0.521371 - 1969-01-22 - 1 - REFU - Institution Name - 0 - 1988-04-07 - 9 - 72 - 15.31 - 288.7 - 1 - 0 - TRN - 1 - - - 1 - Yk7226043359 - 0 - 1958-04-07 - 9 - Institution Name - 1 - 2 - 8 - 2019-09-23 - 7 - 10 - 0.603665 - 66 - 29.87 - 2.5 - 1 - 4 - - - 1 - iP8098309864 - 0 - 1984-01-12 - APKN - Institution Name - 0 - 5 - 2014-01-26 - 4 - 2023-03-28 - 3 - 9 - 0.23246 - 92.56 - 213.4 - 0 - 2 - - - 0 - oP8178849586 - 0.899676 - 1990-09-28 - 9 - BAFR - 1 - 0 - 6 - 2023-03-28 - 2 - 0.429963 - 14.39 - 0 - - - 1 - nH9419631053 - 0.133587 - 2 - AIND - Institution Name - 3 - 1 - 2017-06-10 - 0.436348 - 5.39 - 475.7 - 1 - UNP - 1 - - - 1 - JJ3661684122 - 0 - 1993-05-19 - 9 - 3 - 2020-06-14 - 3 - 2023-03-28 - 1 - 5 - 0.903669 - 11 - 141.0 - 1 - - - 1 - tN2120744892 - 0.803122 - WBRI - Institution Name - 3 - 0.964327 - 95.06 - 403.6 - 0 - 2 - - - 1 - Zo9779760045 - 0.767688 - 1996-08-31 - 0 - MWAS - 2 - 1 - 5 - 2023-03-28 - 62 - 1 - 2 - - - 0 - wf3752370095 - 0.843488 - 1959-04-17 - 2 - APKN - Institution Name - 3 - 0 - 1997-10-01 - 2 - 0.712824 - 16.74 - 456.3 - 0 - 4 - - - 1 - OW2475789301 - 0 - 1971-10-02 - Institution Name - 1 - 1 - 1993-10-04 - 2023-03-28 - 3 - 6 - 0.908092 - 45 - 22.98 - 441.5 - 0 - 3 - - - 1 - Kv3016593719 - 0.12232 - 1996-06-05 - 1 - BAFR - 2 - 6 - 10 - 0.641824 - 23 - 36.13 - 213.1 - 0 - - - 0 - TB9669555723 - 0 - 1987-10-30 - 1 - 0 - 2 - 2012-10-02 - 2 - 2023-03-28 - 6 - 7 - 37 - 90.85 - 28.5 - 1 - 1 - UNA - 1 - - - 1 - QK8499162867 - 0 - 1968-11-27 - ABAN - 2 - 0 - 6 - 2018-08-03 - 9 - 2023-03-28 - 2 - 9 - 0.078464 - 43.02 - 154.7 - 1 - 1 - - - 0 - Wr5514040878 - 0 - 0 - AOTH - Institution Name - 1 - 0 - 2 - 2015-04-24 - 9 - 2023-03-28 - 1 - 6 - 3.51 - 424.0 - 2 - - - 0 - Aj9242652291 - 0.859218 - 1968-12-31 - 0 - BCRB - Institution Name - 1 - 1 - 1 - 2003-09-12 - 5 - 0.320526 - 85 - 98.22 - 206.6 - 2 - - - 0 - Jv2635496195 - 0.021911 - 1977-06-27 - REFU - Institution Name - 1 - 6 - 2022-10-08 - 6 - 0.69819 - 25 - 29.19 - 1 - 1 - SIC - - - 1 - To5555885076 - 0.786453 - 1996-11-18 - 0 - MWAS - Institution Name - 3 - 0 - 4 - 2023-03-28 - 1 - 0.441344 - 83 - 78.29 - 364.4 - 1 - - - 0 - rK9218104079 - 0.491425 - 1998-04-15 - 1 - 3 - 0 - 2023-03-28 - 4 - 0.939826 - 3.1 - 415.3 - 4 - - - 1 - cD9282390165 - 0.192894 - 1959-09-25 - 0 - REFU - Institution Name - 2 - 1 - 3 - 1985-12-12 - 9 - 0.18449 - 14 - 188.4 - 0 - 1 - - - 0 - zU6140515687 - 0 - 1962-11-04 - WBRI - 3 - 1 - 3 - 1999-07-14 - 2018-08-20 - 8 - 3 - 0.222573 - 65 - 16.26 - 1 - - - 1 - ih3342923522 - 0.862474 - 1992-02-18 - 0 - WBRI - 3 - 1 - 2023-03-28 - 4 - 0.761443 - 39 - 0 - 2 - - - 1 - cm3809724991 - 0 - 2001-10-29 - 1 - AIND - 3 - 0 - 1 - 2023-03-28 - 5 - 4 - 0.530908 - 29 - 38.71 - 339.9 - 0 - - - 1 - PA8564166424 - 0.668266 - 1983-04-13 - 9 - Institution Name - 1 - 1 - 2023-03-28 - 3 - 0.707445 - 1 - 2 - - - 0 - QW8564363911 - 0.978729 - 1958-04-26 - 9 - MWBA - Institution Name - 1 - 2002-01-31 - 1 - 0.698641 - 121.9 - 1 - 3 - - - 1 - PQ5842914246 - 0 - 1989-06-05 - 1 - 1 - 1 - 2011-08-31 - 5 - 2023-03-28 - 7 - 9 - 0.443976 - 70 - 12.2 - 301.3 - 0 - 4 - - - 0 - ZQ9393137749 - 0 - 1981-09-21 - CHNE - 1 - 1 - 2001-02-10 - 6 - 2023-03-28 - 4 - 1 - 0.821627 - 94.67 - 471.5 - 0 - 2 - - - 1 - Pv9093835426 - 0.561974 - OOTH - Institution Name - 2 - 0 - 6 - 2014-09-30 - 4 - 0.965936 - 63 - 87.59 - 0 - SIC - 1 - - - 0 - eW7601111729 - 0 - Institution Name - 3 - 0 - 1 - 1993-04-18 - 3 - 2023-03-28 - 7 - 1 - 0.63075 - 80 - 299.1 - 0 - 4 - - - 0 - Jd1465867330 - 0.034436 - 2 - 
APKN - Institution Name - 1 - 1997-01-11 - 4 - 0.22182 - 23 - 83.01 - 0 - 3 - - - 1 - od1620971821 - 0 - 1975-01-19 - 9 - WOTH - Institution Name - 2 - 0 - 1 - 2016-08-20 - 9 - 2023-03-28 - 5 - 9 - 87 - 13.01 - 1 - -
\ No newline at end of file diff --git a/liiatools/spec/social_work_workforce/samples/csww/NEW/social_work_workforce_2022.xml b/liiatools/spec/social_work_workforce/samples/csww/NEW/social_work_workforce_2022.xml deleted file mode 100644 index 8c3fc4a5..00000000 --- a/liiatools/spec/social_work_workforce/samples/csww/NEW/social_work_workforce_2022.xml +++ /dev/null @@ -1,556 +0,0 @@ - -
- - CSWW - 2022 - 2022-09-30 - - - L - 316 - liiatools.datasets.social_work_workforce.sample_data - 2023-03-28T14:54:55Z - -
[generated sample content: the XML markup of this file was lost in extraction. The surviving values show some thirty generated worker records, each carrying an agency-worker flag (0/1), a worker identifier (e.g. Oy2054309383), an FTE fraction, a date of birth, a gender code (0/1/2/9), an ethnicity code (REFU, APKN, BAFR, AIND, WBRI, MWAS, BCRB, MWBA, AOTH, OOTH, CHNE, WOTH, ABAN), a qualification institution placeholder ("Institution Name"), role start/end dates, and assorted numeric measures.]
\ No newline at end of file
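[editor's note: since the markup did not survive, the sketch below is a hedged reconstruction of a single record, for orientation only. The element names (CSWWWorker, AgencyWorker, SWENo, FTE, PersonBirthDate, GenderCurrent, Ethnicity, QualInst) are assumptions inferred from the surviving values and the enumeration labels of the 2021 XSD, not recovered source; the values are taken verbatim from the first surviving record.]

    <CSWWWorker>
        <AgencyWorker>1</AgencyWorker>                  <!-- 0 = not an agency worker, 1 = agency worker -->
        <SWENo>Oy2054309383</SWENo>                     <!-- worker identifier seen in the sample values -->
        <FTE>0.521371</FTE>                             <!-- full-time-equivalent fraction -->
        <PersonBirthDate>1977-08-10</PersonBirthDate>
        <GenderCurrent>1</GenderCurrent>                <!-- coded 0/1/2/9 in the sample data -->
        <Ethnicity>REFU</Ethnicity>                     <!-- "Declared not stated or Refused" per the XSD labels -->
        <QualInst>Institution Name</QualInst>           <!-- qualification institution placeholder -->
    </CSWWWorker>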
diff --git a/liiatools/spec/social_work_workforce/samples/flatfiles/BAD/la_log/blank.txt b/liiatools/spec/social_work_workforce/samples/flatfiles/BAD/la_log/blank.txt
deleted file mode 100644
index e69de29b..00000000
diff --git a/liiatools/spec/social_work_workforce/samples/log_files/blank.txt b/liiatools/spec/social_work_workforce/samples/log_files/blank.txt
deleted file mode 100644
index e69de29b..00000000
diff --git a/liiatools/spec/social_work_workforce/samples/outputs/blank.txt b/liiatools/spec/social_work_workforce/samples/outputs/blank.txt
deleted file mode 100644
index e69de29b..00000000
diff --git a/liiatools/spec/social_work_workforce/samples/request/blank.txt b/liiatools/spec/social_work_workforce/samples/request/blank.txt
deleted file mode 100644
index e69de29b..00000000
diff --git a/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml b/liiatools/spec/social_work_workforce/samples/social_work_workforce_2022.xml
similarity index 100%
rename from liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml
rename to liiatools/spec/social_work_workforce/samples/social_work_workforce_2022.xml
diff --git a/liiatools/spec/social_work_workforce/social_work_workforce_2021.xsd b/liiatools/spec/social_work_workforce/social_work_workforce_2021.xsd
deleted file mode 100644
index bc9f98a3..00000000
--- a/liiatools/spec/social_work_workforce/social_work_workforce_2021.xsd
+++ /dev/null
@@ -1,254 +0,0 @@
[deleted schema content: the XML markup of this 254-line XSD was lost in extraction. The surviving annotation labels show the coded enumerations the 2021 schema defined:
- agency worker: Not an Agency Worker; Agency Worker
- gender: Not Known; Male; Female; Not Specified
- ethnicity: White British; White Irish; Any Other White Background; White and Black Caribbean; White and Black African; White and Asian; Any Other Mixed Background; Indian; Pakistani; Bangladeshi; Any Other Asian Background; Black Caribbean; Black African; Any Other Black Background; Chinese; Any Other Ethnic Group; Declared not stated or Refused; Information Not Yet Obtained
- qualification level: Under-graduate; Post-graduate; Other
- yes/no flag: No; Yes
- seniority: Senior Manager; Middle Manager; First Line Manager; Senior Practitioner; Case Holder; Qualified Without Cases
- origin of new starters: Newly Qualified Social Workers; Social Worker Role in a Different Local Authority in England; Social Worker Role Outside England; Agency or Consultancy in Social Work (in England); Other Social Work Role Non-local Authority (in England); Other Social Care Role in Local Authority/Non-local Authority (in England); Non-social Care Role/Any Role Outside England/No Employment/Career Break; Other; Not Known; Not Yet Collected
- destination of leavers: the same list as origin, minus Newly Qualified Social Workers
- reason for leaving: Resignation; Voluntary Redundancy; Compulsory Redundancy; Dismissed; Retired; Deceased; Moved to a Non-child and Family Social Work Role Within LA; Other; Not Known; Not Yet Collected
- reason for absence: Maternity/Paternity leave; Other Paid Authorised Absence, Such As: Compassionate Leave, Annual Leave Requiring Reallocation Of Cases; Paid Absence For Public Duties, Such As: Jury Duty; Sick Leave; Training; Unauthorised Absence; Unpaid Authorised Absence
- role type: Assessed and Supported Year in Employment (AYSE); Frontline Practitioner; Practice Supervisor; Practice Leader]
\ No newline at end of file
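[editor's note: as a hedged illustration of the deleted schema's shape, one of its enumerations plausibly followed the standard XSD annotation pattern sketched below. The type name "gendertype" and the value-to-label mapping are assumptions inferred from the surviving labels and the 0/1/2/9 gender codes seen in the sample data, not recovered source.]

    <xs:simpleType name="gendertype">
        <xs:restriction base="xs:string">
            <xs:enumeration value="0">
                <xs:annotation><xs:documentation>Not Known</xs:documentation></xs:annotation>
            </xs:enumeration>
            <xs:enumeration value="1">
                <xs:annotation><xs:documentation>Male</xs:documentation></xs:annotation>
            </xs:enumeration>
            <xs:enumeration value="2">
                <xs:annotation><xs:documentation>Female</xs:documentation></xs:annotation>
            </xs:enumeration>
            <xs:enumeration value="9">
                <xs:annotation><xs:documentation>Not Specified</xs:documentation></xs:annotation>
            </xs:enumeration>
        </xs:restriction>
    </xs:simpleType>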