From 819e4fb47c2862a56a48e83c5a60cd620852529b Mon Sep 17 00:00:00 2001
From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com>
Date: Tue, 20 Jun 2023 08:14:17 +0000
Subject: [PATCH 01/40] Work in progress
---
.../csww_main_functions.py | 94 +++++++++
.../lds_csww_clean/configuration.py | 80 ++++++++
.../lds_csww_clean/csww_record.py | 179 ++++++++++++++++++
.../lds_csww_clean/file_creator.py | 62 ++++++
.../lds_csww_clean/schema.py | 17 ++
5 files changed, 432 insertions(+)
create mode 100644 liiatools/datasets/social_work_workforce/lds_csww_clean/configuration.py
create mode 100644 liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py
create mode 100644 liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py
create mode 100644 liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py
diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py
index c68a681c..3184c26f 100644
--- a/liiatools/datasets/social_work_workforce/csww_main_functions.py
+++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py
@@ -1,9 +1,35 @@
+import logging
+import click_log
+import click as click
+from pathlib import Path
+from datetime import datetime
+
from liiatools.datasets.social_work_workforce.sample_data import (
generate_sample_csww_file,
)
from liiatools.csdatatools.util.stream import consume
from liiatools.csdatatools.util.xml import etree, to_xml
+# Dependencies for cleanfile()
+from liiatools.csdatatools.util.xml import dom_parse
+from liiatools.csdatatools.datasets.cincensus import filters
+from liiatools.datasets.social_work_workforce.lds_csww_clean.schema import Schema
+
+from liiatools.datasets.social_work_workforce.lds_csww_clean import (
+ file_creator,
+ configuration as clean_config,
+ csww_record
+)
+
+from liiatools.datasets.shared_functions.common import (
+ flip_dict,
+ check_file_type,
+ supported_file_types,
+ check_year,
+ check_year_within_range,
+ save_year_error,
+ save_incorrect_year_error
+)
def generate_sample(output: str):
"""
@@ -25,3 +51,71 @@ def generate_sample(output: str):
FILE.write(element)
except FileNotFoundError:
print("The file path provided does not exist")
+
+def cleanfile(input, la_code, la_log_dir, output):
+ """
+ Cleans input Children Social Work workforce xml files according to config and outputs cleaned csv files.
+ :param input: should specify the input file location, including file name and suffix, and be usable by a Path function
+ :param la_code: should be a three-letter string for the local authority depositing the file
+ :param la_log_dir: should specify the path to the local authority's log folder
+ :param output: should specify the path to the output folder
+ :return: None
+ """
+
+ # Open & Parse file
+ print("Starting # Open & Parse file")
+ if (
+ check_file_type(
+ input,
+ file_types=[".xml"],
+ supported_file_types=supported_file_types,
+ la_log_dir=la_log_dir,
+ )
+ == "incorrect file type"
+ ):
+ return
+ stream = dom_parse(input)
+ stream = list(stream)
+
+ # Get year from input file
+ try:
+ filename = str(Path(input).resolve().stem)
+ input_year = check_year(filename)
+ except (AttributeError, ValueError):
+ save_year_error(input, la_log_dir)
+ return
+
+ # Check year is within acceptable range for data retention policy
+ print("Starting # Check year")
+ years_to_go_back = 6
+ year_start_month = 6
+ reference_date = datetime.now()
+ if check_year_within_range(input_year, years_to_go_back, year_start_month, reference_date) is False:
+ save_incorrect_year_error(input, la_log_dir)
+ return
+
+ # Configure stream
+ print("Starting # Configure stream")
+ config = clean_config.Config()
+ la_name = flip_dict(config["data_codes"])[la_code]
+ stream = filters.strip_text(stream)
+ stream = filters.add_context(stream)
+ stream = filters.add_schema(stream, schema=Schema(input_year).schema)
+
+ # Output result
+ #print("Starting # Output result")
+ stream = csww_record.message_collector(stream) # <=== this is the problem - not returning any stream data
+ #print(f"Stream = {stream}")
+ data = csww_record.export_table(stream)
+ #print(f"Data = {data}")
+ data = file_creator.add_fields(input_year, data, la_name, la_code)
+ #print(data)
+ file_creator.export_file(input, output, data)
+
+cleanfile("/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml",
+ "BAD",
+ "/workspaces/liia_tools/liiatools/datasets/social_work_workforce/lds_csww_clean",
+ "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean"
+ )
+
+print("===> Finished running csww_main_functions.py")
\ No newline at end of file
diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/configuration.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/configuration.py
new file mode 100644
index 00000000..8d3f291c
--- /dev/null
+++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/configuration.py
@@ -0,0 +1,80 @@
+import datetime
+import logging
+import os
+from pathlib import Path
+import yaml
+from string import Template
+
+from liiatools.spec import common as common_asset_dir
+
+log = logging.getLogger(__name__)
+
+COMMON_CONFIG_DIR = Path(common_asset_dir.__file__).parent
+
+
+class Config(dict):
+ def __init__(self, *config_files):
+ super().__init__()
+
+ if not config_files:
+ config_files = [
+ "DEFAULT_DATA_CODES",
+ ]
+
+ for file in config_files:
+ if file == "DEFAULT_DATA_CODES":
+ file = COMMON_CONFIG_DIR / "LA-codes.yml"
+ self.load_config(file, conditional=False)
+
+ self["config_date"] = datetime.datetime.now().isoformat()
+ try:
+ self["username"] = os.getlogin()
+ except OSError:
+ # This happens when tests are not run under a login shell, e.g. CI pipeline
+ pass
+
+ def load_config(self, filename, conditional=False, warn=False):
+ """
+ Load configuration from yaml file. Any loaded configuration
+ is only set if the values don't already exist in CONFIG.
+
+ Files can contain ${} placeholders following the Python string.Template format.
+ The context will include any keys already existing in the configuration, any keys
+ from the current file - however, if these include placeholders, the placeholders
+ will not be replaced. Finally, environment variables can be referenced with
+ `os_environ_VARIABLE_NAME`.
+
+ Keyword arguments:
+ filename -- Filename to load from
+ conditional -- If True, ignore file if it doesn't exist. If False, fail. (default False)
+ """
+ if conditional and not os.path.isfile(filename):
+ if warn:
+ log.warning("Missing optional file {}".format(filename))
+
+ return
+
+ with open(filename) as FILE:
+ user_config = yaml.load(FILE, Loader=yaml.FullLoader)
+
+ log.info(
+ "Loading {} configuration values from '{}'.".format(
+ len(user_config), filename
+ )
+ )
+
+ environment_dict = {"os_environ_{}".format(k): v for k, v in os.environ.items()}
+
+ variables = dict(self)
+ variables.update(user_config)
+ variables.update(environment_dict)
+
+ with open(filename, "rt") as FILE:
+ user_config_string = FILE.read()
+
+ user_config_template = Template(user_config_string)
+ user_config_string = user_config_template.substitute(variables)
+
+ user_config = yaml.load(user_config_string, Loader=yaml.FullLoader)
+
+ self.update(user_config)
diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py
new file mode 100644
index 00000000..57e0b379
--- /dev/null
+++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py
@@ -0,0 +1,179 @@
+from typing import Iterator
+import tablib
+from more_itertools import peekable
+
+from sfdata_stream_parser import events
+from sfdata_stream_parser.collectors import xml_collector
+
+
+class CSWWEvent(events.ParseEvent):
+ pass
+
+
+class HeaderEvent(events.ParseEvent):
+ pass
+
+
+def _reduce_dict(dict_instance):
+ new_dict = {}
+ for key, value in dict_instance.items():
+ if len(value) == 1:
+ new_dict[key] = value[0]
+ else:
+ new_dict[key] = value
+ return new_dict
+
+
+@xml_collector
+def text_collector(stream):
+ data_dict = {}
+ current_element = None
+ for event in stream:
+ if isinstance(event, events.StartElement):
+ current_element = event.tag
+ if isinstance(event, events.TextNode) and event.text:
+ data_dict.setdefault(current_element, []).append(event.text)
+ return _reduce_dict(data_dict)
+
+
+# @xml_collector
+# def csww_collector(stream):
+# data_dict = {}
+# stream = peekable(stream)
+# last_tag = None
+# while stream:
+# event = stream.peek()
+# last_tag = event.get("tag", last_tag)
+# if event.get("tag") in (
+# "Assessments",
+# "CINPlanDates",
+# "Section47",
+# "ChildProtectionPlans",
+# ):
+# data_dict.setdefault(event.tag, []).append(text_collector(stream))
+# else:
+# if isinstance(event, events.TextNode) and event.text:
+# data_dict.setdefault(last_tag, []).append(event.text)
+# next(stream)
+
+# return _reduce_dict(data_dict)
+
+
+# @xml_collector
+# def cswwworker_collector(stream):
+# data_dict = {}
+# stream = peekable(stream)
+# assert stream.peek().tag == "CSWWWorker"
+# while stream:
+# event = stream.peek()
+# print(f"Event tag = {event.get('tag')}")
+# if event.get("tag") == "CSWWdetails":
+# data_dict.setdefault(event.tag, []).append(text_collector(stream))
+# # elif event.get("tag") == "LALevelVacancies":
+# # data_dict.setdefault(event.tag, []).append(csww_collector(stream))
+# else:
+# next(stream)
+# return _reduce_dict(data_dict)
+
+
+@xml_collector
+def message_collector(stream):
+ stream = peekable(stream)
+ assert stream.peek().tag == "Message", "Expected Message, got {}".format(
+ stream.peek().tag
+ )
+ while stream:
+ event = stream.peek()
+ if event.get("tag") == "Header":
+ print(f"Header stream = {stream}")
+ header_record = text_collector(stream)
+ if header_record:
+ yield HeaderEvent(record=header_record)
+ elif event.get("tag") == "CSWWWorker":
+ csww_record = text_collector(stream)
+ if csww_record:
+ #print(f"yielding csww_record event: {CSWWEvent(record=csww_record)}")
+ #print(f"CSWWEvent(record=csww_record) = {CSWWEvent(record=csww_record)}")
+ yield CSWWEvent(record=csww_record)
+ else:
+ next(stream)
+
+
+__EXPORT_HEADERS = [
+ "AgencyWorker",
+ "SWENo",
+ "FTE",
+ "PersonBirthDate",
+ "GenderCurrent",
+ "Ethnicity",
+ "QualInst",
+ "StepUpGrad",
+ "RoleStartDate",
+ "StartOrigin",
+ "Cases30",
+ "WorkingDaysLost",
+ "ContractWeeks",
+ "FrontlineGrad",
+ "Absat30Sept",
+ "ReasonAbsence",
+ "CFKSSstatus",
+]
+
+
+def _maybe_list(value):
+ if value is None:
+ value = []
+ if not isinstance(value, list):
+ value = [value]
+ print(f"maybe_list(value) = {value}")
+ return value
+
+
+def csww_event(record, property, event_name=None):
+ if event_name is None:
+ event_name = property
+ value = record.get(property)
+ if value:
+ new_record = {**record, "Date": value, "Type": event_name}
+ return ({k: new_record.get(k) for k in __EXPORT_HEADERS},)
+
+ return ()
+
+
+def event_to_records(event: CSWWEvent) -> Iterator[dict]:
+ record = event.record
+ #print(f"event.record = {record}")
+ # child = {
+ # **record.get("ChildIdentifiers", {}),
+ # **record.get("ChildCharacteristics", {}),
+ # }
+ # child["Disabilities"] = ",".join(_maybe_list(child.get("Disability")))
+
+ print(record.get("CSWWWorker"))
+ for csww_item in _maybe_list(record):
+ yield from csww_event({**child, **csww_item}, "CINreferralDate")
+ yield from csww_event({**child, **csww_item}, "CINclosureDate")
+
+ for assessment in _maybe_list(csww_item.get("Assessments")):
+ assessment["Factors"] = ",".join(
+ _maybe_list(assessment.get("AssessmentFactors"))
+ )
+ yield from csww_event(
+ {**child, **csww_item, **assessment}, "AssessmentActualStartDate"
+ )
+ yield from csww_event(
+ {**child, **csww_item, **assessment}, "AssessmentAuthorisationDate"
+ )
+
+
+def export_table(stream):
+ #print(f"export_table() called for stream: {stream}")
+ data = tablib.Dataset(headers=__EXPORT_HEADERS)
+ #print(f"header data in export_table() = {data}")
+ for event in stream:
+ if isinstance(event, CSWWEvent):
+ for record in event_to_records(event):
+ #print("Found data to append")
+ data.append([record.get(k, "") for k in __EXPORT_HEADERS])
+ else: print("No row data to append")
+ return data
diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py
new file mode 100644
index 00000000..d7200558
--- /dev/null
+++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py
@@ -0,0 +1,62 @@
+from pathlib import Path
+import pandas as pd
+import logging
+
+from liiatools.datasets.shared_functions import converters, common
+
+log = logging.getLogger(__name__)
+
+def convert_to_dataframe(data):
+ data = data.export("df")
+ return data
+
+
+def get_year(data, year):
+ data["YEAR"] = year
+ return data
+
+def convert_to_datetime(data):
+ data[["PersonBirthDate", "RoleStartDate"]] = data[
+ ["PersonBirthDate", "RoleStartDate"]
+ ].apply(pd.to_datetime)
+ return data
+
+
+def add_la_name(data, la_name):
+ data["LA"] = la_name
+ return data
+
+
+# def la_prefix(data, la_code):
+# data["LAchildID"] = data["LAchildID"] + "_" + la_code
+# return data
+
+
+def add_fields(input_year, data, la_name, la_code):
+ """
+ Add YEAR, LA, PERSONSCHOOLYEAR to exported dataframe
+ Append LA_code from config to LAChildID
+
+ :param input_year: A string of the year of return for the current file
+ :param data: The dataframe to be cleaned
+ :param la_name: LA name
+ :param la_code: LA code
+ :return: Cleaned and degraded dataframe
+ """
+ data = convert_to_dataframe(data)
+ data = get_year(data, input_year)
+ data = convert_to_datetime(data)
+ #data = add_school_year(data)
+ data = add_la_name(data, la_name)
+ #data = la_prefix(data, la_code)
+ #data = degrade_dob(data)
+ #data = degrade_expected_dob(data)
+ #data = degrade_death_date(data)
+ return data
+
+
+def export_file(input, output, data):
+ filename = Path(input).stem
+ outfile = filename + "_clean.csv"
+ output_path = Path(output, outfile)
+ data.to_csv(output_path, index=False)
diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py
new file mode 100644
index 00000000..a8d67146
--- /dev/null
+++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py
@@ -0,0 +1,17 @@
+from functools import cached_property
+from pathlib import Path
+
+import xmlschema
+
+from liiatools.spec import social_work_workforce as csww_asset_dir
+
+
+class Schema:
+ def __init__(self, year):
+ self.__year = year
+
+ @cached_property
+ def schema(self) -> xmlschema.XMLSchema:
+ return xmlschema.XMLSchema(
+ Path(csww_asset_dir.__file__).parent / f"social_work_workforce_{self.__year}.xsd"
+ )
From f795e54f7bc8169c332b0dae715ea5471f35befc Mon Sep 17 00:00:00 2001
From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com>
Date: Wed, 21 Jun 2023 13:44:27 +0000
Subject: [PATCH 02/40] Fix to produce cleanfile output
---
.../csww_main_functions.py | 8 ---
.../lds_csww_clean/csww_record.py | 71 +------------------
2 files changed, 2 insertions(+), 77 deletions(-)
diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py
index 3184c26f..a381812e 100644
--- a/liiatools/datasets/social_work_workforce/csww_main_functions.py
+++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py
@@ -63,7 +63,6 @@ def cleanfile(input, la_code, la_log_dir, output):
"""
# Open & Parse file
- print("Starting # Open & Parse file")
if (
check_file_type(
input,
@@ -86,7 +85,6 @@ def cleanfile(input, la_code, la_log_dir, output):
return
# Check year is within acceptable range for data retention policy
- print("Starting # Check year")
years_to_go_back = 6
year_start_month = 6
reference_date = datetime.now()
@@ -95,21 +93,15 @@ def cleanfile(input, la_code, la_log_dir, output):
return
# Configure stream
- print("Starting # Configure stream")
config = clean_config.Config()
la_name = flip_dict(config["data_codes"])[la_code]
stream = filters.strip_text(stream)
stream = filters.add_context(stream)
stream = filters.add_schema(stream, schema=Schema(input_year).schema)
-
# Output result
- #print("Starting # Output result")
stream = csww_record.message_collector(stream) # <=== this is the problem - not returning any stream data
- #print(f"Stream = {stream}")
data = csww_record.export_table(stream)
- #print(f"Data = {data}")
data = file_creator.add_fields(input_year, data, la_name, la_code)
- #print(data)
file_creator.export_file(input, output, data)
cleanfile("/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml",
diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py
index 57e0b379..b0731819 100644
--- a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py
+++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py
@@ -36,46 +36,6 @@ def text_collector(stream):
return _reduce_dict(data_dict)
-# @xml_collector
-# def csww_collector(stream):
-# data_dict = {}
-# stream = peekable(stream)
-# last_tag = None
-# while stream:
-# event = stream.peek()
-# last_tag = event.get("tag", last_tag)
-# if event.get("tag") in (
-# "Assessments",
-# "CINPlanDates",
-# "Section47",
-# "ChildProtectionPlans",
-# ):
-# data_dict.setdefault(event.tag, []).append(text_collector(stream))
-# else:
-# if isinstance(event, events.TextNode) and event.text:
-# data_dict.setdefault(last_tag, []).append(event.text)
-# next(stream)
-
-# return _reduce_dict(data_dict)
-
-
-# @xml_collector
-# def cswwworker_collector(stream):
-# data_dict = {}
-# stream = peekable(stream)
-# assert stream.peek().tag == "CSWWWorker"
-# while stream:
-# event = stream.peek()
-# print(f"Event tag = {event.get('tag')}")
-# if event.get("tag") == "CSWWdetails":
-# data_dict.setdefault(event.tag, []).append(text_collector(stream))
-# # elif event.get("tag") == "LALevelVacancies":
-# # data_dict.setdefault(event.tag, []).append(csww_collector(stream))
-# else:
-# next(stream)
-# return _reduce_dict(data_dict)
-
-
@xml_collector
def message_collector(stream):
stream = peekable(stream)
@@ -85,15 +45,12 @@ def message_collector(stream):
while stream:
event = stream.peek()
if event.get("tag") == "Header":
- print(f"Header stream = {stream}")
header_record = text_collector(stream)
if header_record:
yield HeaderEvent(record=header_record)
elif event.get("tag") == "CSWWWorker":
csww_record = text_collector(stream)
if csww_record:
- #print(f"yielding csww_record event: {CSWWEvent(record=csww_record)}")
- #print(f"CSWWEvent(record=csww_record) = {CSWWEvent(record=csww_record)}")
yield CSWWEvent(record=csww_record)
else:
next(stream)
@@ -125,7 +82,6 @@ def _maybe_list(value):
value = []
if not isinstance(value, list):
value = [value]
- print(f"maybe_list(value) = {value}")
return value
@@ -142,38 +98,15 @@ def csww_event(record, property, event_name=None):
def event_to_records(event: CSWWEvent) -> Iterator[dict]:
record = event.record
- #print(f"event.record = {record}")
- # child = {
- # **record.get("ChildIdentifiers", {}),
- # **record.get("ChildCharacteristics", {}),
- # }
- # child["Disabilities"] = ",".join(_maybe_list(child.get("Disability")))
-
- print(record.get("CSWWWorker"))
- for csww_item in _maybe_list(record):
- yield from csww_event({**child, **csww_item}, "CINreferralDate")
- yield from csww_event({**child, **csww_item}, "CINclosureDate")
- for assessment in _maybe_list(csww_item.get("Assessments")):
- assessment["Factors"] = ",".join(
- _maybe_list(assessment.get("AssessmentFactors"))
- )
- yield from csww_event(
- {**child, **csww_item, **assessment}, "AssessmentActualStartDate"
- )
- yield from csww_event(
- {**child, **csww_item, **assessment}, "AssessmentAuthorisationDate"
- )
+ for csww_item in _maybe_list(record):
+ yield from csww_event({**csww_item}, "StepUpGrad")
def export_table(stream):
- #print(f"export_table() called for stream: {stream}")
data = tablib.Dataset(headers=__EXPORT_HEADERS)
- #print(f"header data in export_table() = {data}")
for event in stream:
if isinstance(event, CSWWEvent):
for record in event_to_records(event):
- #print("Found data to append")
data.append([record.get(k, "") for k in __EXPORT_HEADERS])
- else: print("No row data to append")
return data
From 8d3bd6c60da4959460e395bb894f3dd81527fbfa Mon Sep 17 00:00:00 2001
From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com>
Date: Wed, 21 Jun 2023 14:00:10 +0000
Subject: [PATCH 03/40] Run python black
---
.../csww_main_functions.py | 31 +++++++++++++------
.../lds_csww_clean/csww_record.py | 2 +-
.../lds_csww_clean/file_creator.py | 12 ++++---
.../lds_csww_clean/schema.py | 3 +-
4 files changed, 31 insertions(+), 17 deletions(-)
diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py
index a381812e..30290cfb 100644
--- a/liiatools/datasets/social_work_workforce/csww_main_functions.py
+++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py
@@ -18,7 +18,7 @@
from liiatools.datasets.social_work_workforce.lds_csww_clean import (
file_creator,
configuration as clean_config,
- csww_record
+ csww_record,
)
from liiatools.datasets.shared_functions.common import (
@@ -28,9 +28,10 @@
check_year,
check_year_within_range,
save_year_error,
- save_incorrect_year_error
+ save_incorrect_year_error,
)
+
def generate_sample(output: str):
"""
Export a sample file for testing
@@ -52,6 +53,7 @@ def generate_sample(output: str):
except FileNotFoundError:
print("The file path provided does not exist")
+
def cleanfile(input, la_code, la_log_dir, output):
"""
Cleans input Children Social Work workforce xml files according to config and outputs cleaned csv files.
@@ -88,7 +90,12 @@ def cleanfile(input, la_code, la_log_dir, output):
years_to_go_back = 6
year_start_month = 6
reference_date = datetime.now()
- if check_year_within_range(input_year, years_to_go_back, year_start_month, reference_date) is False:
+ if (
+ check_year_within_range(
+ input_year, years_to_go_back, year_start_month, reference_date
+ )
+ is False
+ ):
save_incorrect_year_error(input, la_log_dir)
return
@@ -99,15 +106,19 @@ def cleanfile(input, la_code, la_log_dir, output):
stream = filters.add_context(stream)
stream = filters.add_schema(stream, schema=Schema(input_year).schema)
# Output result
- stream = csww_record.message_collector(stream) # <=== this is the problem - not returning any stream data
+ stream = csww_record.message_collector(
+ stream
+ ) # <=== this is the problem - not returning any stream data
data = csww_record.export_table(stream)
data = file_creator.add_fields(input_year, data, la_name, la_code)
file_creator.export_file(input, output, data)
-cleanfile("/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml",
- "BAD",
- "/workspaces/liia_tools/liiatools/datasets/social_work_workforce/lds_csww_clean",
- "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean"
- )
-print("===> Finished running csww_main_functions.py")
\ No newline at end of file
+cleanfile(
+ "/workspaces/liia-tools/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml",
+ "BAD",
+ "/workspaces/liia_tools/liiatools/datasets/social_work_workforce/lds_csww_clean",
+ "/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean",
+)
+
+print("===> Finished running csww_main_functions.py")
diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py
index b0731819..1a3c1b83 100644
--- a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py
+++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py
@@ -59,7 +59,7 @@ def message_collector(stream):
__EXPORT_HEADERS = [
"AgencyWorker",
"SWENo",
- "FTE",
+ "FTE",
"PersonBirthDate",
"GenderCurrent",
"Ethnicity",
diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py
index d7200558..d951c358 100644
--- a/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py
+++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py
@@ -6,6 +6,7 @@
log = logging.getLogger(__name__)
+
def convert_to_dataframe(data):
data = data.export("df")
return data
@@ -15,6 +16,7 @@ def get_year(data, year):
data["YEAR"] = year
return data
+
def convert_to_datetime(data):
data[["PersonBirthDate", "RoleStartDate"]] = data[
["PersonBirthDate", "RoleStartDate"]
@@ -46,12 +48,12 @@ def add_fields(input_year, data, la_name, la_code):
data = convert_to_dataframe(data)
data = get_year(data, input_year)
data = convert_to_datetime(data)
- #data = add_school_year(data)
+ # data = add_school_year(data)
data = add_la_name(data, la_name)
- #data = la_prefix(data, la_code)
- #data = degrade_dob(data)
- #data = degrade_expected_dob(data)
- #data = degrade_death_date(data)
+ # data = la_prefix(data, la_code)
+ # data = degrade_dob(data)
+ # data = degrade_expected_dob(data)
+ # data = degrade_death_date(data)
return data
diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py
index a8d67146..f82b5eb1 100644
--- a/liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py
+++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/schema.py
@@ -13,5 +13,6 @@ def __init__(self, year):
@cached_property
def schema(self) -> xmlschema.XMLSchema:
return xmlschema.XMLSchema(
- Path(csww_asset_dir.__file__).parent / f"social_work_workforce_{self.__year}.xsd"
+ Path(csww_asset_dir.__file__).parent
+ / f"social_work_workforce_{self.__year}.xsd"
)
From ac82a0ca801978f291094bb55b6d841c35b0d643 Mon Sep 17 00:00:00 2001
From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com>
Date: Mon, 3 Jul 2023 11:48:05 +0000
Subject: [PATCH 04/40] Minor changes
---
liiatools/datasets/social_work_workforce/csww_main_functions.py | 2 --
1 file changed, 2 deletions(-)
diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py
index 30290cfb..970638d2 100644
--- a/liiatools/datasets/social_work_workforce/csww_main_functions.py
+++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py
@@ -120,5 +120,3 @@ def cleanfile(input, la_code, la_log_dir, output):
"/workspaces/liia_tools/liiatools/datasets/social_work_workforce/lds_csww_clean",
"/workspaces/liia-tools/liiatools/datasets/social_work_workforce/lds_csww_clean",
)
-
-print("===> Finished running csww_main_functions.py")
From 333617e85e01837191a2cdbf02c0745ec685f16d Mon Sep 17 00:00:00 2001
From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com>
Date: Mon, 3 Jul 2023 11:49:10 +0000
Subject: [PATCH 05/40] Sort imports
---
.../social_work_workforce/csww_main_functions.py | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py
index 970638d2..f10e0596 100644
--- a/liiatools/datasets/social_work_workforce/csww_main_functions.py
+++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py
@@ -1,7 +1,7 @@
-import logging
-import click_log
-import click as click
+# import logging
+# import click_log
from pathlib import Path
+import click as click
from datetime import datetime
from liiatools.datasets.social_work_workforce.sample_data import (
@@ -105,10 +105,11 @@ def cleanfile(input, la_code, la_log_dir, output):
stream = filters.strip_text(stream)
stream = filters.add_context(stream)
stream = filters.add_schema(stream, schema=Schema(input_year).schema)
+
# Output result
stream = csww_record.message_collector(
stream
- ) # <=== this is the problem - not returning any stream data
+ )
data = csww_record.export_table(stream)
data = file_creator.add_fields(input_year, data, la_name, la_code)
file_creator.export_file(input, output, data)
From 0a077e2e13b46c44d3e6ec707c7df342cb57e2ef Mon Sep 17 00:00:00 2001
From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com>
Date: Tue, 4 Jul 2023 10:48:45 +0000
Subject: [PATCH 06/40] Create separate csv clean file for LA level data
---
.../csww_main_functions.py | 24 ++++----
.../lds_csww_clean/csww_record.py | 58 +++++++++++++++----
.../lds_csww_clean/file_creator.py | 26 ++++++---
3 files changed, 76 insertions(+), 32 deletions(-)
diff --git a/liiatools/datasets/social_work_workforce/csww_main_functions.py b/liiatools/datasets/social_work_workforce/csww_main_functions.py
index f10e0596..8e41ade8 100644
--- a/liiatools/datasets/social_work_workforce/csww_main_functions.py
+++ b/liiatools/datasets/social_work_workforce/csww_main_functions.py
@@ -1,7 +1,4 @@
-# import logging
-# import click_log
from pathlib import Path
-import click as click
from datetime import datetime
from liiatools.datasets.social_work_workforce.sample_data import (
@@ -87,8 +84,8 @@ def cleanfile(input, la_code, la_log_dir, output):
return
# Check year is within acceptable range for data retention policy
- years_to_go_back = 6
- year_start_month = 6
+ years_to_go_back = 7
+ year_start_month = 1
reference_date = datetime.now()
if (
check_year_within_range(
@@ -105,14 +102,15 @@ def cleanfile(input, la_code, la_log_dir, output):
stream = filters.strip_text(stream)
stream = filters.add_context(stream)
stream = filters.add_schema(stream, schema=Schema(input_year).schema)
-
- # Output result
- stream = csww_record.message_collector(
- stream
- )
- data = csww_record.export_table(stream)
- data = file_creator.add_fields(input_year, data, la_name, la_code)
- file_creator.export_file(input, output, data)
+
+ # Output results
+ stream = csww_record.message_collector(stream)
+
+ data_worker, data_lalevel = csww_record.export_table(stream)
+ data_worker = file_creator.add_fields(input_year, data_worker, la_name)
+ data_lalevel = file_creator.add_fields(input_year, data_lalevel, la_name)
+ file_creator.export_file(input, output, data_lalevel, "lalevel")
+ file_creator.export_file(input, output, data_worker, "worker")
cleanfile(
diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py
index 1a3c1b83..07414065 100644
--- a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py
+++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py
@@ -10,6 +10,10 @@ class CSWWEvent(events.ParseEvent):
pass
+class LALevelEvent(events.ParseEvent):
+ pass
+
+
class HeaderEvent(events.ParseEvent):
pass
@@ -52,11 +56,15 @@ def message_collector(stream):
csww_record = text_collector(stream)
if csww_record:
yield CSWWEvent(record=csww_record)
+ elif event.get("tag") == "LALevelVacancies":
+ lalevel_record = text_collector(stream)
+ if lalevel_record:
+ yield LALevelEvent(record=lalevel_record)
else:
next(stream)
-__EXPORT_HEADERS = [
+__EXPORT_HEADERS_CSWWWORKER = [
"AgencyWorker",
"SWENo",
"FTE",
@@ -76,6 +84,12 @@ def message_collector(stream):
"CFKSSstatus",
]
+__EXPORT_HEADERS_LALEVELVAC = [
+ "NumberOfVacancies",
+ "NoAgencyFTE",
+ "NoAgencyHeadcount",
+]
+
def _maybe_list(value):
if value is None:
@@ -85,28 +99,52 @@ def _maybe_list(value):
return value
-def csww_event(record, property, event_name=None):
+def csww_event_worker(record, property, event_name=None):
if event_name is None:
event_name = property
value = record.get(property)
if value:
new_record = {**record, "Date": value, "Type": event_name}
- return ({k: new_record.get(k) for k in __EXPORT_HEADERS},)
+ return ({k: new_record.get(k) for k in __EXPORT_HEADERS_CSWWWORKER},)
return ()
-def event_to_records(event: CSWWEvent) -> Iterator[dict]:
- record = event.record
+def lalevel_event(record, property, event_name=None):
+ if event_name is None:
+ event_name = property
+ value = record.get(property)
+ if value:
+ new_record = {**record, "Date": value, "Type": event_name}
+ return ({k: new_record.get(k) for k in __EXPORT_HEADERS_LALEVELVAC},)
+
+ return ()
+
+def event_to_records_worker(event: CSWWEvent) -> Iterator[dict]:
+ record = event.record
for csww_item in _maybe_list(record):
- yield from csww_event({**csww_item}, "StepUpGrad")
+ yield from csww_event_worker({**csww_item}, "StepUpGrad")
+
+
+def event_to_records_lalevel(event: LALevelEvent) -> Iterator[dict]:
+ record = event.record
+ for lalevel_item in _maybe_list(record):
+ yield from lalevel_event({**lalevel_item}, "NoAgencyFTE")
def export_table(stream):
- data = tablib.Dataset(headers=__EXPORT_HEADERS)
+ data_worker = tablib.Dataset(headers=__EXPORT_HEADERS_CSWWWORKER)
+ data_lalevel = tablib.Dataset(headers=__EXPORT_HEADERS_LALEVELVAC)
for event in stream:
if isinstance(event, CSWWEvent):
- for record in event_to_records(event):
- data.append([record.get(k, "") for k in __EXPORT_HEADERS])
- return data
+ for record in event_to_records_worker(event):
+ data_worker.append(
+ [record.get(k, "") for k in __EXPORT_HEADERS_CSWWWORKER]
+ )
+ elif isinstance(event, LALevelEvent):
+ for record in event_to_records_lalevel(event):
+ data_lalevel.append(
+ [record.get(k, "") for k in __EXPORT_HEADERS_LALEVELVAC]
+ )
+ return data_worker, data_lalevel
diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py
index d951c358..fa4d841b 100644
--- a/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py
+++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py
@@ -34,22 +34,20 @@ def add_la_name(data, la_name):
# return data
-def add_fields(input_year, data, la_name, la_code):
+def add_fields(input_year, data, la_name):
"""
- Add YEAR, LA, PERSONSCHOOLYEAR to exported dataframe
- Append LA_code from config to LAChildID
+ Add YEAR, LA to exported dataframe
:param input_year: A string of the year of return for the current file
:param data: The dataframe to be cleaned
:param la_name: LA name
- :param la_code: LA code
:return: Cleaned and degraded dataframe
"""
data = convert_to_dataframe(data)
data = get_year(data, input_year)
- data = convert_to_datetime(data)
- # data = add_school_year(data)
data = add_la_name(data, la_name)
+
+ # data = convert_to_datetime(data)
# data = la_prefix(data, la_code)
# data = degrade_dob(data)
# data = degrade_expected_dob(data)
@@ -57,8 +55,18 @@ def add_fields(input_year, data, la_name, la_code):
return data
-def export_file(input, output, data):
- filename = Path(input).stem
- outfile = filename + "_clean.csv"
+def export_file(input, output, data, filenamelevel):
+ """
+ Write the cleansed and degraded dataframe to a csv file in the output folder.
+ Example of output filename: social_work_workforce_2022_lalevel_clean.csv
+
+ :param input: Location of the input file, including file name and suffix; its stem forms the start of the output filename
+ :param output: Path to the output folder
+ :param data: The cleansed dataframe to be output
+ :param filenamelevel: String appended to the output filename indicating the aggregation level, e.g. "worker" or "lalevel"
+ :return: None - the dataframe is written to <input stem>_<filenamelevel>_clean.csv
+ """
+ filenamestem = Path(input).stem
+ outfile = filenamestem + "_" + filenamelevel + "_clean.csv"
output_path = Path(output, outfile)
data.to_csv(output_path, index=False)
From 88a2f2953402b36331580a1260ebd8df8b5c5c74 Mon Sep 17 00:00:00 2001
From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com>
Date: Tue, 4 Jul 2023 12:44:11 +0000
Subject: [PATCH 07/40] Simplify event_to_records
---
.../lds_csww_clean/csww_record.py | 16 +++++-----------
1 file changed, 5 insertions(+), 11 deletions(-)
diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py
index 07414065..c2f8fdd3 100644
--- a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py
+++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py
@@ -121,16 +121,10 @@ def lalevel_event(record, property, event_name=None):
return ()
-def event_to_records_worker(event: CSWWEvent) -> Iterator[dict]:
+def event_to_records(event) -> Iterator[dict]:
record = event.record
- for csww_item in _maybe_list(record):
- yield from csww_event_worker({**csww_item}, "StepUpGrad")
-
-
-def event_to_records_lalevel(event: LALevelEvent) -> Iterator[dict]:
- record = event.record
- for lalevel_item in _maybe_list(record):
- yield from lalevel_event({**lalevel_item}, "NoAgencyFTE")
+ for item in _maybe_list(record):
+ yield from (item,)
def export_table(stream):
@@ -138,12 +132,12 @@ def export_table(stream):
data_lalevel = tablib.Dataset(headers=__EXPORT_HEADERS_LALEVELVAC)
for event in stream:
if isinstance(event, CSWWEvent):
- for record in event_to_records_worker(event):
+ for record in event_to_records(event):
data_worker.append(
[record.get(k, "") for k in __EXPORT_HEADERS_CSWWWORKER]
)
elif isinstance(event, LALevelEvent):
- for record in event_to_records_lalevel(event):
+ for record in event_to_records(event):
data_lalevel.append(
[record.get(k, "") for k in __EXPORT_HEADERS_LALEVELVAC]
)
From 7e5102216ba6db9583ad24576c4137f867ba8b33 Mon Sep 17 00:00:00 2001
From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com>
Date: Tue, 4 Jul 2023 12:50:14 +0000
Subject: [PATCH 08/40] Remove unnecessary event functions
---
.../lds_csww_clean/csww_record.py | 22 -------------------
1 file changed, 22 deletions(-)
diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py
index c2f8fdd3..7eea634b 100644
--- a/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py
+++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/csww_record.py
@@ -99,28 +99,6 @@ def _maybe_list(value):
return value
-def csww_event_worker(record, property, event_name=None):
- if event_name is None:
- event_name = property
- value = record.get(property)
- if value:
- new_record = {**record, "Date": value, "Type": event_name}
- return ({k: new_record.get(k) for k in __EXPORT_HEADERS_CSWWWORKER},)
-
- return ()
-
-
-def lalevel_event(record, property, event_name=None):
- if event_name is None:
- event_name = property
- value = record.get(property)
- if value:
- new_record = {**record, "Date": value, "Type": event_name}
- return ({k: new_record.get(k) for k in __EXPORT_HEADERS_LALEVELVAC},)
-
- return ()
-
-
def event_to_records(event) -> Iterator[dict]:
record = event.record
for item in _maybe_list(record):
From db9cd7d31d853c530e47bbc0f29c5e161df752d5 Mon Sep 17 00:00:00 2001
From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com>
Date: Tue, 4 Jul 2023 15:09:34 +0000
Subject: [PATCH 09/40] Degrade dob and SWENo
---
.../lds_csww_clean/file_creator.py | 57 +++++++++++++++----
1 file changed, 46 insertions(+), 11 deletions(-)
diff --git a/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py b/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py
index fa4d841b..3f383dcd 100644
--- a/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py
+++ b/liiatools/datasets/social_work_workforce/lds_csww_clean/file_creator.py
@@ -1,6 +1,9 @@
from pathlib import Path
import pandas as pd
import logging
+import hashlib
+from typing import Dict
+from decouple import config
from liiatools.datasets.shared_functions import converters, common
@@ -18,9 +21,10 @@ def get_year(data, year):
def convert_to_datetime(data):
- data[["PersonBirthDate", "RoleStartDate"]] = data[
- ["PersonBirthDate", "RoleStartDate"]
- ].apply(pd.to_datetime)
+ if set(["PersonBirthDate", "RoleStartDate"]).issubset(data):
+ data[["PersonBirthDate", "RoleStartDate"]] = data[
+ ["PersonBirthDate", "RoleStartDate"]
+ ].apply(pd.to_datetime)
return data
@@ -29,9 +33,42 @@ def add_la_name(data, la_name):
return data
-# def la_prefix(data, la_code):
-# data["LAchildID"] = data["LAchildID"] + "_" + la_code
-# return data
+def degrade_dob(data):
+ if "PersonBirthDate" in data:
+ if data["PersonBirthDate"] is not None:
+ data["PersonBirthDate"] = data["PersonBirthDate"].apply(
+ lambda row: converters.to_month_only_dob(row)
+ )
+ return data
+
+
+def degrade_SWENo(data):
+ """
+ Replaces each value in the SWENo column, if present, with its hashed version
+ """
+ if "SWENo" in data:
+ if data["SWENo"] is not None:
+ data["SWENo"] = data["SWENo"].apply(lambda row: swe_hash(row))
+ return data
+
+
+def swe_hash(swe_num):
+ """
+ Converts the **SWENo** field to a SHA3-256 hash code represented in HEX.
+ :param swe_num: SWE number to be converted; the "sec_str" config value (default "") is appended before hashing
+ :return: Hexadecimal digest of the hashed SWE number
+ """
+
+ private_string = config("sec_str", default="")
+
+ private_key = swe_num + private_string
+
+ # Encode the key (SWENo plus secret string) as bytes ready for hashing
+ plaintext = private_key.encode()
+
+ hash_algorithm = hashlib.sha3_256(plaintext)
+
+ return hash_algorithm.hexdigest()
def add_fields(input_year, data, la_name):
@@ -47,11 +84,9 @@ def add_fields(input_year, data, la_name):
data = get_year(data, input_year)
data = add_la_name(data, la_name)
- # data = convert_to_datetime(data)
- # data = la_prefix(data, la_code)
- # data = degrade_dob(data)
- # data = degrade_expected_dob(data)
- # data = degrade_death_date(data)
+ data = convert_to_datetime(data)
+ data = degrade_dob(data)
+ data = degrade_SWENo(data)
return data
From 57a90922163c2c45c42eb16a15edcd1f5ee663db Mon Sep 17 00:00:00 2001
From: Stephen C <127780498+StephenCarterLIIA@users.noreply.github.com>
Date: Wed, 5 Jul 2023 10:38:40 +0000
Subject: [PATCH 10/40] Create additional sample files for testing la_agg
---
.../csww/BAD/social_work_workforce_2021.xml | 556 ++++++++++++++++++
.../csww/BAD/social_work_workforce_2022.xml | 4 +-
.../social_work_workforce_2021.xsd | 254 ++++++++
3 files changed, 812 insertions(+), 2 deletions(-)
create mode 100644 liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2021.xml
create mode 100644 liiatools/spec/social_work_workforce/social_work_workforce_2021.xsd
diff --git a/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2021.xml b/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2021.xml
new file mode 100644
index 00000000..8ef1b9ef
--- /dev/null
+++ b/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2021.xml
@@ -0,0 +1,556 @@
+
+
+
+ CSWW
+ 2021
+ 2021-09-30
+
+
+
+
+ 66.66
+ 40.40
+ 100
+
+
+ 1
+ Ox2054309383
+ 0.521371
+ 1969-01-22
+ 1
+ REFU
+ Institution Name
+ 0
+ 1988-04-07
+ 9
+ 72
+ 15.31
+ 288.7
+ 1
+ 0
+ TRN
+ 1
+
+
+ 1
+ Yk7226043359
+ 0
+ 1958-04-07
+ 9
+ Institution Name
+ 1
+ 2
+ 8
+ 2019-09-23
+ 7
+ 10
+ 0.603665
+ 66
+ 29.87
+ 2.5
+ 1
+ 4
+
+
+ 1
+ iP8098309864
+ 0
+ 1984-01-12
+ APKN
+ Institution Name
+ 0
+ 5
+ 2014-01-26
+ 4
+ 2023-03-28
+ 3
+ 9
+ 0.23246
+ 92.56
+ 213.4
+ 0
+ 2
+
+
+ 0
+ oP8178849586
+ 0.899676
+ 1990-09-28
+ 9
+ BAFR
+ 1
+ 0
+ 6
+ 2023-03-28
+ 2
+ 0.429963
+ 14.39
+ 0
+
+
+ 1
+ nH9419631053
+ 0.133587
+ 2
+ AIND
+ Institution Name
+ 3
+ 1
+ 2017-06-10
+ 0.436348
+ 5.39
+ 475.7
+ 1
+ UNP
+ 1
+
+
+ 1
+ JJ3661684122
+ 0
+ 1993-05-19
+ 9
+ 3
+ 2020-06-14
+ 3
+ 2023-03-28
+ 1
+ 5
+ 0.903669
+ 11
+ 141.0
+ 1
+
+
+ 1
+ tN2120744892
+ 0.803122
+ WBRI
+ Institution Name
+ 3
+ 0.964327
+ 95.06
+ 403.6
+ 0
+ 2
+
+
+ 1
+ Zo9779760045
+ 0.767688
+ 1996-08-31
+ 0
+ MWAS
+ 2
+ 1
+ 5
+ 2023-03-28
+ 62
+ 1
+ 2
+
+
+ 0
+ wf3752370095
+ 0.843488
+ 1959-04-17
+ 2
+ APKN
+ Institution Name
+ 3
+ 0
+ 1997-10-01
+ 2
+ 0.712824
+ 16.74
+ 456.3
+ 0
+ 4
+
+
+ 1
+ OW2475789301
+ 0
+ 1971-10-02
+ Institution Name
+ 1
+ 1
+ 1993-10-04
+ 2023-03-28
+ 3
+ 6
+ 0.908092
+ 45
+ 22.98
+ 441.5
+ 0
+ 3
+
+
+ 1
+ Kv3016593719
+ 0.12232
+ 1996-06-05
+ 1
+ BAFR
+ 2
+ 6
+ 10
+ 0.641824
+ 23
+ 36.13
+ 213.1
+ 0
+
+
+ 0
+ TB9669555723
+ 0
+ 1987-10-30
+ 1
+ 0
+ 2
+ 2012-10-02
+ 2
+ 2023-03-28
+ 6
+ 7
+ 37
+ 90.85
+ 28.5
+ 1
+ 1
+ UNA
+ 1
+
+
+ 1
+ QK8499162867
+ 0
+ 1968-11-27
+ ABAN
+ 2
+ 0
+ 6
+ 2018-08-03
+ 9
+ 2023-03-28
+ 2
+ 9
+ 0.078464
+ 43.02
+ 154.7
+ 1
+ 1
+
+
+ 0
+ Wr5514040878
+ 0
+ 0
+ AOTH
+ Institution Name
+ 1
+ 0
+ 2
+ 2015-04-24
+ 9
+ 2023-03-28
+ 1
+ 6
+ 3.51
+ 424.0
+ 2
+
+
+ 0
+ Aj9242652291
+ 0.859218
+ 1968-12-31
+ 0
+ BCRB
+ Institution Name
+ 1
+ 1
+ 1
+ 2003-09-12
+ 5
+ 0.320526
+ 85
+ 98.22
+ 206.6
+ 2
+
+
+ 0
+ Jv2635496195
+ 0.021911
+ 1977-06-27
+ REFU
+ Institution Name
+ 1
+ 6
+ 2022-10-08
+ 6
+ 0.69819
+ 25
+ 29.19
+ 1
+ 1
+ SIC
+
+
+ 1
+ To5555885076
+ 0.786453
+ 1996-11-18
+ 0
+ MWAS
+ Institution Name
+ 3
+ 0
+ 4
+ 2023-03-28
+ 1
+ 0.441344
+ 83
+ 78.29
+ 364.4
+ 1
+
+
+ 0
+ rK9218104079
+ 0.491425
+ 1998-04-15
+ 1
+ 3
+ 0
+ 2023-03-28
+ 4
+ 0.939826
+ 3.1
+ 415.3
+ 4
+
+
+ 1
+ cD9282390165
+ 0.192894
+ 1959-09-25
+ 0
+ REFU
+ Institution Name
+ 2
+ 1
+ 3
+ 1985-12-12
+ 9
+ 0.18449
+ 14
+ 188.4
+ 0
+ 1
+
+
+ 0
+ zU6140515687
+ 0
+ 1962-11-04
+ WBRI
+ 3
+ 1
+ 3
+ 1999-07-14
+ 2018-08-20
+ 8
+ 3
+ 0.222573
+ 65
+ 16.26
+ 1
+
+
+ 1
+ ih3342923522
+ 0.862474
+ 1992-02-18
+ 0
+ WBRI
+ 3
+ 1
+ 2023-03-28
+ 4
+ 0.761443
+ 39
+ 0
+ 2
+
+
+ 1
+ cm3809724991
+ 0
+ 2001-10-29
+ 1
+ AIND
+ 3
+ 0
+ 1
+ 2023-03-28
+ 5
+ 4
+ 0.530908
+ 29
+ 38.71
+ 339.9
+ 0
+
+
+ 1
+ PA8564166424
+ 0.668266
+ 1983-04-13
+ 9
+ Institution Name
+ 1
+ 1
+ 2023-03-28
+ 3
+ 0.707445
+ 1
+ 2
+
+
+ 0
+ QW8564363911
+ 0.978729
+ 1958-04-26
+ 9
+ MWBA
+ Institution Name
+ 1
+ 2002-01-31
+ 1
+ 0.698641
+ 121.9
+ 1
+ 3
+
+
+ 1
+ PQ5842914246
+ 0
+ 1989-06-05
+ 1
+ 1
+ 1
+ 2011-08-31
+ 5
+ 2023-03-28
+ 7
+ 9
+ 0.443976
+ 70
+ 12.2
+ 301.3
+ 0
+ 4
+
+
+ 0
+ ZQ9393137749
+ 0
+ 1981-09-21
+ CHNE
+ 1
+ 1
+ 2001-02-10
+ 6
+ 2023-03-28
+ 4
+ 1
+ 0.821627
+ 94.67
+ 471.5
+ 0
+ 2
+
+
+ 1
+ Pv9093835426
+ 0.561974
+ OOTH
+ Institution Name
+ 2
+ 0
+ 6
+ 2014-09-30
+ 4
+ 0.965936
+ 63
+ 87.59
+ 0
+ SIC
+ 1
+
+
+ 0
+ eW7601111729
+ 0
+ Institution Name
+ 3
+ 0
+ 1
+ 1993-04-18
+ 3
+ 2023-03-28
+ 7
+ 1
+ 0.63075
+ 80
+ 299.1
+ 0
+ 4
+
+
+ 0
+ Jd1465867330
+ 0.034436
+ 2
+ APKN
+ Institution Name
+ 1
+ 1997-01-11
+ 4
+ 0.22182
+ 23
+ 83.01
+ 0
+ 3
+
+
+ 1
+ od1620971821
+ 0
+ 1975-01-19
+ 9
+ WOTH
+ Institution Name
+ 2
+ 0
+ 1
+ 2016-08-20
+ 9
+ 2023-03-28
+ 5
+ 9
+ 87
+ 13.01
+ 1
+
+
\ No newline at end of file
diff --git a/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml b/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml
index 8da0fed3..60a8164c 100644
--- a/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml
+++ b/liiatools/spec/social_work_workforce/samples/csww/BAD/social_work_workforce_2022.xml
@@ -2,8 +2,8 @@
CSWW
- 2020
- 2023-03-28
+ 2022
+ 2022-09-30