diff --git a/lib/kiba/extend/mixins/iterative_cleanup.rb b/lib/kiba/extend/mixins/iterative_cleanup.rb index a5fcf7eb8..62dc08d5f 100644 --- a/lib/kiba/extend/mixins/iterative_cleanup.rb +++ b/lib/kiba/extend/mixins/iterative_cleanup.rb @@ -1,118 +1,124 @@ # frozen_string_literal: true -# Mixin module for setting up iterative cleanup based on a source table. -# -# @since 4.0.0 -# -# "Iterative cleanup" means the client may provide the worksheet more -# than once, or that you may need to produce a fresh worksheet for -# the client after a new database export is provided. -# -# Your project must follow some setup/configuration conventions in order to use -# this mixin: -# -# - Each cleanup process must be configured in its own config module. -# - A config module is a Ruby module that responds to `:config`. -# -# Refer to todo:link Kiba::Tms::AltNumsForObjTypeCleanup as an example config -# module extending this mixin module in a simple way. See -# todo:link Kiba::Tms::PlacesCleanupInitial for a more complex usage with -# default overrides and custom pre/post transforms. -# -# ## Implementation details -# -# ### Define before extending this module -# -# These can be defined as Dry::Configurable settings or as public methods. The -# section below lists the method/setting name the extending module should -# respond to, each preceded by its YARD signature. -# -# ``` -# # @return [Symbol] registry entry job key for the job whose output -# # will be used as the base for generating the cleanup worksheet. -# # Iterations of cleanup will be layered over this output in the -# # auto-generated. **NOTE: This job's output should include a field -# # which combines/identifies the original values that may be -# # affected by the cleanup process. The default expectation is that -# # this field is named :fingerprint, but this can be overridden by -# # defining a custom `orig_values_identifier` method in the -# # extending module after extension. 
This field is used as a -# # matchpoint for merging cleaned up data back into the migration, -# # and identifying whether a given value in subsequent worksheet -# # iterations has been previously included in a worksheet** -# # base_job -# # -# # @return [Array] tags assigned to all jobs generated by extending -# # IterativeCleanup -# # job_tags -# # -# # @return [Array] nil/empty fields to be added to worksheet -# # worksheet_add_fields -# # -# # @return [Array] order of fields (in worksheet output). Will be used -# # to set destination special options/initial headers on the worksheet job. -# # worksheet_field_order -# # -# # @return [Array] fields included in the fingerprint value -# # fingerprint_fields -# # -# # @return [Symbol, Array, nil] field or fields included in -# # the fingerprint value that should be ignored when flagging -# # changes -# # fingerprint_flag_ignore_fields -# ``` -# -# ### Then, extend this module -# -# `extend Kiba::Extend::Mixins::IterativeCleanup` -# -# ### Optional settings/methods in extending module -# -# Default values for the following methods defined in this mixin -# module. If you want to override the values, define these methods -# in your config module after extending this module. -# -# - {collation_delim} -# - {orig_values_identifier} -# - {cleaned_values_identifier} -# - {cleaned_uniq_collate_fields} -# -# ## What extending this module does -# -# ### Defines settings in the extending config module -# -# These are empty settings with constructors that will use the values in a -# client-specific project config file to build the data expected for cleanup -# processing -# -# - **:provided_worksheets** - Array of filenames of cleanup -# worksheets provided to the client. Files should be listed -# oldest-to-newest. Assumes files are in the `to_client` -# subdirectory of the migration base directory. 
**Define actual -# values in client config file.** -## - **:returned_files** - Array of filenames of completed worksheets -# returned by client. Files should be listed oldest-to-newest. -# Assumes files are in the `supplied` subdirectory of the migration -# base directory. **Define actual values in client config file.** -# -# ### Defines methods in the extending config module -# -# See method documentation inline below. -# -# ### Prepares registry entries for iterative cleanup jobs -# -# When the application loads, {Kiba::Tms::RegistryData.register} calls -# {Kiba::Tms::Utils::IterativeCleanupJobRegistrar}. This util class calls -# the {register_cleanup_jobs} method of each config module extending this -# module, adding the cleanup jobs to the registry dynamically. -# -# The jobs themselves (i.e. the sources, lookups, transforms) are -# defined in {Kiba::Tms::Jobs::IterativeCleanup}. See that module's -# documentation for how to set up custom pre/post transforms to customize -# specific cleanup routines. module Kiba module Extend module Mixins + # Mixin module for setting up iterative cleanup based on a source table. + # + # @since 4.0.0 + # + # "Iterative cleanup" means the client may provide the worksheet more + # than once, or that you may need to produce a fresh worksheet for + # the client after a new database export is provided. + # + # Your project must follow some setup/configuration conventions + # in order to use this mixin: + # + # - Each cleanup process must be configured in its own config module. + # - A config module is a Ruby module that responds to `:config`. + # + # Refer to todo:link Kiba::Tms::AltNumsForObjTypeCleanup as an + # example config module extending this mixin module in a + # simple way. See todo:link Kiba::Tms::PlacesCleanupInitial + # for a more complex usage with default overrides and custom + # pre/post transforms. 
+ # + # ## Implementation details + # + # ### Define before extending this module + # + # These can be defined as Dry::Configurable settings or as + # public methods. The section below lists the method/setting + # name the extending module should respond to, each preceded + # by its YARD signature. + # + # ``` + # # @return [Symbol] registry entry job key for the job whose output + # # will be used as the base for generating the cleanup worksheet. + # # Iterations of cleanup will be layered over this output in the + # # auto-generated. **NOTE: This job's output should include a field + # # which combines/identifies the original values that may be + # # affected by the cleanup process. The default expectation is that + # # this field is named :fingerprint, but this can be overridden by + # # defining a custom `orig_values_identifier` method in the + # # extending module after extension. This field is used as a + # # matchpoint for merging cleaned up data back into the migration, + # # and identifying whether a given value in subsequent worksheet + # # iterations has been previously included in a worksheet** + # # base_job + # # + # # @return [Array] tags assigned to all jobs generated + # # by extending IterativeCleanup + # # job_tags + # # + # # @return [Array] nil/empty fields to be added to worksheet + # # worksheet_add_fields + # # + # # @return [Array] order of fields (in worksheet + # # output). Will be used to set destination special + # # options/initial headers on the worksheet job. 
+ # # worksheet_field_order + # # + # # @return [Array] fields included in the fingerprint value + # # fingerprint_fields + # # + # # @return [Symbol, Array, nil] field or fields included in + # # the fingerprint value that should be ignored when flagging + # # changes + # # fingerprint_flag_ignore_fields + # ``` + # + # ### Then, extend this module + # + # `extend Kiba::Extend::Mixins::IterativeCleanup` + # + # ### Optional settings/methods in extending module + # + # Default values for the following methods are defined in this mixin + # module. If you want to override the values, define these methods + # in your config module after extending this module. + # + # - {collation_delim} + # - {orig_values_identifier} + # - {cleaned_values_identifier} + # - {cleaned_uniq_collate_fields} + # + # ## What extending this module does + # + # ### Defines settings in the extending config module + # + # These are empty settings with constructors that will use the + # values in a client-specific project config file to build the + # data expected for cleanup processing. + # + # - **:provided_worksheets** - Array of filenames of cleanup + # worksheets provided to the client. Files should be listed + # oldest-to-newest. Assumes files are in the `to_client` + # subdirectory of the migration base directory. **Define actual + # values in client config file.** + # - **:returned_files** - Array of filenames of completed worksheets + # returned by client. Files should be listed oldest-to-newest. + # Assumes files are in the `supplied` subdirectory of the migration + # base directory. **Define actual values in client config file.** + # + # ### Defines methods in the extending config module + # + # See method documentation inline below. + # + # ### Prepares registry entries for iterative cleanup jobs + # + # When the project application loads, the method that registers + # the project's registry entries calls + # {Kiba::Extend::Utils::IterativeCleanupJobRegistrar}. 
This + # util class calls the {register_cleanup_jobs} method of each + # config module extending this module, adding the cleanup jobs + # to the registry dynamically. + # + # The jobs themselves (i.e. the sources, lookups, transforms) + # are defined in + # {Kiba::Extend::Mixins::IterativeCleanup::Jobs}. See that + # module's documentation for how to set up custom pre/post + # transforms to customize specific cleanup routines. module IterativeCleanup def self.extended(mod) check_required_settings(mod) @@ -374,7 +380,8 @@ def base_job_cleaned_job_hash(mod) path: File.join(Kiba::Extend::Mixins::IterativeCleanup.datadir(mod), "working", "#{mod.cleanup_base_name}_base_job_cleaned.csv"), creator: { - callee: Kiba::Extend::Mixins::IterativeCleanup::BaseJobCleaned, + callee: + Kiba::Extend::Mixins::IterativeCleanup::Jobs::BaseJobCleaned, args: {mod: mod} }, tags: mod.job_tags, @@ -388,7 +395,7 @@ def cleaned_uniq_job_hash(mod) path: File.join(Kiba::Extend::Mixins::IterativeCleanup.datadir(mod), "working", "#{mod.cleanup_base_name}_cleaned_uniq.csv"), creator: { - callee: Kiba::Extend::Mixins::IterativeCleanup::CleanedUniq, + callee: Kiba::Extend::Mixins::IterativeCleanup::Jobs::CleanedUniq, args: {mod: mod} }, tags: mod.job_tags @@ -401,7 +408,7 @@ def worksheet_job_hash(mod) path: File.join(Kiba::Extend::Mixins::IterativeCleanup.datadir(mod), "to_client", "#{mod.cleanup_base_name}_worksheet.csv"), creator: { - callee: Kiba::Extend::Mixins::IterativeCleanup::Worksheet, + callee: Kiba::Extend::Mixins::IterativeCleanup::Jobs::Worksheet, args: {mod: mod} }, tags: mod.job_tags, @@ -415,7 +422,8 @@ def returned_compiled_job_hash(mod) path: File.join(Kiba::Extend::Mixins::IterativeCleanup.datadir(mod), "working", "#{mod.cleanup_base_name}_returned_compiled.csv"), creator: { - callee: Kiba::Extend::Mixins::IterativeCleanup::ReturnedCompiled, + callee: + Kiba::Extend::Mixins::IterativeCleanup::Jobs::ReturnedCompiled, args: {mod: mod} }, tags: mod.job_tags @@ -428,7 +436,7 @@ def 
corrections_job_hash(mod) path: File.join(Kiba::Extend::Mixins::IterativeCleanup.datadir(mod), "working", "#{mod.cleanup_base_name}_corrections.csv"), creator: { - callee: Kiba::Extend::Mixins::IterativeCleanup::Corrections, + callee: Kiba::Extend::Mixins::IterativeCleanup::Jobs::Corrections, args: {mod: mod} }, tags: mod.job_tags, diff --git a/lib/kiba/extend/mixins/iterative_cleanup/base_job_cleaned.rb b/lib/kiba/extend/mixins/iterative_cleanup/base_job_cleaned.rb deleted file mode 100644 index 776c1637a..000000000 --- a/lib/kiba/extend/mixins/iterative_cleanup/base_job_cleaned.rb +++ /dev/null @@ -1,70 +0,0 @@ -# frozen_string_literal: true - -module Kiba - module Extend - module Mixins - module IterativeCleanup - module BaseJobCleaned - module_function - - def job(mod:) - Kiba::Extend::Jobs::Job.new( - files: { - source: mod.base_job, - destination: mod.base_job_cleaned_job_key, - lookup: get_lookups(mod) - }, - transformer: get_xforms(mod) - ) - end - - def get_lookups(mod) - base = [] - base << mod.corrections_job_key if mod.cleanup_done? - base.select { |job| Kiba::Extend::Job.output?(job) } - end - - def get_xforms(mod) - base = [] - if mod.respond_to?(:base_job_cleaned_pre_xforms) - base << mod.base_job_cleaned_pre_xforms - end - base << xforms(mod) - if mod.respond_to?(:base_job_cleaned_post_xforms) - base << mod.base_job_cleaned_post_xforms - end - base - end - - def xforms(mod) - bind = binding - - Kiba.job_segment do - job = bind.receiver - lookups = job.send(:get_lookups, mod) - - transform Append::NilFields, - fields: mod.worksheet_add_fields - - # Add :fingerprint (orig values) before merging any cleanup in - transform Fingerprint::Add, - target: :fingerprint, - fields: mod.fingerprint_fields - - if mod.cleanup_done? 
&& lookups.any?(mod.corrections_job_key) - transform Fingerprint::MergeCorrected, - lookup: method(mod.corrections_job_key).call, - keycolumn: mod.orig_values_identifier, - todofield: :corrected - end - - transform Fingerprint::Add, - target: :clean_fingerprint, - fields: mod.fingerprint_fields - end - end - end - end - end - end -end diff --git a/lib/kiba/extend/mixins/iterative_cleanup/cleaned_uniq.rb b/lib/kiba/extend/mixins/iterative_cleanup/cleaned_uniq.rb deleted file mode 100644 index 5e4c6b6d5..000000000 --- a/lib/kiba/extend/mixins/iterative_cleanup/cleaned_uniq.rb +++ /dev/null @@ -1,88 +0,0 @@ -# frozen_string_literal: true - -module Kiba - module Extend - module Mixins - module IterativeCleanup - module CleanedUniq - module_function - - def job(mod:) - Kiba::Extend::Jobs::Job.new( - files: { - source: mod.base_job_cleaned_job_key, - destination: mod.cleaned_uniq_job_key, - lookup: get_lookups(mod) - }, - transformer: get_xforms(mod) - ) - end - - def get_lookups(mod) - base = [mod.base_job_cleaned_job_key] - base.select { |job| Kiba::Extend::Job.output?(job) } - end - - def get_xforms(mod) - base = [] - if mod.respond_to?(:cleaned_uniq_pre_xforms) - base << mod.cleaned_uniq_pre_xforms - end - - base << (mod.cleanup_done? ? 
cleaned_xforms(mod) : orig_xforms(mod)) - - if mod.respond_to?(:cleaned_uniq_post_xforms) - base << mod.cleaned_uniq_post_xforms - end - base - end - - def orig_xforms(mod) - bind = binding - - Kiba.job_segment do - transform Rename::Fields, - fieldmap: bind.receiver.send(:fieldmap, mod) - .invert - .reject { |key, val| key == val } - end - end - - def cleaned_xforms(mod) - bind = binding - - Kiba.job_segment do - job = bind.receiver - - transform Deduplicate::Table, - field: mod.cleaned_values_identifier, - delete_field: false - transform Delete::Fields, - fields: mod.cleaned_uniq_collate_fields - transform Merge::MultiRowLookup, - lookup: send(mod.base_job_cleaned_job_key), - keycolumn: mod.cleaned_values_identifier, - fieldmap: job.send(:fieldmap, mod), - delim: mod.collation_delim - end - end - - def fieldmap(mod) - mod.cleaned_uniq_collate_fields.map do |field| - field_mapping(field) - end.to_h - end - - def field_mapping(field) - if field.to_s.end_with?("s") - [field, field] - else - ["#{field}s".to_sym, field] - end - end - private :field_mapping - end - end - end - end -end diff --git a/lib/kiba/extend/mixins/iterative_cleanup/corrections.rb b/lib/kiba/extend/mixins/iterative_cleanup/corrections.rb deleted file mode 100644 index b5937be39..000000000 --- a/lib/kiba/extend/mixins/iterative_cleanup/corrections.rb +++ /dev/null @@ -1,57 +0,0 @@ -# frozen_string_literal: true - -module Kiba - module Extend - module Mixins - module IterativeCleanup - module Corrections - module_function - - def job(mod:) - return unless mod.cleanup_done? 
- - Kiba::Extend::Jobs::Job.new( - files: { - source: mod.returned_compiled_job_key, - destination: mod.corrections_job_key - }, - transformer: get_xforms(mod) - ) - end - - def get_xforms(mod) - base = [] - if mod.respond_to?(:corrections_pre_xforms) - base << mod.corrections_pre_xforms - end - - base << xforms(mod) - - if mod.respond_to?(:corrections_post_xforms) - base << mod.corrections_post_xforms - end - base - end - - def xforms(mod) - Kiba.job_segment do - transform FilterRows::FieldPopulated, - action: :keep, - field: :corrected - transform Explode::RowsFromMultivalField, - field: mod.collated_orig_values_id_field, - delim: mod.collation_delim - transform Rename::Field, - from: mod.collated_orig_values_id_field, - to: mod.orig_values_identifier - transform CombineValues::FullRecord - transform Deduplicate::Table, - field: :index, - delete_field: true - end - end - end - end - end - end -end diff --git a/lib/kiba/extend/mixins/iterative_cleanup/jobs.rb b/lib/kiba/extend/mixins/iterative_cleanup/jobs.rb new file mode 100644 index 000000000..c86d68d79 --- /dev/null +++ b/lib/kiba/extend/mixins/iterative_cleanup/jobs.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +module Kiba + module Extend + module Mixins + module IterativeCleanup + # Namespace for jobs set up via extending the {Mixins::IterativeCleanup} + # module + # + # Each job is passed `mod` when it is called. This is the + # cleanup config module that extends + # {Mixins::IterativeCleanup}. The job refers to + # configuration settings from the config module to + # dynamically define the job at runtime. + # + # Each job defined in this namespace has a set of standard + # transforms, which can be viewed in the source of its + # `xforms` method + # + # The extending config module may define custom transforms to + # be run pre and/or post the standard transforms for each + # job. The pattern for doing this is: + # + # - Take the name of the relevant job module, e.g. 
+ # - {Jobs::BaseJobCleaned} + # - Convert it to lowercase snake case, e.g. base_job_cleaned + # - Indicate pre or post standard transforms: e.g. + # base_job_cleaned_pre_xforms or + # base_job_cleaned_post_xforms. **This is the name of the + # method you define in the extending configuration module**. + # - The method definition should be just as the `xforms` + # methods in all jobs. It should be a set of transforms + # defined within a `Kiba.job_segment` block. If the custom + # xforms method needs to call methods/settings defined in + # the config module, use `binding` as shown below: + # + # ```ruby + # def base_job_cleaned_post_xforms + # bind = binding + # + # Kiba.job_segment do + # mod = bind.receiver + # + # transform Delete::Fields, + # fields: mod.post_xform_delete_fields + # end + # end + # ``` + module Jobs + end + end + end + end +end diff --git a/lib/kiba/extend/mixins/iterative_cleanup/jobs/base_job_cleaned.rb b/lib/kiba/extend/mixins/iterative_cleanup/jobs/base_job_cleaned.rb new file mode 100644 index 000000000..c19d06b03 --- /dev/null +++ b/lib/kiba/extend/mixins/iterative_cleanup/jobs/base_job_cleaned.rb @@ -0,0 +1,72 @@ +# frozen_string_literal: true + +module Kiba + module Extend + module Mixins + module IterativeCleanup + module Jobs + module BaseJobCleaned + module_function + + def job(mod:) + Kiba::Extend::Jobs::Job.new( + files: { + source: mod.base_job, + destination: mod.base_job_cleaned_job_key, + lookup: get_lookups(mod) + }, + transformer: get_xforms(mod) + ) + end + + def get_lookups(mod) + base = [] + base << mod.corrections_job_key if mod.cleanup_done? 
+ base.select { |job| Kiba::Extend::Job.output?(job) } + end + + def get_xforms(mod) + base = [] + if mod.respond_to?(:base_job_cleaned_pre_xforms) + base << mod.base_job_cleaned_pre_xforms + end + base << xforms(mod) + if mod.respond_to?(:base_job_cleaned_post_xforms) + base << mod.base_job_cleaned_post_xforms + end + base + end + + def xforms(mod) + bind = binding + + Kiba.job_segment do + job = bind.receiver + lookups = job.send(:get_lookups, mod) + + transform Append::NilFields, + fields: mod.worksheet_add_fields + + # Add :fingerprint (orig values) before merging any cleanup in + transform Fingerprint::Add, + target: :fingerprint, + fields: mod.fingerprint_fields + + if mod.cleanup_done? && lookups.any?(mod.corrections_job_key) + transform Fingerprint::MergeCorrected, + lookup: method(mod.corrections_job_key).call, + keycolumn: mod.orig_values_identifier, + todofield: :corrected + end + + transform Fingerprint::Add, + target: :clean_fingerprint, + fields: mod.fingerprint_fields + end + end + end + end + end + end + end +end diff --git a/lib/kiba/extend/mixins/iterative_cleanup/jobs/cleaned_uniq.rb b/lib/kiba/extend/mixins/iterative_cleanup/jobs/cleaned_uniq.rb new file mode 100644 index 000000000..cba5e082d --- /dev/null +++ b/lib/kiba/extend/mixins/iterative_cleanup/jobs/cleaned_uniq.rb @@ -0,0 +1,94 @@ +# frozen_string_literal: true + +module Kiba + module Extend + module Mixins + module IterativeCleanup + module Jobs + module CleanedUniq + module_function + + def job(mod:) + Kiba::Extend::Jobs::Job.new( + files: { + source: mod.base_job_cleaned_job_key, + destination: mod.cleaned_uniq_job_key, + lookup: get_lookups(mod) + }, + transformer: get_xforms(mod) + ) + end + + def get_lookups(mod) + base = [mod.base_job_cleaned_job_key] + base.select { |job| Kiba::Extend::Job.output?(job) } + end + + def get_xforms(mod) + base = [] + if mod.respond_to?(:cleaned_uniq_pre_xforms) + base << mod.cleaned_uniq_pre_xforms + end + + base << if mod.cleanup_done? 
+ cleaned_xforms(mod) + else + orig_xforms(mod) + end + + if mod.respond_to?(:cleaned_uniq_post_xforms) + base << mod.cleaned_uniq_post_xforms + end + base + end + + def orig_xforms(mod) + bind = binding + + Kiba.job_segment do + transform Rename::Fields, + fieldmap: bind.receiver.send(:fieldmap, mod) + .invert + .reject { |key, val| key == val } + end + end + + def cleaned_xforms(mod) + bind = binding + + Kiba.job_segment do + job = bind.receiver + + transform Deduplicate::Table, + field: mod.cleaned_values_identifier, + delete_field: false + transform Delete::Fields, + fields: mod.cleaned_uniq_collate_fields + transform Merge::MultiRowLookup, + lookup: send(mod.base_job_cleaned_job_key), + keycolumn: mod.cleaned_values_identifier, + fieldmap: job.send(:fieldmap, mod), + delim: mod.collation_delim + end + end + + def fieldmap(mod) + mod.cleaned_uniq_collate_fields.map do |field| + field_mapping(field) + end.to_h + end + + def field_mapping(field) + if field.to_s.end_with?("s") + [field, field] + else + ["#{field}s".to_sym, field] + end + end + private :field_mapping + end + end + end + end + end +end diff --git a/lib/kiba/extend/mixins/iterative_cleanup/jobs/corrections.rb b/lib/kiba/extend/mixins/iterative_cleanup/jobs/corrections.rb new file mode 100644 index 000000000..7ca7cce5b --- /dev/null +++ b/lib/kiba/extend/mixins/iterative_cleanup/jobs/corrections.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +module Kiba + module Extend + module Mixins + module IterativeCleanup + module Jobs + module Corrections + module_function + + def job(mod:) + return unless mod.cleanup_done? 
+ + Kiba::Extend::Jobs::Job.new( + files: { + source: mod.returned_compiled_job_key, + destination: mod.corrections_job_key + }, + transformer: get_xforms(mod) + ) + end + + def get_xforms(mod) + base = [] + if mod.respond_to?(:corrections_pre_xforms) + base << mod.corrections_pre_xforms + end + + base << xforms(mod) + + if mod.respond_to?(:corrections_post_xforms) + base << mod.corrections_post_xforms + end + base + end + + def xforms(mod) + Kiba.job_segment do + transform FilterRows::FieldPopulated, + action: :keep, + field: :corrected + transform Explode::RowsFromMultivalField, + field: mod.collated_orig_values_id_field, + delim: mod.collation_delim + transform Rename::Field, + from: mod.collated_orig_values_id_field, + to: mod.orig_values_identifier + transform CombineValues::FullRecord + transform Deduplicate::Table, + field: :index, + delete_field: true + end + end + end + end + end + end + end +end diff --git a/lib/kiba/extend/mixins/iterative_cleanup/jobs/returned_compiled.rb b/lib/kiba/extend/mixins/iterative_cleanup/jobs/returned_compiled.rb new file mode 100644 index 000000000..2db96fe6c --- /dev/null +++ b/lib/kiba/extend/mixins/iterative_cleanup/jobs/returned_compiled.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +module Kiba + module Extend + module Mixins + module IterativeCleanup + module Jobs + module ReturnedCompiled + module_function + + def job(mod:) + Kiba::Extend::Jobs::Job.new( + files: { + source: mod.returned_file_jobs, + destination: mod.returned_compiled_job_key + }, + transformer: get_xforms(mod) + ) + end + + def get_xforms(mod) + base = [] + if mod.respond_to?(:returned_compiled_pre_xforms) + base << mod.returned_compiled_pre_xforms + end + + base << xforms(mod) + + if mod.respond_to?(:returned_compiled_post_xforms) + base << mod.returned_compiled_post_xforms + end + base + end + + def xforms(mod) + Kiba.job_segment do + transform Delete::Fields, + fields: :to_review + transform Fingerprint::FlagChanged, + fingerprint: 
:clean_fingerprint, + source_fields: mod.fingerprint_fields, + delete_fp: true, + target: :corrected + transform Delete::FieldnamesStartingWith, + prefix: "fp_" + transform Clean::EnsureConsistentFields + end + end + end + end + end + end + end +end diff --git a/lib/kiba/extend/mixins/iterative_cleanup/jobs/worksheet.rb b/lib/kiba/extend/mixins/iterative_cleanup/jobs/worksheet.rb new file mode 100644 index 000000000..2933d7bc7 --- /dev/null +++ b/lib/kiba/extend/mixins/iterative_cleanup/jobs/worksheet.rb @@ -0,0 +1,80 @@ +# frozen_string_literal: true + +module Kiba + module Extend + module Mixins + module IterativeCleanup + module Jobs + module Worksheet + module_function + + def job(mod:) + Kiba::Extend::Jobs::Job.new( + files: { + source: mod.cleaned_uniq_job_key, + destination: mod.worksheet_job_key + }, + transformer: get_xforms(mod) + ) + end + + def get_lookups(mod) + if mod.cleanup_done? + # todo + elsif mod.worksheet_sent_not_done? + # todo + else + [] + end + end + + def get_xforms(mod) + base = [] + if mod.respond_to?(:worksheet_pre_xforms) + base << mod.worksheet_pre_xforms + end + base << xforms(mod) + if mod.respond_to?(:worksheet_post_xforms) + base << mod.worksheet_post_xforms + end + base + end + + def xforms(mod) + Kiba.job_segment do + transform Append::NilFields, + fields: mod.worksheet_add_fields + transform Fingerprint::Add, + target: :clean_fingerprint, + fields: mod.fingerprint_fields + + unless mod.provided_worksheets.empty? + # rubocop:disable Layout/LineLength + known_vals = + Kiba::Extend::Mixins::IterativeCleanup::KnownWorksheetValues.new( + mod + ).call + # rubocop:enable Layout/LineLength + transform Append::NilFields, + fields: :to_review + transform do |row| + ids = row[mod.collated_orig_values_id_field] + next row if ids.blank? + + known = ids.split(mod.collation_delim) + .map { |id| known_vals.include?(id) } + .all? 
+ next row if known + + row[:to_review] = "y" + row + end + end + end + end + end + end + end + end + end +end diff --git a/lib/kiba/extend/mixins/iterative_cleanup/returned_compiled.rb b/lib/kiba/extend/mixins/iterative_cleanup/returned_compiled.rb deleted file mode 100644 index d2cafac17..000000000 --- a/lib/kiba/extend/mixins/iterative_cleanup/returned_compiled.rb +++ /dev/null @@ -1,52 +0,0 @@ -# frozen_string_literal: true - -module Kiba - module Extend - module Mixins - module IterativeCleanup - module ReturnedCompiled - module_function - - def job(mod:) - Kiba::Extend::Jobs::Job.new( - files: { - source: mod.returned_file_jobs, - destination: mod.returned_compiled_job_key - }, - transformer: get_xforms(mod) - ) - end - - def get_xforms(mod) - base = [] - if mod.respond_to?(:returned_compiled_pre_xforms) - base << mod.returned_compiled_pre_xforms - end - - base << xforms(mod) - - if mod.respond_to?(:returned_compiled_post_xforms) - base << mod.returned_compiled_post_xforms - end - base - end - - def xforms(mod) - Kiba.job_segment do - transform Delete::Fields, - fields: :to_review - transform Fingerprint::FlagChanged, - fingerprint: :clean_fingerprint, - source_fields: mod.fingerprint_fields, - delete_fp: true, - target: :corrected - transform Delete::FieldnamesStartingWith, - prefix: "fp_" - transform Clean::EnsureConsistentFields - end - end - end - end - end - end -end diff --git a/lib/kiba/extend/mixins/iterative_cleanup/worksheet.rb b/lib/kiba/extend/mixins/iterative_cleanup/worksheet.rb deleted file mode 100644 index 0c55d86f2..000000000 --- a/lib/kiba/extend/mixins/iterative_cleanup/worksheet.rb +++ /dev/null @@ -1,78 +0,0 @@ -# frozen_string_literal: true - -module Kiba - module Extend - module Mixins - module IterativeCleanup - module Worksheet - module_function - - def job(mod:) - Kiba::Extend::Jobs::Job.new( - files: { - source: mod.cleaned_uniq_job_key, - destination: mod.worksheet_job_key - }, - transformer: get_xforms(mod) - ) - end - - def 
get_lookups(mod) - if mod.cleanup_done? - # todo - elsif mod.worksheet_sent_not_done? - # todo - else - [] - end - end - - def get_xforms(mod) - base = [] - if mod.respond_to?(:worksheet_pre_xforms) - base << mod.worksheet_pre_xforms - end - base << xforms(mod) - if mod.respond_to?(:worksheet_post_xforms) - base << mod.worksheet_post_xforms - end - base - end - - def xforms(mod) - Kiba.job_segment do - transform Append::NilFields, - fields: mod.worksheet_add_fields - transform Fingerprint::Add, - target: :clean_fingerprint, - fields: mod.fingerprint_fields - - unless mod.provided_worksheets.empty? - # rubocop:disable Layout/LineLength - known_vals = - Kiba::Extend::Mixins::IterativeCleanup::KnownWorksheetValues.new( - mod - ).call - # rubocop:enable Layout/LineLength - transform Append::NilFields, - fields: :to_review - transform do |row| - ids = row[mod.collated_orig_values_id_field] - next row if ids.blank? - - known = ids.split(mod.collation_delim) - .map { |id| known_vals.include?(id) } - .all? - next row if known - - row[:to_review] = "y" - row - end - end - end - end - end - end - end - end -end