From 6460a0e54d2a9358d237d3c10527c572b96d03a5 Mon Sep 17 00:00:00 2001 From: Kristina Spurgin Date: Tue, 31 Aug 2021 19:21:30 -0400 Subject: [PATCH] increase documentation coverage; restructure into Registry module; update some tests; add simplecov --- .gitignore | 2 +- Gemfile | 13 + Gemfile.lock | 10 +- kiba-extend.gemspec | 14 +- lib/kiba/extend.rb | 2 +- lib/kiba/extend/destinations.rb | 11 + lib/kiba/extend/destinations/csv.rb | 10 + lib/kiba/extend/fieldset.rb | 62 ----- lib/kiba/extend/jobs.rb | 47 ++++ lib/kiba/extend/jobs/base_job.rb | 41 ---- lib/kiba/extend/registry.rb | 18 ++ lib/kiba/extend/registry/file_registry.rb | 115 ++++----- .../extend/registry/file_registry_entry.rb | 222 +++++++++--------- .../extend/registry/registered_destination.rb | 88 +++---- lib/kiba/extend/registry/registered_file.rb | 48 ++-- lib/kiba/extend/registry/registered_lookup.rb | 65 ++--- lib/kiba/extend/registry/registered_source.rb | 28 ++- .../registry/registry_entry_selector.rb | 72 +++--- lib/kiba/extend/registry/registry_list.rb | 17 +- .../extend/registry/registry_validator.rb | 96 ++++---- lib/kiba/extend/registry/requirable_file.rb | 33 +-- .../extend/registry/source_dest_registry.rb | 104 ++++---- lib/kiba/extend/sources.rb | 11 + lib/kiba/extend/transforms/deduplicate.rb | 1 + lib/kiba/extend/transforms/extract.rb | 1 + lib/kiba/extend/transforms/merge.rb | 4 +- lib/kiba/extend/utils/fieldset.rb | 65 +++++ lib/kiba/extend/utils/lookup.rb | 8 +- lib/kiba/extend/version.rb | 2 +- spec/kiba/extend/jobs/job_spec.rb | 4 +- .../registry/file_registry_entry_spec.rb | 4 +- .../extend/registry/file_registry_spec.rb | 22 +- .../registry/registered_destination_spec.rb | 9 +- .../extend/registry/registered_file_spec.rb | 17 +- .../extend/registry/registered_lookup_spec.rb | 11 +- .../extend/registry/registered_source_spec.rb | 9 +- .../registry/registry_entry_selector_spec.rb | 6 +- .../registry/registry_validator_spec.rb | 6 +- .../extend/registry/requirable_file_spec.rb | 12 
+- spec/kiba/extend/{ => utils}/fieldset_spec.rb | 4 +- spec/spec_helper.rb | 4 + 41 files changed, 750 insertions(+), 568 deletions(-) create mode 100644 lib/kiba/extend/destinations.rb delete mode 100644 lib/kiba/extend/fieldset.rb create mode 100644 lib/kiba/extend/registry.rb create mode 100644 lib/kiba/extend/sources.rb create mode 100644 lib/kiba/extend/utils/fieldset.rb rename spec/kiba/extend/{ => utils}/fieldset_spec.rb (93%) diff --git a/.gitignore b/.gitignore index e79de1a20..1d281f8f0 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,4 @@ .rspec_status .byebug_history -**/.~lock* \ No newline at end of file +**/.~lock*coverage diff --git a/Gemfile b/Gemfile index 3bb965787..c8a711e7b 100644 --- a/Gemfile +++ b/Gemfile @@ -4,5 +4,18 @@ source 'https://rubygems.org' git_source(:github) { |repo_name| "https://github.com/#{repo_name}" } +group :development, :test do + gem 'bundler', '>= 1.17' + gem 'byebug', '~>11.0' + gem 'pry', '~> 0.14' + gem 'rake', '~> 13.0' + gem 'rspec', '~> 3.0' + gem 'rubocop', '~> 1.18.4' + gem 'rubocop-rspec', '~> 2.4.0' +end + +group :test do + gem 'simplecov', require: false +end # Specify your gem's dependencies in kiba-extend.gemspec gemspec diff --git a/Gemfile.lock b/Gemfile.lock index b0e82b010..4289f29bc 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: . 
specs: - kiba-extend (2.2.0) + kiba-extend (2.2.1) activesupport (~> 6.1.4) csv (~> 3.0) dry-configurable (~> 0.11) @@ -25,6 +25,7 @@ GEM concurrent-ruby (1.1.9) csv (3.2.0) diff-lcs (1.3) + docile (1.4.0) dry-configurable (0.12.1) concurrent-ruby (~> 1.0) dry-core (~> 0.5, >= 0.5.0) @@ -78,6 +79,12 @@ GEM rubocop (~> 1.0) rubocop-ast (>= 1.1.0) ruby-progressbar (1.11.0) + simplecov (0.21.2) + docile (~> 1.1) + simplecov-html (~> 0.11) + simplecov_json_formatter (~> 0.1) + simplecov-html (0.12.3) + simplecov_json_formatter (0.1.3) tzinfo (2.0.4) concurrent-ruby (~> 1.0) unicode-display_width (2.0.0) @@ -96,6 +103,7 @@ DEPENDENCIES rspec (~> 3.0) rubocop (~> 1.18.4) rubocop-rspec (~> 2.4.0) + simplecov BUNDLED WITH 2.1.4 diff --git a/kiba-extend.gemspec b/kiba-extend.gemspec index cd1756d3a..ff082a198 100644 --- a/kiba-extend.gemspec +++ b/kiba-extend.gemspec @@ -43,11 +43,11 @@ Gem::Specification.new do |spec| spec.add_dependency 'kiba-common', '~> 1.5.0' spec.add_dependency 'xxhash', '~> 0.4' - spec.add_development_dependency 'bundler', '>= 1.17' - spec.add_development_dependency 'byebug', '~>11.0' - spec.add_development_dependency 'pry', '~> 0.14' - spec.add_development_dependency 'rake', '~> 13.0' - spec.add_development_dependency 'rspec', '~> 3.0' - spec.add_development_dependency 'rubocop', '~> 1.18.4' - spec.add_development_dependency 'rubocop-rspec', '~> 2.4.0' + # spec.add_development_dependency 'bundler', '>= 1.17' + # spec.add_development_dependency 'byebug', '~>11.0' + # spec.add_development_dependency 'pry', '~> 0.14' + # spec.add_development_dependency 'rake', '~> 13.0' + # spec.add_development_dependency 'rspec', '~> 3.0' + # spec.add_development_dependency 'rubocop', '~> 1.18.4' + # spec.add_development_dependency 'rubocop-rspec', '~> 2.4.0' end diff --git a/lib/kiba/extend.rb b/lib/kiba/extend.rb index 2cd42d824..c13527bb3 100644 --- a/lib/kiba/extend.rb +++ b/lib/kiba/extend.rb @@ -57,7 +57,7 @@ module Extend # Prefix for warnings from the ETL 
setting :warning_label, 'KIBA WARNING', reader: true - setting :registry, Kiba::Extend::FileRegistry.new, reader: true + setting :registry, Kiba::Extend::Registry::FileRegistry.new, reader: true setting :job, reader: true do # Whether to output results to STDOUT for debugging diff --git a/lib/kiba/extend/destinations.rb b/lib/kiba/extend/destinations.rb new file mode 100644 index 000000000..0f0790cec --- /dev/null +++ b/lib/kiba/extend/destinations.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +module Kiba + module Extend + # Classes defining data outputs + # + # This structure follows the pattern set out in {https://github.com/thbar/kiba-common kiba-common} + module Destinations + end + end +end diff --git a/lib/kiba/extend/destinations/csv.rb b/lib/kiba/extend/destinations/csv.rb index 4ff091382..4d636ccd0 100644 --- a/lib/kiba/extend/destinations/csv.rb +++ b/lib/kiba/extend/destinations/csv.rb @@ -5,9 +5,17 @@ module Kiba module Extend module Destinations + # An extension of Kiba::Common's CSV destination, adding the `initial_headers` option class CSV attr_reader :filename, :csv_options, :csv, :headers + # @param filename [String] path for writing CSV + # @param csv_options [Hash] options passable to CSV objects. Refer to + # https://rubyapi.org/2.7/o/csv#method-c-new for details + # @param headers Don't use this + # @param initial_headers [Array] names of fields in the order you want them output in the + # CSV. Any you do not explicitly include here will be appended in whatever order they got + # created/processed in, to the right of the ones named here. 
def initialize(filename:, csv_options: nil, headers: nil, initial_headers: []) @filename = filename @csv_options = csv_options || {} @@ -15,6 +23,7 @@ def initialize(filename:, csv_options: nil, headers: nil, initial_headers: []) @initial_headers = initial_headers end + # @private def write(row) @csv ||= ::CSV.open(filename, 'wb', csv_options) @headers ||= row.keys @@ -23,6 +32,7 @@ def write(row) csv << row.fetch_values(*@headers) end + # @private def close csv&.close end diff --git a/lib/kiba/extend/fieldset.rb b/lib/kiba/extend/fieldset.rb deleted file mode 100644 index 1b7321732..000000000 --- a/lib/kiba/extend/fieldset.rb +++ /dev/null @@ -1,62 +0,0 @@ -# frozen_string_literal: true - -module Kiba - module Extend - class Fieldset - def initialize(fields) - @hash = {} - fields.each { |field| @hash[field] = [] } - end - - def add_constant_values(field, value) - @hash[field] = [] - value_ct.times { @hash[field] << value } - end - - def fields - @hash.keys - end - - attr_reader :hash - - def join_values(delim) - @hash.transform_values! { |vals| vals.join(delim) } - end - - def populate(rows) - return if rows.empty? - - rows.each { |row| get_field_values(row) } - remove_valueless_rows - end - - def value_ct - @hash.values.first.length - end - - private - - def get_field_values(row) - fields.each do |field| - fetched = row.fetch(field, nil) - value = fetched.blank? ? nil : fetched - @hash[field] << value - end - end - - def remove_valueless_rows - valueless_indices.each do |index| - @hash.each { |_field, values| values.delete_at(index) } - end - end - - def valueless_indices - indices = [] - @hash.values.first.each_with_index do |_element, i| - indices << i if @hash.values.map { |vals| vals[i] }.compact.empty? 
- end - indices.sort.reverse - end - end - end -end diff --git a/lib/kiba/extend/jobs.rb b/lib/kiba/extend/jobs.rb index 604159081..e9801cc82 100644 --- a/lib/kiba/extend/jobs.rb +++ b/lib/kiba/extend/jobs.rb @@ -6,6 +6,53 @@ module Kiba module Extend + # Reusable, composable patterns for jobs + # + # Heretofore, I have been repeating tons of code/logic for setting up a job in migration code: + # + # - Defining sources/destinations, @srcrows, @outrows + # - Changing CSV rows to hashes (initial transforms) + # - Changing hashes back to CSV rows + # - Calling postprocessing + # + # Most of this never changes, and when it does there is way too much tedious work in a given migration + # to make it consistent across all jobs. + # + # This is an attempt to dry up calling jobs and make it possible to test them via RSpec + # + # Running `Kiba.parse` to define a job generates a + # {https://github.com/thbar/kiba/blob/master/lib/kiba/control.rb Kiba::Control} + # object, which is a wrapper bundling together: pre_processes, config, sources, transforms, destinations, and + # post_processes. + # + # As described {https://github.com/thbar/kiba/wiki/Implementing-pre-and-post-processors here}, pre_ and post_ + # processors get called once per ETL run---either before or after the ETL starts working through the source + # rows + # + # This Kiba::Control object created by Kiba.parse is generated with a particular Kiba::Context, and + # once created, you cannot get access to or manipulate variables or configuration that the entire + # job needs to know about. + # + # What Kiba::Extend::Jobs adds is the ability to set up reusable initial_transformers and final_transformers. + # Basically, job templates where just the meat of the transformations change. + # + # `files` is the configuration of destination, source, and lookup files the job will use. 
It is a Hash, with + # the following format: + # + # { source: [registry_key, registry_key], destination: [registry_key], lookup: [registry_key] } + # + # { source: [registry_key, registry_key], destination: [registry_key]} + # + # `source` and `destination` must each have at least one registry key. `lookup` may be omitted, or it may + # be included with one or more registry keys + # + # `transformer` is a sequence of data transformations that could theoretically be called with interchangeable + # input/output settings (i.e. `materials`). + # + # In project code, instead of defining an entire job in a `Kiba.parse` block, you will define a + # `Kiba.job_segment` block containing just the transforms unique to that job. + # + # @since 2.2.0 module Jobs end end diff --git a/lib/kiba/extend/jobs/base_job.rb b/lib/kiba/extend/jobs/base_job.rb index 3c9353f39..164690eaf 100644 --- a/lib/kiba/extend/jobs/base_job.rb +++ b/lib/kiba/extend/jobs/base_job.rb @@ -8,47 +8,6 @@ module Kiba module Extend - # Reusable, composable patterns for jobs - # - # Heretofore, I have been repeating tons of code/logic for setting up a job in migration code: - # - # - Defining sources/destinations, @srcrows, @outrows - # - Changing CSV rows to hashes (initial transforms) - # - Changing hashes back to CSV rows - # - Calling postprocessing - # - # Most of this never changes, and when it does there is way too much tedious work in a given migration - # to make it consistent across all jobs. - # - # This is an attempt to dry up calling jobs and make it possible to test them automatically with stubbed-in - # enumerable sources/destinations - # - # Running `Kiba.parse` to define a job generates a {https://github.com/thbar/kiba/blob/master/lib/kiba/control.rb Kiba::Control} - # object, which is a wrapper bundling together: pre_processes, config, sources, transforms, destinations, and - # post_processes. 
- # - # As described {https://github.com/thbar/kiba/wiki/Implementing-pre-and-post-processors here}, pre_ and post_ - # processors get called once per ETL run---either before or after the ETL starts working through the source - # rows - # - # What Kiba::Extend::Jobs add is the ability to set up reusable initial_transformers and final_transformers. - # Basically, job templates where just the meat of the transformations change. - # - # `files` is the configuration of destination, source, and lookup files the job will use. It is a Hash, with - # the following format: - # - # { source: [registry_key, registry_key], destination: [registry_key], lookup: [registry_key] } - # - # { source: [registry_key, registry_key], destination: [registry_key]} - # - # `source` and `destination` must each have at least one registry key. `lookup` may be omitted, or it may - # be included with one or more registry keys - # - # `transformer` is a sequence of data transformations that could theoretically be called with interchangable - # input/output settings (i.e. `materials`). In practice, a `recipe` is usually closely tied to particular tables, because - # fields are manipulated by name. However, this should support easier automated testing of `recipes`. - # - # @since 2.2.0 module Jobs # Abstract definition of Job and job interface # diff --git a/lib/kiba/extend/registry.rb b/lib/kiba/extend/registry.rb new file mode 100644 index 000000000..801087697 --- /dev/null +++ b/lib/kiba/extend/registry.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +module Kiba + module Extend + # Support for defining project-specific file registry + # + # This DRYs up the process of setting up job configs (i.e. the source, lookup, and destination files + # for that job). + # + # This also allows for automated calling of dependencies instead of having to redundantly + # hard code them for every job. If the file(s) needed as sources or lookups do not exist, + # their creator jobs will be run to create them. 
+ # + # @since 2.2.0 + module Registry + end + end +end diff --git a/lib/kiba/extend/registry/file_registry.rb b/lib/kiba/extend/registry/file_registry.rb index 5f77ce5ab..6530ac7d7 100644 --- a/lib/kiba/extend/registry/file_registry.rb +++ b/lib/kiba/extend/registry/file_registry.rb @@ -10,71 +10,80 @@ module Kiba module Extend - # Transforms a file_registry hash into an object that can return source, lookup, or destination - # config for that file, for passing to jobs - # - # An example of a file registry setup in a project can be found at: - # https://github.com/lyrasis/fwm-cspace-migration/blob/main/lib/fwm/registry_data.rb - class FileRegistry - include Dry::Container::Mixin - - config.namespace_separator = '__' - - # Exception raised if the file key is not registered - class KeyNotRegisteredError < StandardError - # @param filekey [Symbol] - def initialize(filekey) - msg = "No file registered under the key: :#{filekey}" - super(msg) + module Registry + # Transforms a file_registry hash into an object that can return source, lookup, or destination + # config for that file, for passing to jobs + # + # An example of a file registry setup in a project can be found at: + # https://github.com/lyrasis/fwm-cspace-migration/blob/main/lib/fwm/registry_data.rb + class FileRegistry + include Dry::Container::Mixin + + config.namespace_separator = '__' + + # Exception raised if the file key is not registered + class KeyNotRegisteredError < StandardError + # @param filekey [Symbol] + def initialize(filekey) + msg = "No file registered under the key: :#{filekey}" + super(msg) + end end - end - def as_destination(filekey) - Kiba::Extend::RegisteredDestination.new(key: filekey, data: lookup(filekey)) - end + # @param filekey [String, Symbol] file registry key for file to be used as destination + # @return [Kiba::Extend::Registry::RegisteredDestination] + def as_destination(filekey) + RegisteredDestination.new(key: filekey, data: lookup(filekey)) + end - def as_lookup(filekey) 
- Kiba::Extend::RegisteredLookup.new(key: filekey, data: lookup(filekey)) - end + # @param filekey [String, Symbol] file registry key for file to be used as a lookup source + # @return [Kiba::Extend::Registry::RegisteredLookup] + def as_lookup(filekey) + RegisteredLookup.new(key: filekey, data: lookup(filekey)) + end - def as_source(filekey) - Kiba::Extend::RegisteredSource.new(key: filekey, data: lookup(filekey)) - end + # @param filekey [String, Symbol] file registry key for file to be used as a source + # @return [Kiba::Extend::Registry::RegisteredSource] + def as_source(filekey) + RegisteredSource.new(key: filekey, data: lookup(filekey)) + end - def entries - @entries ||= populate_entries - end + # @return [Array] the value of every item currently registered in the container + def entries + @entries ||= populate_entries + end - def transform - each { |key, val| decorate(key) { FileRegistryEntry.new(val) } } - @entries = populate_entries - each { |key, val| val.set_key(key) } - end + def transform + each { |key, val| decorate(key) { FileRegistryEntry.new(val) } } + @entries = populate_entries + each { |key, val| val.set_key(key) } + end - def valid? - validator.valid? - end + def valid? + validator.valid? + end - def warnings? - validator.warnings? 
+ end - private + private - def lookup(key) - resolve(key) - rescue Dry::Container::Error - raise KeyNotRegisteredError, key - end + def lookup(key) + resolve(key) + rescue Dry::Container::Error + raise KeyNotRegisteredError, key + end - def populate_entries - arr = [] - each { |entry| arr << entry[1] } - arr - end + def populate_entries + arr = [] + each { |entry| arr << entry[1] } + arr + end - def validator - @validator ||= RegistryValidator.new + def validator + @validator ||= RegistryValidator.new + end end end end diff --git a/lib/kiba/extend/registry/file_registry_entry.rb b/lib/kiba/extend/registry/file_registry_entry.rb index d413a302a..bde76eda8 100644 --- a/lib/kiba/extend/registry/file_registry_entry.rb +++ b/lib/kiba/extend/registry/file_registry_entry.rb @@ -4,137 +4,145 @@ module Kiba module Extend - # Value object capturing the data about an entry in the file registry - # - # This is the underlying data that can be used to derive a registered source, - # destination, or lookup file object. - # - # Used instead of just passing around a Hash so that it can validate itself and - # carry its own errors/warnings - class FileRegistryEntry - include SourceDestRegistry - - attr_reader :path, :key, - :creator, :supplied, :dest_special_opts, :desc, :lookup_on, :tags, :message, - :dest_class, :dest_opt, :src_class, :src_opt, :type, - :valid, :errors, :warnings - - # allowed types - TYPES = :file, :fileset, :enum, :lambda - - # @param reghash [Hash] File data. See {file:doc/file_registry_entry.md} for details - def initialize(reghash) - set_defaults - assign_values_from(reghash) - validate - end + module Registry + # Value object capturing the data about an entry in the file registry + # + # This is the underlying data that can be used to derive a registered source, + # destination, or lookup file object. 
+ # + # Used instead of just passing around a Hash so that it can validate itself and + # carry its own errors/warnings + class FileRegistryEntry + include SourceDestRegistry + + attr_reader :path, :key, + :creator, :supplied, :dest_special_opts, :desc, :lookup_on, :tags, :message, + :dest_class, :dest_opt, :src_class, :src_opt, :type, + :valid, :errors, :warnings + + # allowed types + TYPES = :file, :fileset, :enum, :lambda + + # @param reghash [Hash] File data. See {file:doc/file_registry_entry.md} for details + def initialize(reghash) + set_defaults + assign_values_from(reghash) + validate + end - def set_key(key) - @key = key - end + # Used by FileRegistry.transform to add the key as an instance variable to each Entry + def set_key(key) + @key = key + end - def summary - lines = ["#{key} -- #{tags.join(', ')}"] - lines << " #{path}" if path - lines << " #{desc}" if desc - lines << " #{creator}" if creator - lines.join("\n") - end + # Printable string summarizing the Entry + # + # Called by project applications + def summary + lines = ["#{key} -- #{tags.join(', ')}"] + lines << " #{path}" if path + lines << " #{desc}" if desc + lines << " #{creator}" if creator + lines.join("\n") + end - def valid? - valid - end + # Whether the Entry is valid + # @return [Boolean] + def valid? 
+ valid + end - private + private - def allowed_settings - instance_variables + def allowed_settings + instance_variables .map(&:to_s) .map { |str| str.delete_prefix('@') } .map(&:to_sym) - end + end - def allowed_setting?(key) - allowed_settings.any?(key) - end + def allowed_setting?(key) + allowed_settings.any?(key) + end - def assign_value(key, val) - if allowed_setting?(key) - instance_variable_set("@#{key}".to_sym, val) - else - @warnings << ":#{key} is not an allowed FileRegistryEntry setting" + def assign_value(key, val) + if allowed_setting?(key) + instance_variable_set("@#{key}".to_sym, val) + else + @warnings << ":#{key} is not an allowed FileRegistryEntry setting" + end end - end - def assign_values_from(reghash) - reghash.each { |key, val| assign_value(key, val) } - end + def assign_values_from(reghash) + reghash.each { |key, val| assign_value(key, val) } + end - def path_required? - chk = [dest_class, src_class].map { |klass| requires_path?(klass) } - return false if chk.uniq == [false] + def path_required? + chk = [dest_class, src_class].map { |klass| requires_path?(klass) } + return false if chk.uniq == [false] - true - end + true + end - def set_defaults - @type = :file - @creator = nil - @desc = '' - @dest_class = Kiba::Extend.destination - @dest_opt = Kiba::Extend.csvopts - @dest_special_opts = nil - @lookup_on = nil - @path = nil - @src_class = Kiba::Extend.source - @src_opt = Kiba::Extend.csvopts - @supplied = false - @tags = [] - @valid = false - @errors = {} - @warnings = [] - end + def set_defaults + @type = :file + @creator = nil + @desc = '' + @dest_class = Kiba::Extend.destination + @dest_opt = Kiba::Extend.csvopts + @dest_special_opts = nil + @lookup_on = nil + @path = nil + @src_class = Kiba::Extend.source + @src_opt = Kiba::Extend.csvopts + @supplied = false + @tags = [] + @valid = false + @errors = {} + @warnings = [] + end - def validate - validate_path - validate_creator - validate_type - @valid = true if errors.empty? 
- end + def validate + validate_path + validate_creator + validate_type + @valid = true if errors.empty? + end - def validate_creator - return if supplied + def validate_creator + return if supplied - validate_creator_present - validate_creator_is_method - end + validate_creator_present + validate_creator_is_method + end - def validate_creator_is_method - return if creator.is_a?(Method) - - @errors[:creator_not_a_method] = creator.dup - @creator = nil - end + def validate_creator_is_method + return if creator.is_a?(Method) + + @errors[:creator_not_a_method] = creator.dup + @creator = nil + end - def validate_creator_present - return if creator - - @errors[:missing_creator_for_non_supplied_file] = nil - end - - def validate_path - if path_required? && !path - @errors[:missing_path] = nil - return + def validate_creator_present + return if creator + + @errors[:missing_creator_for_non_supplied_file] = nil end + + def validate_path + if path_required? && !path + @errors[:missing_path] = nil + return + end - @path = Pathname.new(path) if path - end + @path = Pathname.new(path) if path + end - def validate_type - return if TYPES.any?(@type) + def validate_type + return if TYPES.any?(@type) - @errors[:unknown_type] = @type + @errors[:unknown_type] = @type + end end end end diff --git a/lib/kiba/extend/registry/registered_destination.rb b/lib/kiba/extend/registry/registered_destination.rb index 174e58645..3654d0932 100644 --- a/lib/kiba/extend/registry/registered_destination.rb +++ b/lib/kiba/extend/registry/registered_destination.rb @@ -4,61 +4,63 @@ module Kiba module Extend - # Value object representing a destination file registered in a {Kiba::Extend::FileRegistry} - class RegisteredDestination < RegisteredFile - # Arguments for calling Kiba Destination class - def args - return [simple_args] unless @data.dest_special_opts + module Registry + # Value object representing a destination file registered in a {Kiba::Extend::FileRegistry} + class RegisteredDestination < 
RegisteredFile + # Arguments for calling Kiba Destination class + def args + return [simple_args] unless @data.dest_special_opts - opts = supported_special_opts - warn_about_opts if opts.length < @data.dest_special_opts.length - return [simple_args] if opts.empty? + opts = supported_special_opts + warn_about_opts if opts.length < @data.dest_special_opts.length + return [simple_args] if opts.empty? - [simple_args.merge(supported_special_opts)] - end + [simple_args.merge(supported_special_opts)] + end - # Description of file - # - # Used in post-processing STDOUT - def description - @data.desc - end + # Description of file + # + # Used in post-processing STDOUT + def description + @data.desc + end - # Info hash for file - # - # @deprecated Use {#description} and {#key} instead - def info - { filekey: @key, desc: description } - end + # Info hash for file + # + # @deprecated Use {#description} and {#key} instead + def info + { filekey: @key, desc: description } + end - # Kiba Destination class to call - def klass - @data.dest_class - end + # Kiba Destination class to call + def klass + @data.dest_class + end - private + private - def klass_opts - klass.instance_method(:initialize).parameters.map { |arr| arr[1] } - end + def klass_opts + klass.instance_method(:initialize).parameters.map { |arr| arr[1] } + end - def simple_args - return { filename: path }.merge(options_label(klass) => @data.dest_opt) if @data.dest_opt + def simple_args + return { filename: path }.merge(options_label(klass) => @data.dest_opt) if @data.dest_opt - { filename: path }.merge(labeled_options(klass)) - end + { filename: path }.merge(labeled_options(klass)) + end - def supported_special_opts - @data.dest_special_opts.select { |key, _| klass_opts.any?(key) } - end + def supported_special_opts + @data.dest_special_opts.select { |key, _| klass_opts.any?(key) } + end - def unsupported_special_opts - @data.dest_special_opts.reject { |key, _| klass_opts.any?(key) } - end + def unsupported_special_opts 
+ @data.dest_special_opts.reject { |key, _| klass_opts.any?(key) } + end - def warn_about_opts - unsupported_special_opts.each do |opt, _| - puts "WARNING: Destination file :#{key} is called with special option :#{opt}, which is unsupported by #{klass}" + def warn_about_opts + unsupported_special_opts.each do |opt, _| + puts "WARNING: Destination file :#{key} is called with special option :#{opt}, which is unsupported by #{klass}" + end end end end diff --git a/lib/kiba/extend/registry/registered_file.rb b/lib/kiba/extend/registry/registered_file.rb index 030d1a9cd..afc21c0f9 100644 --- a/lib/kiba/extend/registry/registered_file.rb +++ b/lib/kiba/extend/registry/registered_file.rb @@ -4,34 +4,36 @@ module Kiba module Extend - # Abstract base class defining interface for destination files, lookup files, and source files - # returned by {Kiba::Extend::FileRegistry} - class RegisteredFile - include SourceDestRegistry - # Exception raised if no path is given in {FileRegistry} hash - class NoFilePathError < StandardError - # @param filekey [Symbol] key for which a file path was not found in {Kiba::Extend::FileRegistry} - def initialize(filekey) - msg = "No file path for :#{filekey} is recorded in file registry hash" - super(msg) + module Registry + # Abstract base class defining interface for destination files, lookup files, and source files + # returned by {Kiba::Extend::FileRegistry} + class RegisteredFile + include SourceDestRegistry + # Exception raised if no path is given in {FileRegistry} hash + class NoFilePathError < StandardError + # @param filekey [Symbol] key for which a file path was not found in {Kiba::Extend::FileRegistry} + def initialize(filekey) + msg = "No file path for :#{filekey} is recorded in file registry hash" + super(msg) + end end - end - # @!attribute [r] key - # @return [Symbol] The file's key in {FileRegistry} hash - attr_reader :key, :data + # @!attribute [r] key + # @return [Symbol] The file's key in {FileRegistry} hash + attr_reader :key, 
:data - # @param key [Symbol] the {Kiba::Extend::FileRegistry} lookup key - # @param data [Hash] the hash of data for the file from {Kiba::Extend::FileRegistry} - def initialize(key:, data:) - raise FileNotRegisteredError, key unless data - raise NoFilePathError, key if data.errors.keys.any?(:missing_path) + # @param key [Symbol] the {Kiba::Extend::FileRegistry} lookup key + # @param data [Hash] the hash of data for the file from {Kiba::Extend::FileRegistry} + def initialize(key:, data:) + raise FileNotRegisteredError, key unless data + raise NoFilePathError, key if data.errors.keys.any?(:missing_path) - @key, @data = key, data - end + @key, @data = key, data + end - def path - @data.path.to_s + def path + @data.path.to_s + end end end end diff --git a/lib/kiba/extend/registry/registered_lookup.rb b/lib/kiba/extend/registry/registered_lookup.rb index b8766044c..9f02565db 100644 --- a/lib/kiba/extend/registry/registered_lookup.rb +++ b/lib/kiba/extend/registry/registered_lookup.rb @@ -5,42 +5,45 @@ module Kiba module Extend - # Value object representing a file registered in a {Kiba::Extend::FileRegistry} that is being - # called into another job as a lookup table - # - # Assumes this file will be used to build a {Kiba::Extend::Lookup} - class RegisteredLookup < RegisteredFile - include RequirableFile - # Exception raised if {Kiba::Extend::FileRegistry} contains no lookup key for file - class NoLookupKeyError < StandardError - # @param filekey [Symbol] key not found in {Kiba::Extend::FileRegistry} - def initialize(filekey) - msg = "No lookup key column found for :#{filekey} in file registry hash" - super(msg) + module Registry + # Value object representing a file registered in a {Kiba::Extend::FileRegistry} that is being + # called into another job as a lookup table + # + # Assumes this file will be used to build a {Kiba::Extend::Lookup} + class RegisteredLookup < RegisteredFile + include RequirableFile + # Exception raised if {Kiba::Extend::FileRegistry} contains no 
lookup key for file + class NoLookupKeyError < StandardError + # @param filekey [Symbol] key not found in {Kiba::Extend::FileRegistry} + def initialize(filekey) + msg = "No lookup key column found for :#{filekey} in file registry hash" + super(msg) + end end - end - class NonSymbolLookupKeyError < StandardError - # @param filekey [Symbol] key not found in {Kiba::Extend::FileRegistry} - def initialize(filekey) - msg = "Lookup key found for :#{filekey} is not a Ruby Symbol. Prepend a : to the field name to fix." - super(msg) + # Exception raised if the lookup key value for the file is not a Symbol + class NonSymbolLookupKeyError < StandardError + # @param filekey [Symbol] key not found in {Kiba::Extend::FileRegistry} + def initialize(filekey) + msg = "Lookup key found for :#{filekey} is not a Ruby Symbol. Prepend a : to the field name to fix." + super(msg) + end end - end - # @param key [Symbol] file key from {FileRegistry} data hash - # @param data [Hash] file data from {FileRegistry} - def initialize(key:, data:) - super - raise NoLookupKeyError, @key unless @data.lookup_on - raise NonSymbolLookupKeyError, @key unless @data.lookup_on.is_a?(Symbol) - end + # @param key [Symbol] file key from {FileRegistry} data hash + # @param data [Hash] file data from {FileRegistry} + def initialize(key:, data:) + super + raise NoLookupKeyError, @key unless @data.lookup_on + raise NonSymbolLookupKeyError, @key unless @data.lookup_on.is_a?(Symbol) + end - # Arguments for calling {Kiba::Extend::Lookup} with this file - # @return [Hash] - def args - opts = @data.src_opt ? @data.src_opt : file_options(@data.src_class) - { file: path, csvopt: opts, keycolumn: @data.lookup_on } + # Arguments for calling {Kiba::Extend::Lookup} with this file + # @return [Hash] + def args + opts = @data.src_opt ? 
@data.src_opt : file_options(@data.src_class) + { file: path, csvopt: opts, keycolumn: @data.lookup_on } + end end end end diff --git a/lib/kiba/extend/registry/registered_source.rb b/lib/kiba/extend/registry/registered_source.rb index 5acb21a20..08d8cfd09 100644 --- a/lib/kiba/extend/registry/registered_source.rb +++ b/lib/kiba/extend/registry/registered_source.rb @@ -5,21 +5,23 @@ module Kiba module Extend - # Value object representing a file registered in a {Kiba::Extend::FileRegistry} that is being - # called into another job as a source table - class RegisteredSource < RegisteredFile - include RequirableFile + module Registry + # Value object representing a file registered in a {Kiba::Extend::FileRegistry} that is being + # called into another job as a source table + class RegisteredSource < RegisteredFile + include RequirableFile - # Arguments for calling Kiba Source class - # @return [Hash] - def args - opts = @data.src_opt ? { options_label(klass) => @data.src_opt } : labeled_options(klass) - [{ filename: path }.merge(opts)] - end + # Arguments for calling Kiba Source class + # @return [Array<Hash>] + def args + opts = @data.src_opt ? 
{ options_label(klass) => @data.src_opt } : labeled_options(klass) + [{ filename: path }.merge(opts)] + end - # Kiba Source class to call - def klass - @data.src_class + # Kiba Source class to call + def klass + @data.src_class + end end end end diff --git a/lib/kiba/extend/registry/registry_entry_selector.rb b/lib/kiba/extend/registry/registry_entry_selector.rb index 76ea64d42..4887b1e16 100644 --- a/lib/kiba/extend/registry/registry_entry_selector.rb +++ b/lib/kiba/extend/registry/registry_entry_selector.rb @@ -2,44 +2,56 @@ module Kiba module Extend - class RegistryEntrySelector - # @param cstr [String] stringified class name - def created_by_class(cstr) - with_creator.select { |entry| entry.creator.owner.to_s[cstr] } - end + module Registry + # Used in Rake tasks in project application to identify particular files/jobs + # to run or display information about + class RegistryEntrySelector + # Registry entries created by a given class + # @param cstr [String] stringified class name + # @return [Array] + def created_by_class(cstr) + with_creator.select { |entry| entry.creator.owner.to_s[cstr] } + end - # @param mstr [String] stringified method name - def created_by_method(mstr) - matcher = "#] + def created_by_method(mstr) + matcher = "#] + # @return [Array] + def tagged_all(*args) + tags = args.flatten.map(&:to_sym) + tags.inject(Kiba::Extend.registry.entries) do |arr, tag| + arr.select { |entry| entry.tags.any?(tag) } + end end - end - # Selects entries whose tags include one or more of the given tags - def tagged_any(*args) - tags = args.flatten.map(&:to_sym) - results = tags.inject([]) do |arr, arg| - arr << tagged(arg) - arr + # Selects entries whose tags include one or more of the given tags + # @param args [Array] + # @return [Array] + def tagged_any(*args) + tags = args.flatten.map(&:to_sym) + results = tags.inject([]) do |arr, arg| + arr << tagged(arg) + arr + end + results.flatten.uniq end - results.flatten.uniq - end - private + private - def 
tagged(tag) - Kiba::Extend.registry.entries.select { |entry| entry.tags.any?(tag) } - end + def tagged(tag) + Kiba::Extend.registry.entries.select { |entry| entry.tags.any?(tag) } + end - def with_creator - Kiba::Extend.registry.entries.select { |entry| entry.creator } + def with_creator + Kiba::Extend.registry.entries.select { |entry| entry.creator } + end end end end diff --git a/lib/kiba/extend/registry/registry_list.rb b/lib/kiba/extend/registry/registry_list.rb index 0832c469b..e4003c470 100644 --- a/lib/kiba/extend/registry/registry_list.rb +++ b/lib/kiba/extend/registry/registry_list.rb @@ -2,11 +2,18 @@ module Kiba module Extend - class RegistryList - def initialize(*args) - puts '' - list = args.empty? ? Kiba::Extend.registry.entries : args.flatten - list.each { |entry| puts entry.summary } + module Registry + # Utility class used by project applications to display information about a set of + # registered files/jobs + # + # Puts to STDOUT + class RegistryList + # @param args [Array] + def initialize(*args) + puts '' + list = args.empty? ? Kiba::Extend.registry.entries : args.flatten + list.each { |entry| puts entry.summary } + end end end end diff --git a/lib/kiba/extend/registry/registry_validator.rb b/lib/kiba/extend/registry/registry_validator.rb index 7acc33090..73a4aac63 100644 --- a/lib/kiba/extend/registry/registry_validator.rb +++ b/lib/kiba/extend/registry/registry_validator.rb @@ -2,65 +2,75 @@ module Kiba module Extend - class RegistryValidator - def report - puts '' - report_validity - report_warnings - end - - def valid? - return true if invalid.empty? + module Registry + # Utility class to report on the validity of the FileRegistry. 
+ class RegistryValidator + # Prints to STDOUT a report of errors and warnings to inform needed development in project applications + def report + puts '' + report_validity + report_warnings + end - false - end + # Check validity of the registry as a whole + # @return [TrueClass] if all Entries are valid + # @return [FalseClass] if any Entries are invalid + def valid? + return true if invalid.empty? - def warnings? - return false if warnings.empty? + false + end - true - end + # Whether any entries have warnings + # @return [TrueClass] if any Entries have warnings + # @return [FalseClass] if no Entries have warnings + def warnings? + return false if warnings.empty? - private + true + end - def errs_to_str(errs) - errs.map { |key, val| "#{key} #{val}" }.join('; ') - end + private - def invalid - Kiba::Extend.registry.entries.reject { |entry| entry.valid? } - end + def errs_to_str(errs) + errs.map { |key, val| "#{key} #{val}" }.join('; ') + end - def report_invalid - puts "Error count: #{invalid.length}" - invalid.each do |entry| - puts " #{entry.key}: #{errs_to_str(entry.errors)}" + def invalid + Kiba::Extend.registry.entries.reject { |entry| entry.valid? } end - end - def report_validity - if valid? - puts 'All file registry entries are valid!' - return + def report_invalid + puts "Error count: #{invalid.length}" + invalid.each do |entry| + puts " #{entry.key}: #{errs_to_str(entry.errors)}" + end end - report_invalid - end + def report_validity + if valid? + puts 'All file registry entries are valid!' + return + end - def report_warnings - unless warnings? - puts 'No warnings!' - return + report_invalid end - puts "Warning count: #{warnings.length}" - warnings.each do |entry| - puts " #{entry.key}: #{entry.warnings.join('; ')}" + def report_warnings + unless warnings? + puts 'No warnings!' 
+ return + end + + puts "Warning count: #{warnings.length}" + warnings.each do |entry| + puts " #{entry.key}: #{entry.warnings.join('; ')}" + end end - end - def warnings - Kiba::Extend.registry.entries.reject { |entry| entry.warnings.empty? } + def warnings + Kiba::Extend.registry.entries.reject { |entry| entry.warnings.empty? } + end end end end diff --git a/lib/kiba/extend/registry/requirable_file.rb b/lib/kiba/extend/registry/requirable_file.rb index 9e681d135..914908f0a 100644 --- a/lib/kiba/extend/registry/requirable_file.rb +++ b/lib/kiba/extend/registry/requirable_file.rb @@ -2,25 +2,28 @@ module Kiba module Extend - # Mixin module for some children of {Kiba::Extend::RegisteredFile} - module RequirableFile - # Exception raised if {Kiba::Extend::FileRegistry} contains no creator for file - class NoDependencyCreatorError < StandardError - # @param filekey [Symbol] key for file lacking creator in {Kiba::Extend::FileRegistry} - def initialize(filekey) - msg = "No creator method found for :#{filekey} in file registry" - super(msg) + module Registry + # Mixin module for children of {Kiba::Extend::RegisteredFile} that other jobs depend upon + module RequirableFile + # Exception raised if {Kiba::Extend::FileRegistry} contains no creator for file + class NoDependencyCreatorError < StandardError + # @param filekey [Symbol] key for file lacking creator in {Kiba::Extend::FileRegistry} + def initialize(filekey) + msg = "No creator method found for :#{filekey} in file registry" + super(msg) + end end - end - def required - return if File.exist?(@data.path) + # @return [Method] the creator method for a required dependency job + def required + return if File.exist?(@data.path) - %i[missing_creator_for_non_supplied_file creator_not_a_method].each do |err| - raise NoDependencyCreatorError, @key if @data.errors.keys.any?(err) - end + %i[missing_creator_for_non_supplied_file creator_not_a_method].each do |err| + raise NoDependencyCreatorError, @key if 
@data.errors.keys.any?(err) + end - @data.creator + @data.creator + end end end end diff --git a/lib/kiba/extend/registry/source_dest_registry.rb b/lib/kiba/extend/registry/source_dest_registry.rb index 0893c7ec7..779c680f4 100644 --- a/lib/kiba/extend/registry/source_dest_registry.rb +++ b/lib/kiba/extend/registry/source_dest_registry.rb @@ -2,61 +2,63 @@ module Kiba module Extend - module SourceDestRegistry - module_function + module Registry + module SourceDestRegistry + module_function - # Registry of known source/destination classes and whether they require a path - # - # Enumerable and Lambda are 'in-memory' and useful for testing and possibly - # virtual transforms on the fly. See an example of use at: - # https://github.com/thbar/kiba-common/blob/master/test/test_lambda_destination.rb - def requires_path?(klass) - data = { - nil => false, - Kiba::Extend::Destinations::CSV => true, - Kiba::Common::Destinations::CSV => true, - Kiba::Common::Destinations::Lambda => false, - Kiba::Common::Sources::CSV => true, - Kiba::Common::Sources::Enumerable => false - } - data[klass] - end + # Registry of known source/destination classes and whether they require a path + # + # Enumerable and Lambda are 'in-memory' and useful for testing and possibly + # virtual transforms on the fly. 
See an example of use at: + # https://github.com/thbar/kiba-common/blob/master/test/test_lambda_destination.rb + def requires_path?(klass) + data = { + nil => false, + Kiba::Extend::Destinations::CSV => true, + Kiba::Common::Destinations::CSV => true, + Kiba::Common::Destinations::Lambda => false, + Kiba::Common::Sources::CSV => true, + Kiba::Common::Sources::Enumerable => false + } + data[klass] + end - def file_options(klass) - data = { - nil => nil, - Kiba::Extend::Destinations::CSV => Kiba::Extend.csvopts, - Kiba::Common::Destinations::CSV => Kiba::Extend.csvopts, - Kiba::Common::Destinations::Lambda => Kiba::Extend.lambdaopts, - Kiba::Common::Sources::CSV => Kiba::Extend.csvopts, - Kiba::Common::Sources::Enumerable => nil - } - data[klass] - end + def file_options(klass) + data = { + nil => nil, + Kiba::Extend::Destinations::CSV => Kiba::Extend.csvopts, + Kiba::Common::Destinations::CSV => Kiba::Extend.csvopts, + Kiba::Common::Destinations::Lambda => Kiba::Extend.lambdaopts, + Kiba::Common::Sources::CSV => Kiba::Extend.csvopts, + Kiba::Common::Sources::Enumerable => nil + } + data[klass] + end - def labeled_options(klass) - data = { - nil => nil, - Kiba::Extend::Destinations::CSV => { options_label(klass) => file_options(klass) }, - Kiba::Common::Destinations::CSV => { options_label(klass) => file_options(klass) }, - Kiba::Common::Destinations::Lambda => { options_label(klass) => file_options(klass) }, - Kiba::Common::Sources::CSV => { options_label(klass) => file_options(klass) }, - Kiba::Common::Sources::Enumerable => nil - } - data[klass] - end + def labeled_options(klass) + data = { + nil => nil, + Kiba::Extend::Destinations::CSV => { options_label(klass) => file_options(klass) }, + Kiba::Common::Destinations::CSV => { options_label(klass) => file_options(klass) }, + Kiba::Common::Destinations::Lambda => { options_label(klass) => file_options(klass) }, + Kiba::Common::Sources::CSV => { options_label(klass) => file_options(klass) }, + 
Kiba::Common::Sources::Enumerable => nil + } + data[klass] + end - # The Symbol used for the options in the Kiba Source/Destination file configuration hash - def options_label(klass) - data = { - nil => nil, - Kiba::Extend::Destinations::CSV => :csv_options, - Kiba::Common::Destinations::CSV => :csv_options, - Kiba::Common::Destinations::Lambda => :options, - Kiba::Common::Sources::CSV => :csv_options, - Kiba::Common::Sources::Enumerable => nil - } - data[klass] + # The Symbol used for the options in the Kiba Source/Destination file configuration hash + def options_label(klass) + data = { + nil => nil, + Kiba::Extend::Destinations::CSV => :csv_options, + Kiba::Common::Destinations::CSV => :csv_options, + Kiba::Common::Destinations::Lambda => :options, + Kiba::Common::Sources::CSV => :csv_options, + Kiba::Common::Sources::Enumerable => nil + } + data[klass] + end end end end diff --git a/lib/kiba/extend/sources.rb b/lib/kiba/extend/sources.rb new file mode 100644 index 000000000..6616d3651 --- /dev/null +++ b/lib/kiba/extend/sources.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +module Kiba + module Extend + # Classes defining data inputs + # + # This structure follows the pattern set out in {https://github.com/thbar/kiba-common kiba-common} + module Sources + end + end +end diff --git a/lib/kiba/extend/transforms/deduplicate.rb b/lib/kiba/extend/transforms/deduplicate.rb index 90105e528..1e46989e0 100644 --- a/lib/kiba/extend/transforms/deduplicate.rb +++ b/lib/kiba/extend/transforms/deduplicate.rb @@ -379,6 +379,7 @@ def get_value_frequency(fv) # | c | e | h | # ``` # + # @since 2.2.0 class Table # @param field [Symbol] name of field on which to deduplicate # @param delete_field [Boolean] whether to delete the deduplication field after doing deduplication diff --git a/lib/kiba/extend/transforms/extract.rb b/lib/kiba/extend/transforms/extract.rb index 0ecab1ded..9ec8bfa72 100644 --- a/lib/kiba/extend/transforms/extract.rb +++ 
b/lib/kiba/extend/transforms/extract.rb @@ -4,6 +4,7 @@ module Kiba module Extend module Transforms # Transformations that extract specified data from a source + # @since 2.2.0 module Extract ::Extract = Kiba::Extend::Transforms::Extract diff --git a/lib/kiba/extend/transforms/merge.rb b/lib/kiba/extend/transforms/merge.rb index f76f74a6e..662c1a550 100644 --- a/lib/kiba/extend/transforms/merge.rb +++ b/lib/kiba/extend/transforms/merge.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require_relative '../utils/fieldset' + module Kiba module Extend module Transforms @@ -158,7 +160,7 @@ def initialize(fieldmap:, lookup:, keycolumn:, constantmap: {}, # @private def process(row) - field_data = Kiba::Extend::Fieldset.new(@fieldmap.values) + field_data = Kiba::Extend::Utils::Fieldset.new(@fieldmap.values) id_data = row.fetch(@keycolumn, '') id_data = id_data.nil? ? '' : id_data diff --git a/lib/kiba/extend/utils/fieldset.rb b/lib/kiba/extend/utils/fieldset.rb new file mode 100644 index 000000000..0cc33cf01 --- /dev/null +++ b/lib/kiba/extend/utils/fieldset.rb @@ -0,0 +1,65 @@ +# frozen_string_literal: true + +module Kiba + module Extend + module Utils + # Data structure class used in processing merge transforms + class Fieldset + def initialize(fields) + @hash = {} + fields.each { |field| @hash[field] = [] } + end + + def add_constant_values(field, value) + @hash[field] = [] + value_ct.times { @hash[field] << value } + end + + def fields + @hash.keys + end + + attr_reader :hash + + def join_values(delim) + @hash.transform_values! { |vals| vals.join(delim) } + end + + def populate(rows) + return if rows.empty? + + rows.each { |row| get_field_values(row) } + remove_valueless_rows + end + + def value_ct + @hash.values.first.length + end + + private + + def get_field_values(row) + fields.each do |field| + fetched = row.fetch(field, nil) + value = fetched.blank? ? 
nil : fetched + @hash[field] << value + end + end + + def remove_valueless_rows + valueless_indices.each do |index| + @hash.each { |_field, values| values.delete_at(index) } + end + end + + def valueless_indices + indices = [] + @hash.values.first.each_with_index do |_element, i| + indices << i if @hash.values.map { |vals| vals[i] }.compact.empty? + end + indices.sort.reverse + end + end + end + end +end diff --git a/lib/kiba/extend/utils/lookup.rb b/lib/kiba/extend/utils/lookup.rb index c259541e8..7b1b6916a 100644 --- a/lib/kiba/extend/utils/lookup.rb +++ b/lib/kiba/extend/utils/lookup.rb @@ -6,8 +6,12 @@ module Utils module Lookup ::Lookup = Kiba::Extend::Utils::Lookup extend self - # use when keycolumn values are unique - # creates hash with keycolumn value as key and csv-row-as-hash as the value + + # @deprecated in 2.2.0. The original `csv_to_multi_hash` now has the name + # `csv_to_hash`. `csv_to_multi_hash` is now aliased to `csv_to_hash`. Since + # creating these methods, I never once needed to use the original `csv_to_hash` + # method. 
Any need for it can be met by the multi-hash implementation + # @todo remove this entirely at some point def csv_to_hash_deprecated(file:, keycolumn:, csvopt: {}) CSV.foreach(File.expand_path(file), csvopt).each_with_object({}) do |r, memo| memo[r.fetch(keycolumn, nil)] = r.to_h diff --git a/lib/kiba/extend/version.rb b/lib/kiba/extend/version.rb index d945aee50..0add0ab10 100644 --- a/lib/kiba/extend/version.rb +++ b/lib/kiba/extend/version.rb @@ -2,6 +2,6 @@ module Kiba module Extend - VERSION = '2.2.0' + VERSION = '2.2.1' end end diff --git a/spec/kiba/extend/jobs/job_spec.rb b/spec/kiba/extend/jobs/job_spec.rb index 323b9b32a..7ba9d721b 100644 --- a/spec/kiba/extend/jobs/job_spec.rb +++ b/spec/kiba/extend/jobs/job_spec.rb @@ -6,7 +6,7 @@ RSpec.describe 'Kiba::Extend::Jobs::Job' do before(:context) do @dest_file = File.join(fixtures_dir, 'base_job_dest.csv') - Kiba::Extend.config.registry = Kiba::Extend::FileRegistry.new + Kiba::Extend.config.registry = Kiba::Extend::Registry::FileRegistry.new entries = { base_src: { path: File.join(fixtures_dir, 'base_job_base.csv'), supplied: true }, base_lookup: { path: File.join(fixtures_dir, 'base_job_lookup.csv'), supplied: true, lookup_on: :letter }, @@ -53,7 +53,7 @@ xit 'calls dependency creators' do missing_file = File.join(fixtures_dir, 'base_job_missing.csv') creator = double() - Kiba::Extend.config.registry = Kiba::Extend::FileRegistry.new + Kiba::Extend.config.registry = Kiba::Extend::Registry::FileRegistry.new entries = { base_lookup: { path: File.join(fixtures_dir, 'base_job_lookup.csv'), supplied: true, lookup_on: :letter }, base_dest: { path: @dest_file, creator: Helpers.method(:fake_creator_method) }, missing_src: { path: missing_file, creator: Helpers::BaseJob.method(:creator) } } diff --git a/spec/kiba/extend/registry/file_registry_entry_spec.rb b/spec/kiba/extend/registry/file_registry_entry_spec.rb index da8ae3381..458eabffe 100644 --- a/spec/kiba/extend/registry/file_registry_entry_spec.rb +++ 
b/spec/kiba/extend/registry/file_registry_entry_spec.rb @@ -3,9 +3,9 @@ require 'spec_helper' # rubocop:disable Metrics/BlockLength -RSpec.describe 'Kiba::Extend::FileRegistryEntry' do +RSpec.describe 'Kiba::Extend::Registry::FileRegistryEntry' do let(:path) { File.join('spec', 'fixtures', 'fkey.csv') } - let(:entry) { Kiba::Extend::FileRegistryEntry.new(data) } + let(:entry) { Kiba::Extend::Registry::FileRegistryEntry.new(data) } let(:reghash) do { fkey: { path: path, key: :foo }, diff --git a/spec/kiba/extend/registry/file_registry_spec.rb b/spec/kiba/extend/registry/file_registry_spec.rb index 96e1156d4..c3ce4f9e9 100644 --- a/spec/kiba/extend/registry/file_registry_spec.rb +++ b/spec/kiba/extend/registry/file_registry_spec.rb @@ -3,9 +3,9 @@ require 'spec_helper' # rubocop:disable Metrics/BlockLength -RSpec.describe 'Kiba::Extend::FileRegistry' do +RSpec.describe 'Kiba::Extend::Registry::FileRegistry' do before(:context) do - Kiba::Extend.config.registry = Kiba::Extend::FileRegistry.new + Kiba::Extend.config.registry = Kiba::Extend::Registry::FileRegistry.new populate_registry end let(:filekey) { :fkey } @@ -45,21 +45,21 @@ registry.each { |item| chk << item[1].class } chk.uniq! 
expect(chk.length).to eq(1) - expect(chk.first).to eq(Kiba::Extend::FileRegistryEntry) + expect(chk.first).to eq(Kiba::Extend::Registry::FileRegistryEntry) end end describe 'as destination' do let(:result) { registry.as_destination(filekey) } it 'returns destination file config' do - expect(result).to be_a(Kiba::Extend::RegisteredDestination) + expect(result).to be_a(Kiba::Extend::Registry::RegisteredDestination) end context 'when called with nonexistent key' do let(:filekey) { :cats } it 'raises error' do msg = "No file registered under the key: :#{filekey}" - expect { result }.to raise_error(Kiba::Extend::FileRegistry::KeyNotRegisteredError, msg) + expect { result }.to raise_error(Kiba::Extend::Registry::FileRegistry::KeyNotRegisteredError, msg) end end end @@ -67,14 +67,22 @@ describe 'as lookup' do let(:result) { registry.as_lookup(filekey) } it 'returns lookup file config' do - expect(result).to be_a(Kiba::Extend::RegisteredLookup) + expect(result).to be_a(Kiba::Extend::Registry::RegisteredLookup) end end describe 'as source' do let(:result) { registry.as_source(filekey) } it 'returns source file config' do - expect(result).to be_a(Kiba::Extend::RegisteredSource) + expect(result).to be_a(Kiba::Extend::Registry::RegisteredSource) + end + end + + describe 'entries' do + let(:result) { registry.entries } + it 'returns Array of FileRegistryEntries' do + expect(result).to be_a(Array) + expect(result.first).to be_a(Kiba::Extend::Registry::FileRegistryEntry) end end end diff --git a/spec/kiba/extend/registry/registered_destination_spec.rb b/spec/kiba/extend/registry/registered_destination_spec.rb index 48bace00e..399a4a7c7 100644 --- a/spec/kiba/extend/registry/registered_destination_spec.rb +++ b/spec/kiba/extend/registry/registered_destination_spec.rb @@ -3,12 +3,17 @@ require 'spec_helper' # rubocop:disable Metrics/BlockLength -RSpec.describe 'Kiba::Extend::RegisteredDestination' do +RSpec.describe 'Kiba::Extend::Registry::RegisteredDestination' do let(:filekey) { 
:fkey } let(:path) { File.join('spec', 'fixtures', 'fkey.csv') } let(:default) { { path: path } } let(:default_desc) { { path: path, desc: 'description' } } - let(:dest) { Kiba::Extend::RegisteredDestination.new(key: filekey, data: Kiba::Extend::FileRegistryEntry.new(data)) } + let(:dest) do + Kiba::Extend::Registry::RegisteredDestination.new( + key: filekey, + data: Kiba::Extend::Registry::FileRegistryEntry.new(data) + ) + end let(:optres) { { csv_options: Kiba::Extend.csvopts } } describe '#args' do let(:result) { dest.args } diff --git a/spec/kiba/extend/registry/registered_file_spec.rb b/spec/kiba/extend/registry/registered_file_spec.rb index 7cef79462..e51d00f1a 100644 --- a/spec/kiba/extend/registry/registered_file_spec.rb +++ b/spec/kiba/extend/registry/registered_file_spec.rb @@ -3,21 +3,28 @@ require 'spec_helper' # rubocop:disable Metrics/BlockLength -RSpec.describe 'Kiba::Extend::RegisteredFile' do +RSpec.describe 'Kiba::Extend::Registry::RegisteredFile' do let(:filekey) { :fkey } let(:path) { File.join('spec', 'fixtures', 'fkey.csv') } let(:default) { { path: path } } - let(:dest) { Kiba::Extend::RegisteredFile.new(key: filekey, data: Kiba::Extend::FileRegistryEntry.new(data)) } + let(:dest) do + Kiba::Extend::Registry::RegisteredFile.new( + key: filekey, + data: Kiba::Extend::Registry::FileRegistryEntry.new(data) + ) + end context 'when called with no path' do let(:data) { { description: 'blah' } } it 'raises FileNotRegisteredError' do msg = "No file path for :#{filekey} is recorded in file registry hash" expect { - Kiba::Extend::RegisteredFile.new(key: filekey, - data: Kiba::Extend::FileRegistryEntry.new(data)) + Kiba::Extend::Registry::RegisteredFile.new( + key: filekey, + data: Kiba::Extend::Registry::FileRegistryEntry.new(data) + ) }.to raise_error( - Kiba::Extend::RegisteredFile::NoFilePathError, msg + Kiba::Extend::Registry::RegisteredFile::NoFilePathError, msg ) end end diff --git a/spec/kiba/extend/registry/registered_lookup_spec.rb 
b/spec/kiba/extend/registry/registered_lookup_spec.rb index 81c35d9b2..4633131e5 100644 --- a/spec/kiba/extend/registry/registered_lookup_spec.rb +++ b/spec/kiba/extend/registry/registered_lookup_spec.rb @@ -3,18 +3,23 @@ require 'spec_helper' # rubocop:disable Metrics/BlockLength -RSpec.describe 'Kiba::Extend::RegisteredLookup' do +RSpec.describe 'Kiba::Extend::Registry::RegisteredLookup' do let(:filekey) { :fkey } let(:path) { File.join('spec', 'fixtures', 'fkey.csv') } let(:key) { :foo } let(:default) { { path: path, lookup_on: key, creator: Helpers.method(:test_csv) } } - let(:lookup) { Kiba::Extend::RegisteredLookup.new(key: filekey, data: Kiba::Extend::FileRegistryEntry.new(data)) } + let(:lookup) do + Kiba::Extend::Registry::RegisteredLookup.new( + key: filekey, + data: Kiba::Extend::Registry::FileRegistryEntry.new(data) + ) + end context 'when called without lookup key' do let(:data) { { path: path } } it 'raises NoLookupKeyError' do msg = "No lookup key column found for :#{filekey} in file registry hash" - expect { lookup }.to raise_error(Kiba::Extend::RegisteredLookup::NoLookupKeyError, msg) + expect { lookup }.to raise_error(Kiba::Extend::Registry::RegisteredLookup::NoLookupKeyError, msg) end end diff --git a/spec/kiba/extend/registry/registered_source_spec.rb b/spec/kiba/extend/registry/registered_source_spec.rb index d847c830a..6c1a0b100 100644 --- a/spec/kiba/extend/registry/registered_source_spec.rb +++ b/spec/kiba/extend/registry/registered_source_spec.rb @@ -3,11 +3,16 @@ require 'spec_helper' # rubocop:disable Metrics/BlockLength -RSpec.describe 'Kiba::Extend::RegisteredSource' do +RSpec.describe 'Kiba::Extend::Registry::RegisteredSource' do let(:filekey) { :fkey } let(:path) { File.join('spec', 'fixtures', 'fkey.csv') } let(:default) { { path: path, creator: -> { Helpers.test_csv } } } - let(:source) { Kiba::Extend::RegisteredSource.new(key: filekey, data: Kiba::Extend::FileRegistryEntry.new(data)) } + let(:source) do + 
Kiba::Extend::Registry::RegisteredSource.new( + key: filekey, + data: Kiba::Extend::Registry::FileRegistryEntry.new(data) + ) + end describe '#args' do let(:result) { source.args } diff --git a/spec/kiba/extend/registry/registry_entry_selector_spec.rb b/spec/kiba/extend/registry/registry_entry_selector_spec.rb index 2745d0a63..5069f4965 100644 --- a/spec/kiba/extend/registry/registry_entry_selector_spec.rb +++ b/spec/kiba/extend/registry/registry_entry_selector_spec.rb @@ -3,12 +3,12 @@ require 'spec_helper' # rubocop:disable Metrics/BlockLength -RSpec.describe 'Kiba::Extend::RegistryEntrySelector' do +RSpec.describe 'Kiba::Extend::Registry::RegistryEntrySelector' do before(:context) do - Kiba::Extend.config.registry = Kiba::Extend::FileRegistry.new + Kiba::Extend.config.registry = Kiba::Extend::Registry::FileRegistry.new prepare_registry end - let(:selector) { Kiba::Extend::RegistryEntrySelector.new } + let(:selector) { Kiba::Extend::Registry::RegistryEntrySelector.new } describe '#tagged_any' do let(:result) { selector.tagged_any(tags) } diff --git a/spec/kiba/extend/registry/registry_validator_spec.rb b/spec/kiba/extend/registry/registry_validator_spec.rb index 52c29b210..c85121dfa 100644 --- a/spec/kiba/extend/registry/registry_validator_spec.rb +++ b/spec/kiba/extend/registry/registry_validator_spec.rb @@ -3,12 +3,12 @@ require 'spec_helper' # rubocop:disable Metrics/BlockLength -RSpec.describe 'Kiba::Extend::RegistryValidator' do +RSpec.describe 'Kiba::Extend::Registry::RegistryValidator' do before(:context) do - Kiba::Extend.config.registry = Kiba::Extend::FileRegistry.new + Kiba::Extend.config.registry = Kiba::Extend::Registry::FileRegistry.new prepare_registry end - let(:validator) { Kiba::Extend::RegistryValidator.new } + let(:validator) { Kiba::Extend::Registry::RegistryValidator.new } describe '#valid?' do let(:result) { validator.valid? 
} diff --git a/spec/kiba/extend/registry/requirable_file_spec.rb b/spec/kiba/extend/registry/requirable_file_spec.rb index 1eacf1a74..1e5dd649a 100644 --- a/spec/kiba/extend/registry/requirable_file_spec.rb +++ b/spec/kiba/extend/registry/requirable_file_spec.rb @@ -2,16 +2,16 @@ require 'spec_helper' -class TestClass < Kiba::Extend::RegisteredFile - include Kiba::Extend::RequirableFile +class TestClass < Kiba::Extend::Registry::RegisteredFile + include Kiba::Extend::Registry::RequirableFile end # rubocop:disable Metrics/BlockLength -RSpec.describe 'Kiba::Extend::RequirableFile' do +RSpec.describe 'Kiba::Extend::Registry::RequirableFile' do let(:filekey) { :fkey } let(:path) { File.join('spec', 'fixtures', 'fkey.csv') } let(:default) { { path: path, creator: Helpers.method(:fake_creator_method) } } - let(:klass) { TestClass.new(key: filekey, data: Kiba::Extend::FileRegistryEntry.new(data)) } + let(:klass) { TestClass.new(key: filekey, data: Kiba::Extend::Registry::FileRegistryEntry.new(data)) } context 'when called without creator' do let(:data) { { path: path } } @@ -19,9 +19,9 @@ class TestClass < Kiba::Extend::RegisteredFile msg = "No creator method found for :#{filekey} in file registry" expect { TestClass.new(key: filekey, - data: Kiba::Extend::FileRegistryEntry.new(data)).required + data: Kiba::Extend::Registry::FileRegistryEntry.new(data)).required }.to raise_error( - Kiba::Extend::RequirableFile::NoDependencyCreatorError, msg + Kiba::Extend::Registry::RequirableFile::NoDependencyCreatorError, msg ) end end diff --git a/spec/kiba/extend/fieldset_spec.rb b/spec/kiba/extend/utils/fieldset_spec.rb similarity index 93% rename from spec/kiba/extend/fieldset_spec.rb rename to spec/kiba/extend/utils/fieldset_spec.rb index 49e0bd8ec..fca206126 100644 --- a/spec/kiba/extend/fieldset_spec.rb +++ b/spec/kiba/extend/utils/fieldset_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe Kiba::Extend::Fieldset do +RSpec.describe Kiba::Extend::Utils::Fieldset do 
let(:rows) do [ { a: 'aa', b: 'bb', c: 'cc', d: 'dd' }, @@ -11,7 +11,7 @@ ] end let(:fields) { %i[b c] } - let(:fieldset) { Kiba::Extend::Fieldset.new(fields) } + let(:fieldset) { Kiba::Extend::Utils::Fieldset.new(fields) } describe '#fields' do it 'returns an Array of fields collated by the Fieldset' do expect(fieldset.fields).to eq(fields) diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index ced73d33f..ee779ca35 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1,5 +1,9 @@ # frozen_string_literal: true +# This needs to be the very first thing in this file +require 'simplecov' +SimpleCov.start + require 'bundler/setup' require 'kiba/extend' require_relative './helpers'