From 77ac67992ef8cc46ce2d04dda9069ed2a4d575b0 Mon Sep 17 00:00:00 2001 From: Kristina Spurgin Date: Fri, 1 Oct 2021 20:47:44 -0400 Subject: [PATCH] document transforms --- lib/kiba/extend/transforms/append.rb | 56 ++++++++++++ lib/kiba/extend/transforms/clean.rb | 124 ++++++++++++++++++++++++--- 2 files changed, 169 insertions(+), 11 deletions(-) diff --git a/lib/kiba/extend/transforms/append.rb b/lib/kiba/extend/transforms/append.rb index 99fbb8995..fd1995bed 100644 --- a/lib/kiba/extend/transforms/append.rb +++ b/lib/kiba/extend/transforms/append.rb @@ -6,6 +6,32 @@ module Transforms # Adds values to the end of fields or rows module Append ::Append = Kiba::Extend::Transforms::Append + + # Adds the given field(s) to the row with nil value if they do not already exist in row + # + # # Examples + # + # Input table: + # + # ``` + # | z | + # |----| + # | zz | + # ``` + # + # Used in pipeline as: + # + # ``` + # transform Append::NilFields, fields: %i[a b c z] + # ``` + # + # Results in: + # + # ``` + # | z | a | b | c | + # |----+-----+-----+-----| + # | zz | nil | nil | nil | + # ``` class NilFields def initialize(fields:) @fields = [fields].flatten @@ -20,6 +46,36 @@ def process(row) end end + # Appends the given value to the end of the existing value of the given field. 
Does not affect nil/empty field values + # + # # Examples + # + # Input table: + # + # + # ``` + # | name | + # |-------| + # | Weddy | + # | nil | + # | | + # ``` + # + # Used in pipeline as: + # + # ``` + # transform Append::ToFieldValue, field: :name, value: ' (name)' + # ``` + # + # Results in: + # + # ``` + # | name | + # |--------------| + # | Weddy (name) | + # | nil | + # | | + # ``` class ToFieldValue def initialize(field:, value:) @field = field diff --git a/lib/kiba/extend/transforms/clean.rb b/lib/kiba/extend/transforms/clean.rb index 53cf2261e..ae1833172 100644 --- a/lib/kiba/extend/transforms/clean.rb +++ b/lib/kiba/extend/transforms/clean.rb @@ -9,15 +9,117 @@ module Transforms module Clean ::Clean = Kiba::Extend::Transforms::Clean + # Sorts the multiple values within a field alphabetically + # # @note This transformation does **NOT** sort the **ROWS** in a dataset. It sorts values within # individual fields of a row - # Sorts the multiple values within a field alphabetically - # @param fields [Array(Symbol)] names of fields to sort - # @param delim [String] Character(s) on which to split field values - # @param usenull [Boolean] Whether to treat %NULLVALUE% as a blank in processing - # @param direction [:asc, :desc] Direction in which to sort field values + # + # # Examples + # + # Input table: + # + # ``` + # | type | + # |------------------------------| + # | Person;unmapped;Organization | + # | ; | + # | nil | + # | | + # | Person;notmapped | + # | %NULLVALUE%;apple | + # | oatmeal;%NULLVALUE% | + # ``` + # + # Used in pipeline as: + # + # ``` + # 
transform Clean::AlphabetizeFieldValues, fields: %i[type], delim: ';', usenull: false, + # direction: :desc + # ``` + # + # Results in: + # + # ``` + # | type | + # |------------------------------| + # | unmapped;Person;Organization | + # | ; | + # | nil | + # | | + # | Person;notmapped | + # | %NULLVALUE%;apple | + # | oatmeal;%NULLVALUE% | + # ``` + # + # Used in pipeline as: + # + # ``` + # transform Clean::AlphabetizeFieldValues, fields: %i[type], delim: ';', usenull: true, + # direction: :asc + # ``` + # + # Results in: + # + # ``` + # | type | + # |------------------------------| + # | Organization;Person;unmapped | + # | ; | + # | nil | + # | | + # | notmapped;Person | + # | apple;%NULLVALUE% | + # | oatmeal;%NULLVALUE% | + # ``` + # + # Used in pipeline as: + # + # ``` + # transform Clean::AlphabetizeFieldValues, fields: %i[type], delim: ';', usenull: true, + # direction: :desc + # ``` + # + # Results in: + # + # ``` + # | type | + # |------------------------------| + # | unmapped;Person;Organization | + # | ; | + # | nil | + # | | + # | Person;notmapped | + # | %NULLVALUE%;apple | + # | %NULLVALUE%;oatmeal | + # ``` class AlphabetizeFieldValues include Kiba::Extend::Transforms::Helpers + + # @param fields [Array(Symbol)] names of fields to sort + # @param delim [String] Character(s) on which to split field values + # @param usenull [Boolean] Whether to treat %NULLVALUE% as a blank in processing + # @param direction [:asc, :desc] Direction in which to sort field values def initialize(fields:, delim:, usenull: false, direction: :asc) @fields = [fields].flatten @delim = delim @@ -127,7 +229,7 @@ def process_group(row, group) thisgroup.map! { |val| add_null_values(val) } if @use_nullvalue thisgroup.map! { |val| val.nil? ? [] : " #{val} ".split(@sep) } - .map! { |arr| arr.map(&:strip) } + .map! { |arr| arr.map(&:strip) } cts = thisgroup.map(&:size).uniq.reject(&:zero?) @@ -157,15 +259,15 @@ def add_null_values(str) return str if str.nil? 
str.sub(/^#{@sep}/, "%NULLVALUE%#{@sep}") - .sub(/#{@sep}$/, "#{@sep}%NULLVALUE%") - .gsub(/#{@sep}#{@sep}/, "#{@sep}%NULLVALUE%#{@sep}") + .sub(/#{@sep}$/, "#{@sep}%NULLVALUE%") + .gsub(/#{@sep}#{@sep}/, "#{@sep}%NULLVALUE%#{@sep}") end def all_empty?(group, index) thesevals = group.map { |arr| arr[index] } - .map { |val| empty_val(val) ? nil : val } - .uniq - .compact + .map { |val| empty_val(val) ? nil : val } + .uniq + .compact thesevals.empty? ? true : false end end