diff --git a/CHANGELOG.adoc b/CHANGELOG.adoc index d55a4dd43..4c96cb3bd 100644 --- a/CHANGELOG.adoc +++ b/CHANGELOG.adoc @@ -37,6 +37,7 @@ These changes are merged into the `main` branch, but have not been released. Aft * `Reshape::FieldsToFieldGroupWithConstant` constant value is no longer added to rows with no values in the renamed/remapped value fields, when fieldmap length == 1. (PR#195) === Added +* Ability to pass `find` argument to `Clean::RegexpFindReplaceFieldVals` as a `Regexp` object, in addition to a `String`. A `Regexp` is used as-is, so the `casesensitive` parameter is ignored for it; set the `i` flag on the `Regexp` instead. (PR#196) === Changed diff --git a/lib/kiba/extend/transforms/clean/regexp_find_replace_field_vals.rb b/lib/kiba/extend/transforms/clean/regexp_find_replace_field_vals.rb index 0fb0b5f7e..11ec36463 100644 --- a/lib/kiba/extend/transforms/clean/regexp_find_replace_field_vals.rb +++ b/lib/kiba/extend/transforms/clean/regexp_find_replace_field_vals.rb @@ -7,7 +7,7 @@ module Clean # Performs specified regular expression find/replace in the specified # field(s) # - # @example Basic match(default) + # @example Basic match (default, with find passed as String) # # Used in pipeline as: # # transform Clean::RegexpFindReplaceFieldVals, # # fields: :val, @@ -30,10 +30,10 @@ module Clean # {val: 'x files'} # ] # expect(result).to eq(expected) - # @example Handles start/end anchors + # @example Handles start/end anchors, find passed as Regexp # xform = Clean::RegexpFindReplaceFieldVals.new( # fields: :val, - # find: '^xx+', + # find: /^xx+/, # replace: 'exes' # ) # input = [ @@ -63,6 +63,22 @@ module Clean # {val: 'The object'} # ] # expect(result).to eq(expected) + # @example Case insensitive regexp + # xform = Clean::RegexpFindReplaceFieldVals.new( + # fields: :val, + # find: /thing/i, + # replace: 'object' + # ) + # input = [ + # {val: 'the thing'}, + # {val: 'The Thing'} + # ] + # result = input.map{ |row| xform.process(row) } + # expected = [ + # {val: 'the object'}, + # {val: 'The object'} + # ] + # 
expect(result).to eq(expected) # @example Matching/replacing line breaks (note double quotes) # xform = Clean::RegexpFindReplaceFieldVals.new( # fields: :val, @@ -202,8 +218,9 @@ class RegexpFindReplaceFieldVals include Allable # @param fields [Array,Symbol,nil] in which to find/replace - # @param find [String] make sure to use double quotes to match slash - # escaped characters (\n, etc) + # @param find [String, Regexp] If passing a String, use double + # quotes to match slash escaped characters (\n, etc). A Regexp is + # used as-is, and `casesensitive` is then ignored (use the `i` flag) # @param replace [String] # @param casesensitive [Boolean] # @param multival [Boolean] @@ -215,11 +232,7 @@ class RegexpFindReplaceFieldVals def initialize(fields:, find:, replace:, casesensitive: true, multival: false, sep: nil, debug: false) @fields = [fields].flatten - @find = if casesensitive == true - Regexp.new(find) - else - Regexp.new(find, Regexp::IGNORECASE) - end + @find = build_pattern(find, casesensitive) @replace = replace @debug = debug @mv = multival @@ -246,6 +259,19 @@ def process(row) attr_reader :fields, :find, :replace, :debug, :mv, :sep + def build_pattern(find, casesensitive) + case find + when Regexp + find + when String + if casesensitive == true + Regexp.new(find) + else + Regexp.new(find, Regexp::IGNORECASE) + end + end + end + def mv_find_replace(val) val.split(sep).map { |v| v.gsub(find, replace) }.join(sep) end