Skip to content

Commit

Permalink
Merge pull request #14 from lyrasis/null_only_field_clean
Browse files Browse the repository at this point in the history
add support for %NULLVALUE% handling to `Clean::EmptyFieldGroups` and `Clean::DelimiterOnlyFields`
  • Loading branch information
kspurgin authored May 20, 2021
2 parents 4e990d1 + 1ecb2d5 commit d408d43
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 29 deletions.
2 changes: 1 addition & 1 deletion Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PATH
remote: .
specs:
kiba-extend (1.9.0)
kiba-extend (1.10.0)
activesupport
kiba (>= 4.0.0)
kiba-common (>= 1.5.0)
Expand Down
43 changes: 35 additions & 8 deletions lib/kiba/extend/transforms/clean.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ module Clean

module Helpers
::Clean::Helpers = Kiba::Extend::Transforms::Clean::Helpers
def delim_only?(val, delim)
# Reports whether a value contains nothing but delimiters and whitespace.
#
# @param val [String] the value to inspect
# @param delim the delimiter pattern handed to String#gsub for removal
# @param usenull [Boolean] when true, every %NULLVALUE% occurrence is removed
#   as well before the emptiness check
# @return [Boolean] true if nothing is left once delimiters (and, optionally,
#   %NULLVALUE% placeholders) are removed and the remainder is stripped
def delim_only?(val, delim, usenull = false)
  leftover = val.gsub(delim, '').strip
  return leftover.empty? unless usenull

  leftover.gsub('%NULLVALUE%', '').strip.empty?
end
end
Expand Down Expand Up @@ -53,13 +54,14 @@ def process(row)

class DelimiterOnlyFields
include Clean::Helpers
def initialize(delim:)
# @param delim the delimiter; process sets a String field to nil when
#   delim_only? reports that it holds nothing but this delimiter
# @param use_nullvalue [Boolean] forwarded to delim_only? as its third
#   argument, which makes %NULLVALUE% count as empty content too
def initialize(delim:, use_nullvalue: false)
@delim = delim
@use_nullvalue = use_nullvalue
end

def process(row)
row.each do |hdr, val|
row[hdr] = nil if val.is_a?(String) && delim_only?(val, @delim)
row[hdr] = nil if val.is_a?(String) && delim_only?(val, @delim, @use_nullvalue)
end
row
end
Expand All @@ -80,9 +82,16 @@ def process(row)
end

class EmptyFieldGroups
def initialize(groups:, sep:)
# groups - an array of arrays. Each inner array should list all fields that are part of one
#   repeating field group or field subgroup.
# sep - the repeating-value delimiter.
# use_nullvalue - if true, inserts %NULLVALUE% before any sep at the beginning of a string, after any sep
#   at the end of a string, and between any two consecutive seps with nothing in between. %NULLVALUE% is
#   treated as a blank value, so if all values in a field are %NULLVALUE%, the field will be nil-ed out.
def initialize(groups:, sep:, use_nullvalue: false)
# field groups to clean, exactly as supplied by the caller
@groups = groups
# delimiter used by process_group to split values and by add_null_values to place %NULLVALUE%
@sep = sep
# read by process_group (whether to call add_null_values) and by empty_val (whether %NULLVALUE% is empty)
@use_nullvalue = use_nullvalue
end

def process(row)
Expand All @@ -94,10 +103,14 @@ def process(row)

def process_group(row, group)
thisgroup = group.map{ |field| row.fetch(field, '')}
.map{ |val| val.nil? ? [] : " #{val} ".split(@sep) }
.map{ |arr| arr.map{ |e| e.strip } }

cts = thisgroup.map{ |arr| arr.size }.uniq
thisgroup.map!{ |val| add_null_values(val) } if @use_nullvalue

thisgroup.map!{ |val| val.nil? ? [] : " #{val} ".split(@sep) }
.map!{ |arr| arr.map{ |e| e.strip } }

cts = thisgroup.map{ |arr| arr.size }.uniq.reject{ |ct| ct == 0 }

to_delete = []

if cts.size > 1
Expand All @@ -113,9 +126,23 @@ def process_group(row, group)
end
end

# Decides whether a single value counts as empty.
#
# A value is empty when it is blank, or when it is exactly the %NULLVALUE%
# placeholder and @use_nullvalue is set.
#
# @param str [String, nil] one already-split value
# @return [Boolean]
def empty_val(str)
  if str.blank?
    true
  elsif @use_nullvalue
    # the placeholder only counts as empty when the option is switched on
    str == '%NULLVALUE%'
  else
    false
  end
end

# Fills every empty slot in a delimited string with the %NULLVALUE% placeholder.
#
# An empty slot is the position before a leading separator, after a trailing
# separator, or between two consecutive separators.
#
# @param str [String, nil] the delimited field value
# @return [String, nil] the value with placeholders inserted; nil is returned unchanged
def add_null_values(str)
  return str if str.nil?

  # Escape the separator so regex metacharacters (e.g. "|", ".") match literally.
  sep = Regexp.escape(@sep)

  # Block replacements keep backslashes in @sep from being read as backreferences.
  # \A and \z anchor to the whole string; ^ and $ would also match at line breaks.
  # The lookahead leaves the second separator unconsumed, so a run such as ";;;"
  # gets a placeholder in every gap instead of every other one.
  str.sub(/\A#{sep}/) { "%NULLVALUE%#{@sep}" }
     .sub(/#{sep}\z/) { "#{@sep}%NULLVALUE%" }
     .gsub(/#{sep}(?=#{sep})/) { "#{@sep}%NULLVALUE%" }
end

def all_empty?(group, index)
thesevals = group.map{ |arr| arr[index] }
.map{ |val| val.empty? ? nil : val }
.map{ |val| empty_val(val) ? nil : val }
.uniq
.compact
thesevals.empty? ? true : false
Expand Down
2 changes: 1 addition & 1 deletion lib/kiba/extend/version.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
module Kiba
module Extend
VERSION = "1.9.1"
VERSION = "1.10.0"
end
end
135 changes: 116 additions & 19 deletions spec/kiba/extend/transforms/clean_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -69,24 +69,42 @@
end

describe 'DelimiterOnlyFields' do
test_csv = 'tmp/test.csv'
rows = [
['id', 'in_set'],
['1', 'a; b'],
['2', ';'],
['3', nil]
]
let(:test_csv) { 'tmp/test.csv' }
let(:rows) { [
['id', 'in_set'],
['1', 'a; b'],
['2', ';'],
['3', nil],
['4', '%NULLVALUE%;%NULLVALUE%;%NULLVALUE%']
] }
let(:result) { execute_job(filename: test_csv, xform: Clean::DelimiterOnlyFields, xformopt: options) }

before { generate_csv(test_csv, rows) }
after { File.delete(test_csv) if File.exist?(test_csv) }

before { generate_csv(test_csv, rows) }
let(:result) { execute_job(filename: test_csv, xform: Clean::DelimiterOnlyFields, xformopt: {delim: ';'}) }
context 'when use_nullvalue = false (the default)' do
let(:options) { {delim: ';'} }
it 'changes delimiter only fields to nil' do
expect(result[1][:in_set]).to be_nil
end
it 'leaves other fields unchanged' do
expect(result[0][:in_set]).to eq('a; b')
expect(result[2][:in_set]).to be_nil
expect(result[3][:in_set]).to eq('%NULLVALUE%;%NULLVALUE%;%NULLVALUE%')
end
after { File.delete(test_csv) if File.exist?(test_csv) }
end

context 'when use_nullvalue = true' do
let(:options) { {delim: ';', use_nullvalue: true} }
it 'changes delimiter only fields to nil' do
expect(result[1][:in_set]).to be_nil
expect(result[3][:in_set]).to be_nil
end
it 'leaves other fields unchanged' do
expect(result[0][:in_set]).to eq('a; b')
expect(result[2][:in_set]).to be_nil
end
end
end

describe 'DowncaseFieldValues' do
Expand Down Expand Up @@ -114,17 +132,20 @@
end

describe 'EmptyFieldGroups' do
test_csv = 'tmp/test.csv'

after { File.delete(test_csv) if File.exist?(test_csv) }
it 'Removes field groups where all fields in group are empty' do
rows = [
let(:test_csv) { 'tmp/test.csv' }
let(:rows) { [
['id', 'a1', 'a2', 'b1', 'b2', 'b3'],
['4', 'not;', nil, ';empty', ';empty', ';empty'],
['1', 'not;empty', 'not;empty', 'not;empty', 'not;empty', 'not;empty'],
['2', 'not;', 'not;', ';empty', 'not;empty', ';empty'],
['3', ';', ';', ';empty', ';empty', ';empty']
]
['3', ';', ';', ';empty', ';empty', ';empty'],
['5', '%NULLVALUE%;%NULLVALUE%', '%NULLVALUE%;%NULLVALUE%', 'not;empty', '%NULLVALUE%;empty', 'empty;%NULLVALUE%'],
['6', ';', ';', '%NULLVALUE%;empty', '%NULLVALUE%;empty', '%NULLVALUE%;empty'],
] }
after { File.delete(test_csv) if File.exist?(test_csv) }

context 'When use_nullvalue = false (the default)' do
it 'Removes field groups where all fields in group are empty' do
generate_csv(test_csv, rows)
result = execute_job(filename: test_csv,
xform: Clean::EmptyFieldGroups,
Expand All @@ -133,11 +154,12 @@
%i[a1 a2],
%i[b1 b2 b3]
],
sep: ';'
sep: ';',
use_nullvalue: false
})
expected = [
{:id=>'4',
:a1=>'not;',
:a1=>'not',
:a2=>nil,
:b1=>'empty',
:b2=>'empty',
Expand All @@ -164,9 +186,84 @@
:b2=>'empty',
:b3=>'empty'
},
{:id=>'5',
:a1=>'%NULLVALUE%;%NULLVALUE%',
:a2=>'%NULLVALUE%;%NULLVALUE%',
:b1=>'not;empty',
:b2=>'%NULLVALUE%;empty',
:b3=>'empty;%NULLVALUE%'
},
{:id=>'6',
:a1=>nil,
:a2=>nil,
:b1=>'%NULLVALUE%;empty',
:b2=>'%NULLVALUE%;empty',
:b3=>'%NULLVALUE%;empty'
}
]
expect(result).to eq(expected)
end
end
context 'When use_nullvalue = true' do
it 'Removes field groups where all fields in group are empty' do
generate_csv(test_csv, rows)
result = execute_job(filename: test_csv,
xform: Clean::EmptyFieldGroups,
xformopt: {
groups: [
%i[a1 a2],
%i[b1 b2 b3]
],
sep: ';',
use_nullvalue: true
})
expected = [
{:id=>'4',
:a1=>'not',
:a2=>nil,
:b1=>'empty',
:b2=>'empty',
:b3=>'empty'
},
{:id=>'1',
:a1=>'not;empty',
:a2=>'not;empty',
:b1=>'not;empty',
:b2=>'not;empty',
:b3=>'not;empty'
},
{:id=>'2',
:a1=>'not',
:a2=>'not',
:b1=>'%NULLVALUE%;empty',
:b2=>'not;empty',
:b3=>'%NULLVALUE%;empty'
},
{:id=>'3',
:a1=>nil,
:a2=>nil,
:b1=>'empty',
:b2=>'empty',
:b3=>'empty'
},
{:id=>'5',
:a1=>nil,
:a2=>nil,
:b1=>'not;empty',
:b2=>'%NULLVALUE%;empty',
:b3=>'empty;%NULLVALUE%'
},
{:id=>'6',
:a1=>nil,
:a2=>nil,
:b1=>'empty',
:b2=>'empty',
:b3=>'empty'
}
]
expect(result).to eq(expected)
end
end
end

describe 'RegexpFindReplaceFieldVals' do
Expand Down

0 comments on commit d408d43

Please sign in to comment.