Skip to content

Commit

Permalink
Merge pull request #16 from lyrasis/multi-multi-merge
Browse files Browse the repository at this point in the history
Multi multi merge
  • Loading branch information
kspurgin authored May 26, 2021
2 parents 1abc820 + b239937 commit 9a23a4f
Show file tree
Hide file tree
Showing 7 changed files with 373 additions and 259 deletions.
2 changes: 1 addition & 1 deletion Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PATH
remote: .
specs:
kiba-extend (1.10.0)
kiba-extend (1.11.0)
activesupport
kiba (>= 4.0.0)
kiba-common (>= 1.5.0)
Expand Down
38 changes: 20 additions & 18 deletions lib/kiba/extend.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,26 @@ module Extend
autoload :VERSION, 'extend/version'

puts "kiba-extend version: #{Kiba::Extend::VERSION}"
require 'kiba/extend/destinations/csv'
require 'kiba/extend/transforms/append'
require 'kiba/extend/transforms/clean'
require 'kiba/extend/transforms/combine_values'
require 'kiba/extend/transforms/copy'
require 'kiba/extend/transforms/deduplicate'
require 'kiba/extend/transforms/cspace'
require 'kiba/extend/transforms/delete'
require 'kiba/extend/transforms/explode'
require 'kiba/extend/transforms/filter_rows'
require 'kiba/extend/transforms/merge'
require 'kiba/extend/transforms/ms_access'
require 'kiba/extend/transforms/prepend'
require 'kiba/extend/transforms/rename'
require 'kiba/extend/transforms/replace'
require 'kiba/extend/transforms/reshape'
require 'kiba/extend/transforms/split'
require 'kiba/extend/utils/lookup'

require 'kiba/extend/fieldset'
require 'kiba/extend/destinations/csv'
require 'kiba/extend/transforms/append'
require 'kiba/extend/transforms/clean'
require 'kiba/extend/transforms/combine_values'
require 'kiba/extend/transforms/copy'
require 'kiba/extend/transforms/deduplicate'
require 'kiba/extend/transforms/cspace'
require 'kiba/extend/transforms/delete'
require 'kiba/extend/transforms/explode'
require 'kiba/extend/transforms/filter_rows'
require 'kiba/extend/transforms/merge'
require 'kiba/extend/transforms/ms_access'
require 'kiba/extend/transforms/prepend'
require 'kiba/extend/transforms/rename'
require 'kiba/extend/transforms/replace'
require 'kiba/extend/transforms/reshape'
require 'kiba/extend/transforms/split'
require 'kiba/extend/utils/lookup'

# strips, collapses multiple spaces, removes terminal commas, strips again
CSV::Converters[:stripplus] = lambda{ |s|
Expand Down
63 changes: 63 additions & 0 deletions lib/kiba/extend/fieldset.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
module Kiba
  module Extend
    # Collects, field by field, the values found in a set of rows so they can
    # later be joined into delimited multi-value strings.
    #
    # Internally this is a Hash of field => Array. The arrays are parallel:
    # position N of every array came from the same source row, which is what
    # lets an entirely valueless row be dropped from all fields at once.
    class Fieldset
      # @param fields [Array] names of the fields to collect values for
      def initialize(fields)
        @hash = {}
        fields.each { |field| @hash[field] = [] }
      end

      # Sets +field+ to +value+ repeated once per collected row, replacing
      # anything already stored under +field+.
      #
      # @param field [Object] field name to create or overwrite
      # @param value [Object] constant repeated for every collected row
      def add_constant_values(field, value)
        # Measure before replacing the column: when +field+ is the first key,
        # emptying it first would make value_ct report 0 and drop the constant.
        count = value_ct
        @hash[field] = Array.new(count, value)
      end

      # @return [Array] the field names currently held, in insertion order
      def fields
        @hash.keys
      end

      # Returns the underlying field => values Hash (the same object, not a copy).
      #
      # NOTE(review): this shadows Object#hash, which Ruby expects to return an
      # Integer; a Fieldset therefore should not be used as a Hash key or in
      # Array#uniq. The name is kept because existing callers rely on it.
      #
      # @return [Hash]
      def hash
        @hash
      end

      # Replaces every field's Array of values with a single String joined by
      # +delim+. Mutates the Fieldset in place.
      #
      # @param delim [String] separator placed between values
      # @return [Hash] the updated field => String Hash
      def join_values(delim)
        @hash.transform_values! { |vals| vals.join(delim) }
      end

      # Appends the values of each row to the matching fields, then removes any
      # row position at which every field is nil.
      #
      # @param rows [Array<Hash>, nil] rows to read values from; nil or empty is a no-op
      def populate(rows)
        return if rows.nil? || rows.empty?

        rows.each { |row| get_field_values(row) }
        remove_valueless_rows
      end

      # @return [Integer] number of values held by the first field; 0 when the
      #   Fieldset has no fields at all
      def value_ct
        first_column = @hash.values.first
        first_column ? first_column.length : 0
      end

      private

      # Pushes one value per field from +row+; missing or blank values are
      # stored as nil so the per-field arrays stay aligned.
      def get_field_values(row)
        fields.each do |field|
          fetched = row.fetch(field, nil)
          # blank? is supplied by ActiveSupport (a declared gem dependency)
          @hash[field] << (fetched.blank? ? nil : fetched)
        end
      end

      # Deletes, from every field, the positions reported by valueless_indices.
      def remove_valueless_rows
        valueless_indices.each do |index|
          @hash.each_value { |values| values.delete_at(index) }
        end
      end

      # Positions at which every field holds nil, highest first so that
      # deleting them one by one does not shift the positions still pending.
      #
      # @return [Array<Integer>]
      def valueless_indices
        columns = @hash.values
        # No fields means there is nothing to inspect (and no first column).
        return [] if columns.empty?

        (0...columns.first.length)
          .select { |i| columns.all? { |vals| vals[i].nil? } }
          .reverse
      end
    end
  end
end
83 changes: 40 additions & 43 deletions lib/kiba/extend/transforms/merge.rb
Original file line number Diff line number Diff line change
Expand Up @@ -115,62 +115,59 @@ def process(row)
# into the target, AND THE TARGET IS MULTIVALUED
class MultiRowLookup
# NOTE(review): this class is shown as a rendered diff hunk, so removed and added lines
# appear together without +/- markers. The lines working with `fh`, `ch`, `merge_rows`,
# `keep_rows` and `chk` appear to be the removed implementation, and the lines working with
# `ids`, `field_data`, `target_field` and `rows_to_merge` appear to be its replacement --
# confirm against the repository before treating this text as runnable code.
#
# @param fieldmap [Hash] target field in the processed row => source field in the lookup rows
# @param constantmap [Hash] target field => constant value added for each merged-in row
# @param lookup [Hash] key => Array of rows; fetched by key with an empty Array as default
# @param keycolumn field of the processed row that holds the lookup key
# @param conditions passed through unchanged to Lookup::RowSelector
# @param multikey [Boolean] when true, the key column value is split on delim and each
#   piece is looked up separately
# @param delim [String] used to split a multivalued key, to join merged values, and as the
#   `sep` given to Lookup::RowSelector
def initialize(fieldmap:, constantmap: {}, lookup:, keycolumn:,
conditions: {}, delim: DELIM)
conditions: {}, multikey: false, delim: DELIM)
@fieldmap = fieldmap # hash of looked-up values to merge in for each merged-in row
@constantmap = constantmap #hash of constants to add for each merged-in row
@lookup = lookup #lookuphash; should be created with csv_to_multi_hash
@keycolumn = keycolumn #column in main table containing value expected to be lookup key
@constantmap = constantmap # hash of constants to add for each merged-in row
@lookup = lookup # lookuphash; should be created with csv_to_multi_hash
@keycolumn = keycolumn # column in main table containing value expected to be lookup key
@multikey = multikey # should the key be treated as multivalued
@conditions = conditions
@delim = delim
end

# Merges looked-up values (and constants) into `row` and returns the row.
# `row.fetch(@keycolumn)` has no default, so a row lacking the key column raises KeyError.
def process(row)
id = row.fetch(@keycolumn)
fh = {}
ch = {}
@fieldmap.each_key{ |k| fh[k] = [] }
@constantmap.each_key{ |k| ch[k] = [] }
# NOTE(review): with multikey true, a nil key value would raise NoMethodError on `split`
# -- confirm whether key values can be nil here.
id_data = row.fetch(@keycolumn)
ids = @multikey ? id_data.split(@delim) : [id_data]
# The Fieldset is keyed by the SOURCE field names (the values of fieldmap).
field_data = Kiba::Extend::Fieldset.new(@fieldmap.values)

merge_rows = @lookup.fetch(id, [])

if merge_rows.size > 0
keep_rows = Lookup::RowSelector.new(
origrow: row,
mergerows: @lookup.fetch(id, []),
conditions: @conditions,
sep: @delim
).result

keep_rows.each do |mrow|
mergevals = []
@fieldmap.each do |target, source|
val = mrow.fetch(source, nil)
result = val.blank? ? nil : val
fh[target] << result
mergevals << result
end
if mergevals.compact.empty?
@constantmap.each{ |target, value| ch[target] << nil }
else
@constantmap.each{ |target, value| ch[target] << value }
end
end
ids.each do |id|
field_data.populate(rows_to_merge(id, row))
end

chk = @fieldmap.map{ |target, source| fh[target].compact.size }.uniq.sort
@constantmap.each do |field, value|
field_data.add_constant_values(field, value)
end

if chk == [0]
fh.each{ |target, arr| row[target] = nil }
ch.each{ |target, arr| row[target] = nil }
else
fh.each{ |target, arr| row[target] = arr.join(@delim) }
ch.each{ |target, arr| row[target] = arr.join(@delim) }
end
else
@fieldmap.keys.each{ |f| row[f] = nil }
@constantmap.keys.each{ |f| row[f] = nil }
field_data.join_values(@delim)

# Write each collected field back under its target name; blank results become nil.
field_data.hash.each do |field, value|
row[target_field(field)] = value.blank? ? nil : value
end

row
end

private

# Translates a Fieldset key into the row field to write: a key found among the fieldmap
# values maps to the first fieldmap key holding it; any other key (such as a constantmap
# field) is returned unchanged.
def target_field(field)
target = @fieldmap.key(field)
return target unless target.nil?

field
end

# Returns the lookup rows for `id`. An empty match list is returned as-is without
# invoking the selector; otherwise the result of Lookup::RowSelector is returned.
def rows_to_merge(id, sourcerow)
matches = @lookup.fetch(id, [])
return matches if matches.empty?

Lookup::RowSelector.new(
origrow: sourcerow,
mergerows: @lookup.fetch(id, []),
conditions: @conditions,
sep: @delim
).result
end
end
end # module Merge
end #module Transforms
Expand Down
2 changes: 1 addition & 1 deletion lib/kiba/extend/version.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
module Kiba
module Extend
# Gem version string; interpolated into the load-time `puts` in lib/kiba/extend.rb.
# NOTE(review): two assignments are shown because this section is rendered from a diff
# (the first value appears to be the removed line, the second the added one) -- confirm.
VERSION = "1.11.0"
VERSION = "1.12.0"
end
end
53 changes: 53 additions & 0 deletions spec/kiba/extend/fieldset_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
require 'spec_helper'

# Behavioural examples for Kiba::Extend::Fieldset: collecting field values
# from rows, padding with constants, and joining into delimited strings.
RSpec.describe Kiba::Extend::Fieldset do
  subject(:fieldset) { described_class.new(fields) }

  let(:fields) { %i[b c] }
  # The third row has no usable value for :b or :c, so it should be dropped.
  let(:rows) do
    [
      { a: 'aa', b: 'bb', c: 'cc', d: 'dd' },
      { a: 'aa', b: 'bee', c: 'cee', d: 'dd' },
      { a: 'aa', b: nil, c: '', d: 'dd' }
    ]
  end

  describe '#fields' do
    it 'returns an Array of fields collated by the Fieldset' do
      expect(fieldset.fields).to eq(fields)
    end
  end

  describe '#populate' do
    it 'populates hash with field values from given rows' do
      fieldset.populate(rows)

      expect(fieldset.hash.values).to eq([%w[bb bee], %w[cc cee]])
    end
  end

  describe '#add_constant_values' do
    it 'populates hash with constant values' do
      fieldset.populate(rows)
      fieldset.add_constant_values(:f, 'ffff')

      expect(fieldset.hash.values).to eq([%w[bb bee], %w[cc cee], %w[ffff ffff]])
    end

    it 'adds field, but does not add constant values to it for empty rows' do
      # Neither row carries a non-blank :b or :c value.
      valueless_rows = [
        { a: 'aa' },
        { a: 'aa', b: '' }
      ]
      fieldset.populate(valueless_rows)
      fieldset.add_constant_values(:f, 'ffff')

      expect(fieldset.hash.values).to eq([[], [], []])
    end
  end

  describe '#join_values' do
    it 'joins hash values' do
      fieldset.populate(rows)
      fieldset.add_constant_values(:f, 'ffff')
      fieldset.join_values('|')

      expect(fieldset.hash.values).to eq(['bb|bee', 'cc|cee', 'ffff|ffff'])
    end
  end
end
Loading

0 comments on commit 9a23a4f

Please sign in to comment.