Skip to content

Commit

Permalink
Merge pull request #66 from lyrasis/csv-write-error
Browse files Browse the repository at this point in the history
bugfix to `Fingerprint::Decode`; add param to `Fingerprint::Add`
  • Loading branch information
kspurgin authored Mar 17, 2022
2 parents 3ab2c31 + f0b4135 commit f33456c
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 4 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,12 @@ These changes are merged into the `main` branch but have not yet been tagged as
==== Breaking

==== Added
* `Fingerprint::Add` and `Fingerprint::Decode` transforms In https://github.com/lyrasis/kiba-extend/pull/65[PR#65]
* `Fingerprint::Add` and `Fingerprint::Decode` transforms. In https://github.com/lyrasis/kiba-extend/pull/65[PR#65]
* `override_app_delim_check` param to `Fingerprint::Add` for backward compatibility with a project I want to be able to use this transform. Defaults to `false`. https://github.com/lyrasis/kiba-extend/pull/66[PR#66]

==== Changed
* Moves `Merge::CompareFieldsFlag` to `Compare::FieldValues`. Aliases the old transform to the new one for backward compatibility, but raises deprecation warning. In https://github.com/lyrasis/kiba-extend/pull/62[PR#62]
* `Fingerprint::Decode` forces field values to UTF-8, preventing CSV write errors. In https://github.com/lyrasis/kiba-extend/pull/66[PR#66]

==== Deleted

Expand Down
10 changes: 8 additions & 2 deletions lib/kiba/extend/transforms/fingerprint/add.rb
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,15 @@ class Add
# @param fields [Array<Symbol>] fields whose values should be used in fingerprint
# @param delim [String] used to join field values into a hashable string
# @param target [Symbol] field in which fingerprint hash should inserted
# @param override_app_delim_check [Boolean] if true, will let you create a fingerprint with a delim
# that contains or is contained by `Kiba::Extend.delim` or `Kiba::Extend.sgdelim`. Setting this
# to `true` is dangerous and could result in un-decodeable fingerprints.
# @raise [DelimiterCollisionError] if `delim` conflicts with
# `Kiba::Extend.delim` or `Kiba::Extend.sgdelim`
# @raise [DelimiterInValueError] if the value of any field used to generate a fingerprint contains the
# fingerprint delimiter
def initialize(fields:, delim:, target:)
def initialize(fields:, delim:, target:, override_app_delim_check: false)
@override_app_delim_check = override_app_delim_check
check_delim(delim)
@fingerprinter = Kiba::Extend::Utils::FingerprintCreator.new(fields: fields, delim: delim)
@target = target
Expand All @@ -80,9 +84,11 @@ def process(row)

private

attr_reader :fingerprinter, :target
attr_reader :fingerprinter, :target, :override_app_delim_check

def check_delim(delim)
return if override_app_delim_check

raise Kiba::Extend::Transforms::Fingerprint::DelimiterCollisionError if delim[Kiba::Extend.delim]
raise Kiba::Extend::Transforms::Fingerprint::DelimiterCollisionError if delim[Kiba::Extend.sgdelim]
raise Kiba::Extend::Transforms::Fingerprint::DelimiterCollisionError if Kiba::Extend.delim[delim]
Expand Down
8 changes: 7 additions & 1 deletion lib/kiba/extend/transforms/fingerprint/decode.rb
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def process(row)
.map{ |val| val == 'empty' ? '' : val }
check_length(decoded)

target_fields.each_with_index{ |target, i| row[target] = decoded[i] }
target_fields.each_with_index{ |target, i| row[target] = safe_decoded_value(decoded[i]) }
row
end

Expand All @@ -98,6 +98,12 @@ def check_length(decoded)

warn("#{Kiba::Extend.warning_label}: ROW #{@row_ct}: Expected #{num_fields} fields from decoded fingerprint. Got #{result_length}")
end

def safe_decoded_value(val)
return val if val.blank?

val.force_encoding('UTF-8')
end
end
end
end
Expand Down
18 changes: 18 additions & 0 deletions spec/kiba/extend/transforms/fingerprint/add_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,24 @@
it 'raises error' do
expect{ result }.to raise_error(Kiba::Extend::Transforms::Fingerprint::DelimiterCollisionError)
end

context 'when override_app_delim_check = true' do
let(:transforms) do
Kiba.job_segment do
transform Fingerprint::Add, fields: %i[b c d e], delim: '|||', target: :fp, override_app_delim_check: true
end
end

let(:expected) do
[
{a: 'ant', b: 'bee', c: nil, d: 'deer', e: '', fp: Base64.strict_encode64('bee|||nil|||deer|||empty')}
]
end

it 'transforms as expected' do
expect(result).to eq(expected)
end
end
end

context 'when a field value matches the delimiter' do
Expand Down
24 changes: 24 additions & 0 deletions spec/kiba/extend/transforms/fingerprint/decode_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,28 @@
expect(result).to eq(expected)
end
end

context 'with field value containing non-ASCII characters' do
let(:input) do
[
{fp: 'YTs7O0hhZ3N0csO2bTs7O25pbA=='}
]
end

let(:transforms) do
Kiba.job_segment do
transform Fingerprint::Decode, fingerprint: :fp, source_fields: %i[b c d], delim: ';;;', prefix: 'fp', delete_fp: true
end
end

let(:expected) do
[
{fp_b: 'a', fp_c: 'Hagström', fp_d: nil}
]
end

it 'transforms as expected' do
expect(result).to eq(expected)
end
end
end

0 comments on commit f33456c

Please sign in to comment.