-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fraction::ToDecimal
transform (#108)
* add Data::ConvertibleFraction value object class * add Utils::ExtractFractions service object * add Fraction::ToDecimal transform
- Loading branch information
Showing
7 changed files
with
646 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
# frozen_string_literal: true | ||
|
||
module Kiba
  module Extend
    module Data
      # Value object encoding an extracted string fraction (e.g. '1 1/2') so it can be converted.
      #
      # Can represent invalid/non-convertible "fractions" (e.g. '1/0' or a non-numeric string). Such
      # instances answer `false` to {#convertible?}, return `nil` from {#to_f} and {#to_s}, and leave
      # values untouched in {#replace_in}.
      class ConvertibleFraction
        include Comparable

        attr_reader :whole, :fraction, :position

        # @param whole [Integer] whole number preceding a fraction
        # @param fraction [String]
        # @param position [Range] indicates position of fractional data within original string
        # @raise [TypeError] if `whole` is not an Integer or `position` is not a Range
        def initialize(whole: 0, fraction:, position:)
          raise(TypeError, '`whole` must be an Integer') unless whole.is_a?(Integer)
          raise(TypeError, '`position` must be a Range') unless position.is_a?(Range)

          @whole = whole.freeze
          @fraction = fraction.freeze
          @position = position.freeze
        end

        # @param val [String] the value in which textual fraction will be replaced with a decimal
        # @param places [Integer] maximum number of decimal places to keep in the resulting decimal value
        # @return [String] `val` with the span at `position` replaced by the decimal string; `val` itself
        #   if the fraction is not convertible
        def replace_in(val:, places: 4)
          return val unless convertible?

          before = val[0...position.min]
          # Slicing past the end of the string yields nil, which interpolates as an empty string
          after = val[(position.max + 1)..-1]
          "#{before}#{to_s(places)}#{after}"
        end

        # @return [Float, nil] nil if fraction is not convertible
        def to_f
          return nil unless convertible?

          rational.to_f
        end

        # @param places [Integer] maximum number of decimal places to keep
        # @return [String, nil] nil if fraction is not convertible
        def to_s(places = 4)
          return nil unless convertible?

          rational.round(places).to_f.to_s
        end

        # @return [Boolean] whether the fraction is indeed convertible
        def convertible?
          Rational(fraction)
          true
        rescue ZeroDivisionError, ArgumentError, TypeError
          # ZeroDivisionError: zero denominator; ArgumentError: unparseable string;
          # TypeError: value that cannot be coerced at all (e.g. nil)
          false
        end

        # @param other [Object]
        # @return [Boolean] true only for another fraction with equal whole, fraction, and position
        def ==(other)
          return false unless other.is_a?(self.class)

          whole == other.whole && fraction == other.fraction && position == other.position
        end
        alias_method :eql?, :==

        # Orders fractions by where they start in the original string.
        #
        # @param other [Object]
        # @return [Integer, nil] nil if `other` is not comparable with this fraction
        def <=>(other)
          return nil unless other.is_a?(self.class)

          position.first <=> other.position.first
        end

        # @return [Integer] hash consistent with {#eql?}
        def hash
          [self.class, whole, fraction, position].hash
        end

        # @return [Hash{Symbol => Object}]
        def to_h
          {whole: whole, fraction: fraction, position: position}
        end

        private

        # Exact value of whole number plus fraction. Only call when {#convertible?}.
        def rational
          Rational(fraction) + whole
        end
      end
    end
  end
end
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# frozen_string_literal: true | ||
|
||
module Kiba
  module Extend
    module Transforms
      # Namespace for transforms that deal with fractions in field values.
      #
      # Also exposed as the top-level constant `::Fraction`, so it can be referenced without the
      # full namespace.
      module Fraction
        ::Fraction = self
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,182 @@ | ||
# frozen_string_literal: true | ||
|
||
module Kiba
  module Extend
    module Transforms
      module Fraction
        # Converts fractions expressed like "1 1/4" to decimals like "1.25"
        #
        # @example Defaults and general behavior/value handling
        #   # Used in pipeline as:
        #   # transform Fraction::ToDecimal, fields: :dim
        #   xform = Fraction::ToDecimal.new(fields: :dim)
        #   input = [
        #     {dim: nil},
        #     {dim: ''},
        #     {dim: 'foo'},
        #     {dim: '1/2'},
        #     {dim: '6-1/4 x 9-1/4'},
        #     {dim: '10 5/8x13'},
        #     {dim: '1 2/3 x 5 1/2'},
        #     {dim: 'approximately 2/3 by 1/2in (height unknown)'}
        #   ]
        #   result = input.map{ |row| xform.process(row) }
        #   expected = [
        #     {dim: nil},
        #     {dim: ''},
        #     {dim: 'foo'},
        #     {dim: '0.5'},
        #     {dim: '6.25 x 9.25'},
        #     {dim: '10.625x13'},
        #     {dim: '1.6667 x 5.5'},
        #     {dim: 'approximately 0.6667 by 0.5in (height unknown)'}
        #   ]
        #   expect(result).to eq(expected)
        #
        # @example Multiple fields and targets
        #   # Used in pipeline as:
        #   # transform Fraction::ToDecimal, fields: %i[w h], targets: %i[width height]
        #   xform = Fraction::ToDecimal.new(fields: %i[w h], targets: %i[width height])
        #   input = [{w: '8 1/2', h: '11'}]
        #   result = input.map{ |row| xform.process(row) }
        #   expected = [{w: '8 1/2', h: '11', width: '8.5', height: '11'}]
        #   expect(result).to eq(expected)
        #
        # @example Multiple fields and targets, and `delete_sources` = true
        #   # Used in pipeline as:
        #   # transform Fraction::ToDecimal, fields: %i[w h], targets: %i[w height], delete_sources: true
        #   xform = Fraction::ToDecimal.new(fields: %i[w h], targets: %i[w height], delete_sources: true)
        #   input = [{w: '8 1/2', h: '11'}]
        #   result = input.map{ |row| xform.process(row) }
        #   expected = [{w: '8.5', height: '11'}]
        #   expect(result).to eq(expected)
        #
        # @example `target_format: :float` and `places: 2`
        #   # Used in pipeline as:
        #   # transform Fraction::ToDecimal, fields: :w, target_format: :float, places: 2
        #   xform = Fraction::ToDecimal.new(fields: :w, target_format: :float, places: 2)
        #   input = [
        #     {w: '8-2/3'},
        #     {w: '2/3 in'}
        #   ]
        #   result = input.map{ |row| xform.process(row) }
        #   expected = [
        #     {w: 8.67},
        #     {w: '0.67 in'}
        #   ]
        #   expect(result).to eq(expected)
        #
        # @example `target_format: :float, places: 2, whole_fraction_sep: [' ']`
        #   # Used in pipeline as:
        #   # transform Fraction::ToDecimal,
        #   #   fields: :w,
        #   #   target_format: :float,
        #   #   places: 2,
        #   #   whole_fraction_sep: [' ']
        #   xform = Fraction::ToDecimal.new(
        #     fields: :w, target_format: :float, places: 2, whole_fraction_sep: [' ']
        #   )
        #   input = [
        #     {w: '8-2/3'},
        #     {w: '2/3 in'}
        #   ]
        #   result = input.map{ |row| xform.process(row) }
        #   expected = [
        #     {w: '8-0.67'},
        #     {w: '0.67 in'}
        #   ]
        #   expect(result).to eq(expected)
        class ToDecimal
          # @param fields [Symbol, Array(Symbol)] Source data fields. If no targets given, converted values are
          #   written back into the original fields.
          # @param targets [nil, Symbol, Array(Symbol)] Target data fields, if different from source data fields.
          #   If `targets` are specified at all, a target must be specified for each value in `fields`. The target
          #   for a given field can be the same as the given field, however.
          # @param target_format [:string, :float] If fractions are being extracted from longer text strings and
          #   replaced, this should always be `:string`. Likewise if there may be more than one fraction in a given
          #   field value. This is the usual expected case. If the source data fields are known to only contain
          #   single fraction values and you need to use them in calculations in subsequent transforms within the
          #   same job, you may wish to set this as `:float` for greater accuracy. With `:float`, only values that
          #   consist entirely of one decimal number after replacement are converted to Float; any other value
          #   is left as a String.
          # @param places [Integer] Maximum number of decimal places kept in each converted fraction. Applied
          #   for both `target_format` values, since rounding happens when the fraction is replaced in the string.
          # @param whole_fraction_sep [Array(String)] List of characters that precede a fraction after a whole
          #   number, indicating that the whole number and fraction should be extracted together.
          #   See {Utils::ExtractFractions} for further explanation.
          # @param delete_sources [Boolean] If `targets` are given, `fields` are deleted from row. Has no effect
          #   if no `targets` are given, or if the target for a field equals the field.
          # @raise [ArgumentError] if `targets` are given but there are fewer targets than fields
          def initialize(fields:,
                         targets: nil,
                         target_format: :string,
                         places: 4,
                         whole_fraction_sep: [' ', '-'],
                         delete_sources: false)
            @fields = [fields].flatten
            @targets = targets ? [targets].flatten : nil
            # A missing target would otherwise be looked up as nil and the value written to row[nil]
            if @targets && @targets.length < @fields.length
              raise ArgumentError, '`targets` must include a target for each value in `fields`'
            end

            @target_format = target_format
            @places = places
            @delete_sources = delete_sources
            @extractor = Kiba::Extend::Utils::ExtractFractions.new(whole_fraction_sep: whole_fraction_sep)
          end

          # @param row [Hash{ Symbol => String }]
          # @return [Hash] the same row, modified in place
          def process(row)
            fields.each { |field| to_decimal(field, row) }
            delete_source_fields(row)
            row
          end

          private

          attr_reader :fields, :targets, :target_format, :places, :delete_sources, :extractor

          # Removes source fields whose target is a different field. No-op unless
          # `delete_sources` is set and `targets` were given.
          def delete_source_fields(row)
            return unless delete_sources && targets

            fields.each_with_index do |field, ind|
              row.delete(field) unless targets[ind] == field
            end
          end

          # @param value [String]
          # @return [Boolean] whether value is exactly one unsigned decimal number (e.g. "8" or "8.67").
          #   Strings like "1.0.5" are rejected because `String#to_f` would silently drop everything
          #   after the second dot.
          def floatable?(value)
            value.match?(/\A\d+(?:\.\d+)?\z/)
          end

          # Converts value to Float only when that was requested and the whole value is a number
          def format_field_value(value)
            return value unless target_format == :float && floatable?(value)

            value.to_f
          end

          # Applies each fraction's replacement in turn to a copy of value. The extractor is expected to
          # return fractions last-position-first so earlier positions stay valid after each replacement.
          def replace_fractions(fractions, value)
            fractions.reduce(value.dup) do |val, fraction|
              fraction.replace_in(val: val, places: places)
            end
          end

          # Writes the converted value of `field` into its target field. The target always receives the
          # original value first, so blank values and values without fractions are copied through unchanged.
          def to_decimal(field, row)
            targetfield = target(field)
            fieldval = row[field]
            row[targetfield] = fieldval
            return if fieldval.blank?

            fractions = extractor.call(fieldval)
            return if fractions.empty?

            row[targetfield] = format_field_value(replace_fractions(fractions, fieldval))
          end

          # @return [Symbol] the target field for srcfield; srcfield itself when no targets were given
          def target(srcfield)
            return srcfield unless targets

            targets[fields.find_index(srcfield)]
          end
        end
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
# frozen_string_literal: true | ||
|
||
require 'strscan' | ||
|
||
module Kiba
  module Extend
    module Utils
      # Extracts {Data::ConvertibleFraction} objects from a given String and returns them in the order
      # they will need to be replaced in the string (last position first, so that replacing one
      # fraction does not shift the positions of those still to be replaced).
      #
      # A warning is emitted for every extracted fraction that cannot be converted to decimal
      # (e.g. `1/0`). Such fractions are still included in the returned Array.
      class ExtractFractions
        # @param whole_fraction_sep [Array(String)] List of characters that precede a fraction after a whole
        #   number, indicating that the whole number and fraction should be extracted together. If this is
        #   set to `[' ', '-']` (the default), then both `1 1/2` and `1-1/2` will be extracted with `1` as
        #   the whole number and `1/2` as the fraction, and converted to `1.5`. If this is set to `[' ']`,
        #   then `1 1/2` will be extracted as described previously. For `1-1/2`, no whole number value
        #   will be extracted. `1/2` will be extracted as the fraction, and it will be converted to '0.5'.
        def initialize(whole_fraction_sep: [' ', '-'])
          @whole_fraction_sep = whole_fraction_sep
          @fpattern = /(\d+\/\d+)/
          @fraction = Kiba::Extend::Data::ConvertibleFraction
        end

        # @param value [String]
        # @return [Array<Kiba::Extend::Data::ConvertibleFraction>] extracted fractions, ordered from the
        #   end of the string to the beginning; empty if value contains no fraction
        def call(value)
          return [] unless value.match?(fpattern)

          result = []
          scan(StringScanner.new(value), result)
          result.each do |extracted|
            next if extracted.convertible?

            warn("#{self.class.name}: Unconvertible fraction: #{value[extracted.position]}")
          end
          result.sort.reverse
        end

        private

        attr_reader :fpattern, :whole_fraction_sep, :fraction

        # Consumes a bare fraction at the scan pointer and records it (whole number defaults to 0)
        def extract_fraction(scanner, result)
          startpos = scanner.pos
          scanner.scan(fpattern)
          result << fraction.new(fraction: scanner.captures[0], position: startpos..(scanner.pos - 1))
        end

        # Consumes a run of digits plus the one character after it. If that character is a configured
        # separator and a fraction follows immediately, consumes the fraction too and records the
        # whole number and fraction together. Otherwise nothing is recorded and scanning resumes
        # after the consumed character.
        def try_whole_fraction_extract(scanner, result)
          startpos = scanner.pos
          whole_num = scanner.scan(/\d+/).to_i
          sep = scanner.scan(/./)
          return unless whole_fraction_sep.any?(sep) && scanner.match?(fpattern)

          # The fraction must be consumed before the end position is read
          matched = scanner.scan(fpattern)
          result << fraction.new(whole: whole_num, fraction: matched, position: startpos..(scanner.pos - 1))
        end

        # Walks the scanner forward until no fraction remains ahead of the scan pointer. Implemented as a
        # loop so that stack depth does not grow with the number of numeric tokens in the value.
        def scan(scanner, result)
          scan_next(scanner, result) while fraction_ahead?(scanner)
        end

        # @return [Boolean] whether the unscanned remainder can still contain a fraction. The shortest
        #   possible fraction (`1/2`) is three characters long.
        def fraction_ahead?(scanner)
          return false if scanner.eos? || scanner.rest_size < 3

          !scanner.exist?(fpattern).nil?
        end

        # Skips non-digit characters, then extracts either a bare fraction or a whole number with fraction
        def scan_next(scanner, result)
          scanner.skip(/\D+/)
          if scanner.match?(fpattern)
            extract_fraction(scanner, result)
          else
            try_whole_fraction_extract(scanner, result)
          end
        end
      end
    end
  end
end
Oops, something went wrong.