Skip to content

Commit

Permalink
Fraction::ToDecimal transform (#108)
Browse files Browse the repository at this point in the history
* add Data::ConvertibleFraction value object class
* add Utils::ExtractFractions service object
* add Fraction::ToDecimal transform
  • Loading branch information
kspurgin authored Sep 22, 2022
1 parent 0a334ef commit 4a42e3f
Show file tree
Hide file tree
Showing 7 changed files with 646 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ These changes are merged into the `main` branch, but have not been released. Aft
=== Breaking

=== Added
* `Fraction::ToDecimal` transform (and supporting `Utils::ExtractFractions` and `Data::ConvertibleFraction` classes) (PR#108)
* `yardspec` gem to support running YARD examples as RSpec tests (PR#107)
* Branch coverage to `simplecov` setup (PR#107)

Expand Down
91 changes: 91 additions & 0 deletions lib/kiba/extend/data/convertible_fraction.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# frozen_string_literal: true

module Kiba
  module Extend
    module Data
      # Value object encoding an extracted string fraction (e.g. '1 1/2') so it can be converted.
      #
      # Can represent invalid/non-convertible "fractions". For those, {#convertible?} is false,
      # {#to_f} and {#to_s} return nil, and {#replace_in} returns the given value unchanged.
      class ConvertibleFraction
        include Comparable

        attr_reader :whole, :fraction, :position

        # @param whole [Integer] whole number preceding a fraction
        # @param fraction [String]
        # @param position [Range] indicates position of fractional data within original string
        # @raise [TypeError] if `whole` is not an Integer or `position` is not a Range
        def initialize(whole: 0, fraction:, position:)
          fail(TypeError, '`whole` must be an Integer') unless whole.is_a?(Integer)
          fail(TypeError, '`position` must be a Range') unless position.is_a?(Range)

          @whole = whole.freeze
          @fraction = fraction.freeze
          @position = position.freeze
        end

        # @param val [String] the value in which textual fraction will be replaced with a decimal
        # @param places [Integer] maximum number of decimal places to keep in the resulting decimal value
        # @return [String] `val` with the text at `position` replaced by the decimal string; `val` itself
        #   if the fraction is not convertible
        def replace_in(val:, places: 4)
          decimal = to_s(places)
          return val unless decimal

          # `val[suffix]` is nil when the fraction is the last thing in the string; `compact` drops it
          [prefix ? val[prefix] : '', decimal, val[suffix]].compact.join
        end

        # @return [Float, nil] whole number plus fraction as a Float; nil if not convertible
        def to_f
          value = rational
          return nil unless value

          (value + whole).to_f
        end

        # @param places [Integer] number of decimal places to round to
        # @return [String, nil] rounded decimal as a String (e.g. '1.5'); nil if not convertible
        def to_s(places = 4)
          value = rational
          return nil unless value

          (value + whole).round(places).to_f.to_s
        end

        # @return [Boolean] whether the fraction is indeed convertible
        def convertible?
          !rational.nil?
        end

        # @param other [Object]
        # @return [Boolean] true only for another ConvertibleFraction with equal whole, fraction, and position
        def ==(other)
          other.is_a?(self.class) &&
            whole == other.whole &&
            fraction == other.fraction &&
            position == other.position
        end
        alias_method :eql?, :==

        # Orders fractions by where they start in the original string.
        #
        # @param other [Object]
        # @return [Integer, nil] nil when `other` is not a ConvertibleFraction
        def <=>(other)
          return nil unless other.is_a?(self.class)

          position.first <=> other.position.first
        end

        def hash
          [self.class, whole, fraction, position].hash
        end

        # @return [Hash{Symbol => Object}]
        def to_h
          {whole: whole, fraction: fraction, position: position}
        end

        private

        # Parses `fraction` into a Rational.
        #
        # @return [Rational, nil] nil when `fraction` has a zero denominator (ZeroDivisionError), is a
        #   String that is not a valid rational (ArgumentError), or is not a parseable type such as nil
        #   (TypeError)
        def rational
          Rational(fraction)
        rescue ZeroDivisionError, ArgumentError, TypeError
          nil
        end

        # @return [Range, nil] range covering the text before the fraction; nil if the fraction starts
        #   the string
        def prefix
          return nil if position.min == 0

          0..position.min - 1
        end

        # @return [Range] range covering the text after the fraction
        def suffix
          position.max + 1..-1
        end
      end
    end
  end
end

12 changes: 12 additions & 0 deletions lib/kiba/extend/transforms/fraction.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# frozen_string_literal: true

module Kiba
  module Extend
    module Transforms
      # Namespace for transforms that deal with fractions in field values
      module Fraction
        # Expose this namespace at the top level as `::Fraction`.
        # NOTE(review): presumably so the documentation examples can call `Fraction::ToDecimal`
        #   without the full `Kiba::Extend::Transforms` prefix -- confirm.
        ::Fraction = self
      end
    end
  end
end
182 changes: 182 additions & 0 deletions lib/kiba/extend/transforms/fraction/to_decimal.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
# frozen_string_literal: true

module Kiba
  module Extend
    module Transforms
      module Fraction
        # Converts fractions expressed like "1 1/4" to decimals like "1.25"
        #
        # @example Defaults and general behavior/value handling
        #   # Used in pipeline as:
        #   # transform Fraction::ToDecimal, fields: :dim
        #   xform = Fraction::ToDecimal.new(fields: :dim)
        #   input = [
        #     {dim: nil},
        #     {dim: ''},
        #     {dim: 'foo'},
        #     {dim: '1/2'},
        #     {dim: '6-1/4 x 9-1/4'},
        #     {dim: '10 5/8x13'},
        #     {dim: '1 2/3 x 5 1/2'},
        #     {dim: 'approximately 2/3 by 1/2in (height unknown)'}
        #   ]
        #   result = input.map{ |row| xform.process(row) }
        #   expected = [
        #     {dim: nil},
        #     {dim: ''},
        #     {dim: 'foo'},
        #     {dim: '0.5'},
        #     {dim: '6.25 x 9.25'},
        #     {dim: '10.625x13'},
        #     {dim: '1.6667 x 5.5'},
        #     {dim: 'approximately 0.6667 by 0.5in (height unknown)'}
        #   ]
        #   expect(result).to eq(expected)
        #
        # @example Multiple fields and targets
        #   # Used in pipeline as:
        #   # transform Fraction::ToDecimal, fields: %i[w h], targets: %i[width height]
        #   xform = Fraction::ToDecimal.new(fields: %i[w h], targets: %i[width height])
        #   input = [{w: '8 1/2', h: '11'}]
        #   result = input.map{ |row| xform.process(row) }
        #   expected = [{w: '8 1/2', h: '11', width: '8.5', height: '11'}]
        #   expect(result).to eq(expected)
        #
        # @example Multiple fields and targets, and `delete_sources` = true
        #   # Used in pipeline as:
        #   # transform Fraction::ToDecimal, fields: %i[w h], targets: %i[w height], delete_sources: true
        #   xform = Fraction::ToDecimal.new(fields: %i[w h], targets: %i[w height], delete_sources: true)
        #   input = [{w: '8 1/2', h: '11'}]
        #   result = input.map{ |row| xform.process(row) }
        #   expected = [{w: '8.5', height: '11'}]
        #   expect(result).to eq(expected)
        #
        # @example `target_format: :float` and `places: 2`
        #   # Used in pipeline as:
        #   # transform Fraction::ToDecimal, fields: :w, target_format: :float, places: 2
        #   xform = Fraction::ToDecimal.new(fields: :w, target_format: :float, places: 2)
        #   input = [
        #     {w: '8-2/3'},
        #     {w: '2/3 in'}
        #   ]
        #   result = input.map{ |row| xform.process(row) }
        #   expected = [
        #     {w: 8.67},
        #     {w: '0.67 in'}
        #   ]
        #   expect(result).to eq(expected)
        #
        # @example `target_format: :float, places: 2, whole_fraction_sep: [' ']`
        #   # Used in pipeline as:
        #   # transform Fraction::ToDecimal,
        #   #   fields: :w,
        #   #   target_format: :float,
        #   #   places: 2,
        #   #   whole_fraction_sep: [' ']
        #   xform = Fraction::ToDecimal.new(
        #     fields: :w, target_format: :float, places: 2, whole_fraction_sep: [' ']
        #   )
        #   input = [
        #     {w: '8-2/3'},
        #     {w: '2/3 in'}
        #   ]
        #   result = input.map{ |row| xform.process(row) }
        #   expected = [
        #     {w: '8-0.67'},
        #     {w: '0.67 in'}
        #   ]
        #   expect(result).to eq(expected)
        class ToDecimal
          # Matches a value that consists of exactly one plain decimal number (e.g. `8` or `8.67`)
          SINGLE_DECIMAL = /\A\d+(?:\.\d+)?\z/.freeze
          private_constant :SINGLE_DECIMAL

          # @param fields [Symbol, Array(Symbol)] Source data fields. If no targets given, converted values are
          #   written back into the original fields.
          # @param targets [nil, Symbol, Array(Symbol)] Target data fields, if different from source data fields.
          #   If `targets` are specified at all, a target must be specified for each value in `fields`. The target
          #   for a given field can be the same as the given field, however.
          # @param target_format [:string, :float] If fractions are being extracted from longer text strings and
          #   replaced, this should always be `:string`. Likewise if there may be more than one fraction in a given
          #   field value. This is the usual expected case. If the source data fields are known to only contain
          #   single fraction values and you need to use them in calculations in subsequent transforms within the
          #   same job, you may wish to set this as `:float` for greater accuracy. With `:float`, a converted value
          #   is only turned into a Float when the whole value is a single decimal number; any other value is left
          #   as a String.
          # @param places [Integer] Maximum number of decimal places kept when converting each fraction. Applied
          #   to every replaced fraction, whichever `target_format` is used.
          # @param whole_fraction_sep [Array(String)] List of characters that precede a fraction after a whole
          #   number, indicating that the whole number and fraction should be extracted together.
          #   See {Utils::ExtractFractions} for further explanation.
          # @param delete_sources [Boolean] If `targets` are given, `fields` are deleted from row. Has no effect
          #   if no `targets` are given, or if the target for a field equals the field.
          def initialize(fields:,
                         targets: nil,
                         target_format: :string,
                         places: 4,
                         whole_fraction_sep: [' ', '-'],
                         delete_sources: false)
            @fields = [fields].flatten
            @targets = targets ? [targets].flatten : nil
            @target_format = target_format
            @places = places
            @delete_sources = delete_sources
            @extractor = Kiba::Extend::Utils::ExtractFractions.new(whole_fraction_sep: whole_fraction_sep)
          end

          # @param row [Hash{ Symbol => String }]
          # @return [Hash] the same row, modified in place
          def process(row)
            fields.each { |field| to_decimal(field, row) }
            delete_source_fields(row)
            row
          end

          private

          attr_reader :fields, :targets, :target_format, :places, :delete_sources, :extractor

          # Removes each source field whose target is a different field. Only acts when both
          # `delete_sources` and `targets` are set.
          def delete_source_fields(row)
            return unless delete_sources && targets

            fields.each_with_index do |field, ind|
              row.delete(field) unless targets[ind] == field
            end
          end

          # Whether the whole value is one well-formed decimal number.
          #
          # Merely checking for "only digits and dots" is not enough: a value like '1.2.5' (which can result
          # from replacing the fraction in '1.2 1/2') would pass, and `String#to_f` would then silently
          # truncate it to 1.2.
          #
          # @param value [String]
          # @return [Boolean]
          def floatable?(value)
            value.match?(SINGLE_DECIMAL)
          end

          # @param value [String] field value in which fractions have already been replaced
          # @return [String, Float] Float only if `target_format` is `:float` and value is a single number
          def format_field_value(value)
            return value unless target_format == :float && floatable?(value)

            value.to_f
          end

          # Applies each fraction's replacement in the order given by the extractor.
          #
          # @param fractions [Array] objects responding to `replace_in(val:, places:)`
          # @param value [String]
          # @return [String]
          def replace_fractions(fractions, value)
            fractions.reduce(value.dup) do |val, fraction|
              fraction.replace_in(val: val, places: places)
            end
          end

          # Writes the converted value of `field` into its target field. The target always receives the
          # original value first, so blank values and values without fractions are passed through.
          def to_decimal(field, row)
            targetfield = target(field)
            fieldval = row[field]
            row[targetfield] = fieldval
            return if fieldval.blank?

            fractions = extractor.call(fieldval)
            return if fractions.empty?

            replaced = replace_fractions(fractions, fieldval)
            row[targetfield] = format_field_value(replaced)
          end

          # @param srcfield [Symbol]
          # @return [Symbol] the target at the same index as `srcfield` in `fields`; `srcfield` itself if no
          #   targets were given
          def target(srcfield)
            return srcfield unless targets

            ind = fields.find_index(srcfield)
            targets[ind]
          end
        end
      end
    end
  end
end
78 changes: 78 additions & 0 deletions lib/kiba/extend/utils/extract_fractions.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# frozen_string_literal: true

require 'strscan'

module Kiba
  module Extend
    module Utils
      # Extracts {Data::ConvertibleFraction}s from given String and returns them in the order they will need
      #   to be replaced in the string (last fraction first, so that replacing one does not shift the position
      #   of those still to be replaced). A warning is emitted for each extracted fraction that cannot be
      #   converted to decimal.
      class ExtractFractions
        # @param whole_fraction_sep [Array(String)] List of characters that precede a fraction after a whole
        #   number, indicating that the whole number and fraction should be extracted together. If this is
        #   set to `[' ', '-']` (the default), then both `1 1/2` and `1-1/2` will be extracted with `1` as
        #   the whole number and `1/2` as the fraction, and converted to `1.5`. If this is set to `[' ']`,
        #   then `1 1/2` will be extracted as described previously. For `1-1/2`, no whole number value
        #   will be extracted. `1/2` will be extracted as the fraction, and it will be converted to '0.5'.
        def initialize(whole_fraction_sep: [' ', '-'])
          @whole_fraction_sep = whole_fraction_sep
          @fpattern = /(\d+\/\d+)/
          @fraction = Kiba::Extend::Data::ConvertibleFraction
        end

        # @param value [String]
        # @return [Array] extracted fraction objects, sorted by descending start position. Each `position`
        #   is a Range of character (not byte) indexes into `value`.
        def call(value)
          return [] unless value.match?(fpattern)

          result = []
          scan(StringScanner.new(value), result)
          result.each do |extracted|
            next if extracted.convertible?

            warn("#{self.class.name}: Unconvertible fraction: #{value[extracted.position]}")
          end
          result.sort.reverse
        end

        private

        attr_reader :fpattern, :whole_fraction_sep, :fraction

        # Converts a StringScanner byte offset into a character index. StringScanner#pos counts bytes, but
        # the extracted positions are used to index the String by character, so the two differ as soon as a
        # multibyte character precedes the fraction.
        def char_index(scanner, byte_offset)
          scanner.string.byteslice(0, byte_offset).length
        end

        # @return [Range] character range from the given start byte offset through the last scanned character
        def char_range(scanner, start_byte)
          char_index(scanner, start_byte)..char_index(scanner, scanner.pos) - 1
        end

        # Consumes the fraction at the scan pointer and records it with no whole number.
        def extract_fraction(scanner, result)
          start_byte = scanner.pos
          text = scanner.scan(fpattern)
          result << fraction.new(fraction: text, position: char_range(scanner, start_byte))
        end

        # Consumes a run of digits plus the one character after it. If that character is a configured
        # separator and a fraction follows immediately, consumes the fraction too and records digits and
        # fraction together. Otherwise nothing is recorded; the scan pointer stays advanced.
        def try_whole_fraction_extract(scanner, result)
          start_byte = scanner.pos
          whole_num = scanner.scan(/\d+/).to_i
          sep = scanner.scan(/./)
          return unless whole_fraction_sep.any?(sep) && scanner.match?(fpattern)

          text = scanner.scan(fpattern)
          result << fraction.new(whole: whole_num, fraction: text, position: char_range(scanner, start_byte))
        end

        # Whether a fraction can still be found at or after the scan pointer. The shortest possible
        # fraction ('1/2') is 3 bytes long.
        def fraction_ahead?(scanner)
          scanner.rest_size >= 3 && scanner.exist?(fpattern)
        end

        # Walks the string until no fraction remains ahead of the scan pointer. Implemented as a loop so
        # the call stack does not grow with the number of numbers in the value.
        def scan(scanner, result)
          scan_next(scanner, result) while fraction_ahead?(scanner)
        end

        # Performs one extraction step: skips non-digits, then handles either a bare fraction or a whole
        # number that may be followed by a fraction.
        def scan_next(scanner, result)
          scanner.skip(/\D+/)
          if scanner.match?(fpattern)
            extract_fraction(scanner, result)
          else
            try_whole_fraction_extract(scanner, result)
          end
        end
      end
    end
  end
end
Loading

0 comments on commit 4a42e3f

Please sign in to comment.