Skip to content

Commit

Permalink
Merge pull request #35 from lyrasis/docs-and-tests
Browse files Browse the repository at this point in the history
docs and tests
  • Loading branch information
kspurgin authored Aug 31, 2021
2 parents 4e27579 + 6460a0e commit 1e90790
Show file tree
Hide file tree
Showing 41 changed files with 750 additions and 568 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@
.rspec_status
.byebug_history

**/.~lock*
**/.~lock*
coverage
13 changes: 13 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,18 @@ source 'https://rubygems.org'

git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }

# Tooling shared by day-to-day development and the test suite.
group :development, :test do
  gem 'bundler', '>= 1.17'
  gem 'byebug', '~> 11.0'
  gem 'pry', '~> 0.14'
  gem 'rake', '~> 13.0'
  gem 'rspec', '~> 3.0'
  gem 'rubocop', '~> 1.18.4'
  gem 'rubocop-rspec', '~> 2.4.0'
end

# Coverage reporting for test runs. `require: false` keeps Bundler from
# auto-requiring it, so it has to be required explicitly where it is used.
group :test do
  gem 'simplecov', require: false
end
# Specify your gem's dependencies in kiba-extend.gemspec
gemspec
10 changes: 9 additions & 1 deletion Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PATH
remote: .
specs:
kiba-extend (2.2.0)
kiba-extend (2.2.1)
activesupport (~> 6.1.4)
csv (~> 3.0)
dry-configurable (~> 0.11)
Expand All @@ -25,6 +25,7 @@ GEM
concurrent-ruby (1.1.9)
csv (3.2.0)
diff-lcs (1.3)
docile (1.4.0)
dry-configurable (0.12.1)
concurrent-ruby (~> 1.0)
dry-core (~> 0.5, >= 0.5.0)
Expand Down Expand Up @@ -78,6 +79,12 @@ GEM
rubocop (~> 1.0)
rubocop-ast (>= 1.1.0)
ruby-progressbar (1.11.0)
simplecov (0.21.2)
docile (~> 1.1)
simplecov-html (~> 0.11)
simplecov_json_formatter (~> 0.1)
simplecov-html (0.12.3)
simplecov_json_formatter (0.1.3)
tzinfo (2.0.4)
concurrent-ruby (~> 1.0)
unicode-display_width (2.0.0)
Expand All @@ -96,6 +103,7 @@ DEPENDENCIES
rspec (~> 3.0)
rubocop (~> 1.18.4)
rubocop-rspec (~> 2.4.0)
simplecov

BUNDLED WITH
2.1.4
14 changes: 7 additions & 7 deletions kiba-extend.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ Gem::Specification.new do |spec|
spec.add_dependency 'kiba-common', '~> 1.5.0'
spec.add_dependency 'xxhash', '~> 0.4'

spec.add_development_dependency 'bundler', '>= 1.17'
spec.add_development_dependency 'byebug', '~>11.0'
spec.add_development_dependency 'pry', '~> 0.14'
spec.add_development_dependency 'rake', '~> 13.0'
spec.add_development_dependency 'rspec', '~> 3.0'
spec.add_development_dependency 'rubocop', '~> 1.18.4'
spec.add_development_dependency 'rubocop-rspec', '~> 2.4.0'
# spec.add_development_dependency 'bundler', '>= 1.17'
# spec.add_development_dependency 'byebug', '~>11.0'
# spec.add_development_dependency 'pry', '~> 0.14'
# spec.add_development_dependency 'rake', '~> 13.0'
# spec.add_development_dependency 'rspec', '~> 3.0'
# spec.add_development_dependency 'rubocop', '~> 1.18.4'
# spec.add_development_dependency 'rubocop-rspec', '~> 2.4.0'
end
2 changes: 1 addition & 1 deletion lib/kiba/extend.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ module Extend
# Prefix for warnings from the ETL
setting :warning_label, 'KIBA WARNING', reader: true

setting :registry, Kiba::Extend::FileRegistry.new, reader: true
setting :registry, Kiba::Extend::Registry::FileRegistry.new, reader: true

setting :job, reader: true do
# Whether to output results to STDOUT for debugging
Expand Down
11 changes: 11 additions & 0 deletions lib/kiba/extend/destinations.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# frozen_string_literal: true

module Kiba
module Extend
# Namespace for classes defining data outputs
#
# This file only declares the namespace; it defines no classes itself.
#
# This structure follows the pattern set out in {https://github.com/thbar/kiba-common kiba-common}
module Destinations
end
end
end
10 changes: 10 additions & 0 deletions lib/kiba/extend/destinations/csv.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,25 @@
module Kiba
module Extend
module Destinations
# An extension of Kiba::Common's CSV destination, adding the `initial_headers` option
class CSV
attr_reader :filename, :csv_options, :csv, :headers

# Records the output settings. Nothing is opened or written here.
#
# @param filename [String] path the CSV will be written to
# @param csv_options [Hash, nil] options passable to CSV objects; an empty Hash is used when
#   none are given. Refer to https://rubyapi.org/2.7/o/csv#method-c-new for details
# @param headers Don't use this. When left nil, {#write} seeds the headers from the keys of
#   the first row it receives.
# @param initial_headers [Array<Symbol>] names of fields in the order you want them output in the
#   CSV. Any you do not explicitly include here will be appended in whatever order they got
#   created/processed in, to the right of the ones named here.
def initialize(filename:, csv_options: nil, headers: nil, initial_headers: [])
  @filename, @headers = filename, headers
  @csv_options = csv_options ? csv_options : {}
  @initial_headers = initial_headers
end

# @private
def write(row)
@csv ||= ::CSV.open(filename, 'wb', csv_options)
@headers ||= row.keys
Expand All @@ -23,6 +32,7 @@ def write(row)
csv << row.fetch_values(*@headers)
end

# Closes the underlying CSV handle; a no-op when no handle is set.
# @private
def close
  return if csv.nil?

  csv.close
end
Expand Down
62 changes: 0 additions & 62 deletions lib/kiba/extend/fieldset.rb

This file was deleted.

47 changes: 47 additions & 0 deletions lib/kiba/extend/jobs.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,53 @@

module Kiba
module Extend
# Reusable, composable patterns for jobs
#
# Heretofore, I have been repeating tons of code/logic for setting up a job in migration code:
#
# - Defining sources/destinations, @srcrows, @outrows
# - Changing CSV rows to hashes (initial transforms)
# - Changing hashes back to CSV rows
# - Calling postprocessing
#
# Most of this never changes, and when it does there is way too much tedious work in a given migration
# to make it consistent across all jobs.
#
# This is an attempt to dry up calling jobs and make it possible to test them via RSpec
#
# Running `Kiba.parse` to define a job generates a
# {https://github.com/thbar/kiba/blob/master/lib/kiba/control.rb Kiba::Control}
# object, which is a wrapper bundling together: pre_processes, config, sources, transforms, destinations, and
# post_processes.
#
# As described {https://github.com/thbar/kiba/wiki/Implementing-pre-and-post-processors here}, pre_ and post_
# processors get called once per ETL run---either before or after the ETL starts working through the source
# rows
#
# This Kiba::Control object created by Kiba.parse is generated with a particular Kiba::Context, and
# once created, you cannot get access to or manipulate variables or configuration that the entire
# job needs to know about.
#
# What Kiba::Extend::Jobs adds is the ability to set up reusable initial_transformers and final_transformers.
# Basically, job templates where just the meat of the transformations change.
#
# `files` is the configuration of destination, source, and lookup files the job will use. It is a Hash, with
# the following format:
#
# { source: [registry_key, registry_key], destination: [registry_key], lookup: [registry_key] }
#
# { source: [registry_key, registry_key], destination: [registry_key]}
#
# `source` and `destination` must each have at least one registry key. `lookup` may be omitted, or it may
# be included with one or more registry keys
#
# `transformer` is a sequence of data transformations that could theoretically be called with interchangeable
# input/output settings (i.e. `materials`).
#
# In project code, instead of defining an entire job in a `Kiba.parse` block, you will define a
# `Kiba.job_segment` block containing just the transforms unique to that job.
#
# @since 2.2.0
module Jobs
end
end
Expand Down
41 changes: 0 additions & 41 deletions lib/kiba/extend/jobs/base_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,47 +8,6 @@

module Kiba
module Extend
# Reusable, composable patterns for jobs
#
# Heretofore, I have been repeating tons of code/logic for setting up a job in migration code:
#
# - Defining sources/destinations, @srcrows, @outrows
# - Changing CSV rows to hashes (initial transforms)
# - Changing hashes back to CSV rows
# - Calling postprocessing
#
# Most of this never changes, and when it does there is way too much tedious work in a given migration
# to make it consistent across all jobs.
#
# This is an attempt to dry up calling jobs and make it possible to test them automatically with stubbed-in
# enumerable sources/destinations
#
# Running `Kiba.parse` to define a job generates a {https://github.com/thbar/kiba/blob/master/lib/kiba/control.rb Kiba::Control}
# object, which is a wrapper bundling together: pre_processes, config, sources, transforms, destinations, and
# post_processes.
#
# As described {https://github.com/thbar/kiba/wiki/Implementing-pre-and-post-processors here}, pre_ and post_
# processors get called once per ETL run---either before or after the ETL starts working through the source
# rows
#
# What Kiba::Extend::Jobs adds is the ability to set up reusable initial_transformers and final_transformers.
# Basically, job templates where just the meat of the transformations change.
#
# `files` is the configuration of destination, source, and lookup files the job will use. It is a Hash, with
# the following format:
#
# { source: [registry_key, registry_key], destination: [registry_key], lookup: [registry_key] }
#
# { source: [registry_key, registry_key], destination: [registry_key]}
#
# `source` and `destination` must each have at least one registry key. `lookup` may be omitted, or it may
# be included with one or more registry keys
#
# `transformer` is a sequence of data transformations that could theoretically be called with interchangeable
# input/output settings (i.e. `materials`). In practice, a `recipe` is usually closely tied to particular tables, because
# fields are manipulated by name. However, this should support easier automated testing of `recipes`.
#
# @since 2.2.0
module Jobs
# Abstract definition of Job and job interface
#
Expand Down
18 changes: 18 additions & 0 deletions lib/kiba/extend/registry.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# frozen_string_literal: true

module Kiba
module Extend
# Support for defining a project-specific file registry
#
# This DRYs up the process of setting up job configs (i.e. the source, lookup, and destination files
# for that job).
#
# This also allows for automated calling of dependencies instead of having to redundantly
# hard code them for every job. If the file(s) needed as sources or lookups do not exist,
# their creator jobs will be run to create them.
#
# @since 2.2.0
module Registry
end
end
end
Loading

0 comments on commit 1e90790

Please sign in to comment.