Skip to content

Commit

Permalink
Merge pull request #198 from lyrasis/marc-lang
Browse files Browse the repository at this point in the history
`Marc::LanguageCodeLookup` transform; fix meeting name subfield config
  • Loading branch information
kspurgin authored Jun 6, 2024
2 parents 289a44b + eea1f61 commit dfa6759
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 12 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,13 @@ These changes are merged into the `main` branch, but have not been released. Aft

=== Bugfixes

* Role term and subordinate body subfields for meeting names fixed in default config.
* `IterativeCleanup` now automatically extends its extending module with `Dry::Configurable` prior to defining settings that depend on `Dry::Configurable`. (PR#192)
* `Kiba::Extend::Job.output?` no longer fails if given job returns Nil (PR#194)
* `Reshape::FieldsToFieldGroupWithConstant` constant value is no longer added to rows with no values in the renamed/remapped value fields, when fieldmap length == 1. (PR#195)

=== Added
* `Marc::LanguageCodeLookup` transform (PR#198)
* Ability to pass `find` argument to `Clean::RegexpFindReplaceFieldVals` as a `Regexp` object. Not sure why this was not the default initial behavior, but here we are! (PR#196)

=== Changed
Expand Down
4 changes: 2 additions & 2 deletions lib/kiba/extend/marc.rb
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ module Marc
# @return [Array<String>] subfields to be extracted as part of name values
# from {meeting_data_tags}
setting :meeting_name_part_subfields,
default: %w[a q b c d g n u],
default: %w[a q b c e d g n u],
reader: true
# @return [Array<String>] subfields to be extracted as meeting role codes
# from {meeting_data_tags}
Expand All @@ -70,7 +70,7 @@ module Marc
# @return [Array<String>] subfields to be extracted as meeting role terms
# from {meeting_data_tags}
setting :meeting_role_term_subfields,
default: %w[e],
default: %w[j],
reader: true
# @return [Symbol] field in which to write the name value when
# converting MARC data to CSV row when extracting names
Expand Down
5 changes: 3 additions & 2 deletions lib/kiba/extend/transforms/marc/extract_meeting_name_data.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@ module Marc
# @example
# # =001 008000714-7
# # =711 2\$aAssociation of Child Psychology Annual Conference.
# # $esponsor$evenue
# # $eSubcommittee.$jsponsor$jvenue
# rec = get_marc_record(index: 6)
# xform = Marc::ExtractMeetingNameData.new
# results = []
# xform.process(rec){ |row| results << row }
# expect(results.length).to eq(1)
# row = {:sourcefield=>"711",
# :name=>"Association of Child Psychology Annual Conference",
# :name=>"Association of Child Psychology Annual "\
# "Conference. Subcommittee",
# :nametype=>"meeting", :role_code=>"",
# :role_term=>"sponsor|venue", :marcid=>"008000714-7"}
# expect(results.first).to eq(row)
Expand Down
24 changes: 17 additions & 7 deletions lib/kiba/extend/transforms/marc/extract_subfields_from_field.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ module Kiba
module Extend
module Transforms
module Marc
# rubocop:disable Layout/LineLength
# For each occurrence of given field tag, outputs a row with the
# following columns: marcid, fullfield, and one column per specified
# subfield. If there is more than one occurrence of a subfield
Expand All @@ -24,14 +23,16 @@ module Marc
# xform.process(rec){ |row| results << row }
# expect(results.length).to eq(1)
# first = {
# :full260=>"260 $a Lahore : $b Zia-ul-Qurʾaan Publications, $c 1996. ",
# :full260=>"260 $a Lahore : $b Zia-ul-Qurʾaan Publications, "\
# "$c 1996. ",
# :_260a=>"Lahore :", :_260b=>"Zia-ul-Qurʾaan Publications,",
# :_260e=>nil, :_260f=>nil, :marcid=>"008000103-3"
# }
# expect(results[0]).to eq(first)
# @example
# # =001 008000411-3
# # =260 \\$aSan Jose, Calif. ;$aNew York, NY :$bH.M. Gousha Co.,$c[1986?]
# # =260 \\$aSan Jose, Calif. ;$aNew York, NY :$bH.M. Gousha
# # Co.,$c[1986?]
# rec = get_marc_record(index: 3)
# xform = Marc::ExtractSubfieldsFromField.new(
# tag: '260', subfields: %w[a b e f]
Expand All @@ -40,27 +41,31 @@ module Marc
# xform.process(rec){ |row| results << row }
# expect(results.length).to eq(1)
# first = {
# :full260=>"260 $a San Jose, Calif. ; $a New York, NY : $b H.M. Gousha Co., $c [1986?] ",
# :full260=>"260 $a San Jose, Calif. ; $a New York, NY : $b "\
# "H.M. Gousha Co., $c [1986?] ",
# :_260a=>"San Jose, Calif. ;|New York, NY :",
# :_260b=>"H.M. Gousha Co.,",
# :_260e=>nil, :_260f=>nil, :marcid=>"008000411-3"
# }
# expect(results[0]).to eq(first)
#
# @since 4.0.0
# rubocop:enable Layout/LineLength
class ExtractSubfieldsFromField
include FieldLinkable
# @param tag [String] MARC tag from which to extract subfield values
# @param subfields [Array<String>] subfield codes from which to
# extract values
# @param indicators [Boolean] whether to output indicators as separate
# field values
# @param id_target [Symbol] name of field in which to write id value
# @param delim [String] used when joining multiple values from
# recurring subfield
def initialize(tag:, subfields:,
def initialize(tag:, subfields:, indicators: false,
id_target: Kiba::Extend::Marc.id_target_field,
delim: Kiba::Extend.delim)
@tag = tag
@subfields = subfields
@indicators = indicators
@id_target = id_target
@delim = delim
@idextractor = Kiba::Extend::Utils::MarcIdExtractor.new
Expand All @@ -84,14 +89,19 @@ def process(record)

private

attr_reader :tag, :subfields, :id_target, :delim, :idextractor
attr_reader :tag, :subfields, :indicators, :id_target, :delim,
:idextractor

# Converts each given field into an output row.
#
# @param fields [Enumerable] field occurrences to convert
# @param idhash [Hash] passed unchanged to `prepare_row` for every field
# @return [Array] one `prepare_row` result per field, in the given order
def prepare_rows(fields, idhash)
  fields.map do |field|
    prepare_row(field, idhash)
  end
end

def prepare_row(field, idhash)
row = {"full#{tag}".to_sym => field.to_s}.merge(idhash)
if indicators
row[:i1] = field.indicator1
row[:i2] = field.indicator2
end
subfields.each do |code|
row["_#{tag}#{code}".to_sym] = sf_val(field, code)
end
Expand Down
55 changes: 55 additions & 0 deletions lib/kiba/extend/transforms/marc/language_code_lookup.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# frozen_string_literal: true

require "net/http"
require "uri"

module Kiba
module Extend
module Transforms
module Marc
# Look up MARC List for Languages codes and provide preferred
# label value
#
# Labels are retrieved via HEAD request to id.loc.gov, reading the
# `x-preflabel` response header. Labels that are found are cached for
# the life of the transform instance, so a code repeated across many
# rows triggers only one request. Codes for which no label is returned
# are NOT cached, so a transient failure is not remembered for the
# rest of the run.
#
# No connection is opened for rows with a blank source value.
#
# @note Relies on `blank?` being available on the source value
#   (presumably via ActiveSupport, loaded elsewhere in the project).
#
# @example Known code
#   row = {code: "eng"}
#   result = Marc::LanguageCodeLookup.new(source: :code).process(row)
#   expect(result).to eq(row.merge({language: "English"}))
#
# @example Unknown code
#   row = {code: "foo"}
#   result = Marc::LanguageCodeLookup.new(source: :code).process(row)
#   expect(result).to eq(row.merge({language: nil}))
class LanguageCodeLookup
  # Request path under which individual language codes are looked up
  VOCAB_PATH = "/vocabulary/languages/"

  # @param source [Symbol] row field containing language code to
  #   look up
  # @param target [Symbol] row field into which language label value
  #   will be written
  def initialize(source:, target: :language)
    @source = source
    @target = target
    @host = URI.parse("https://id.loc.gov").hostname
    @cache = {}
  end

  # @param row [Hash{ Symbol => String, nil }]
  # @return [Hash{ Symbol => String, nil }]
  def process(row)
    row[target] = lookup(row[source])
    row
  end

  private

  attr_reader :source, :target, :host, :cache

  # Returns the label for the given code, consulting the cache before
  # opening a connection.
  #
  # @param code [String, nil]
  # @return [String, nil]
  def lookup(code)
    # Check blank first so empty rows never cost a TLS handshake
    return if code.blank?
    return cache[code] if cache.key?(code)

    label = Net::HTTP.start(host, use_ssl: true) do |http|
      api_result(http, code)
    end
    # Only remember successful lookups; see class documentation
    cache[code] = label if label
    label
  end

  # @param http [Net::HTTP] open connection to {#host}
  # @param code [String, nil]
  # @return [String, nil] value of `x-preflabel` response header
  def api_result(http, code)
    return if code.blank?

    http.head("#{VOCAB_PATH}#{escape(code)}")["x-preflabel"]
  end

  # Percent-encodes the code for safe use as a single URL path segment.
  # The value comes from source data, so it may contain whitespace,
  # slashes, or line breaks that must not reach the request line
  # unescaped.
  #
  # @param code [#to_s]
  # @return [String]
  def escape(code)
    # encode_www_form_component renders spaces as "+", which is a
    # literal plus sign in a path; literal "+" is already "%2B" here
    URI.encode_www_form_component(code.to_s).gsub("+", "%20")
  end
end
end
end
end
end
Loading

0 comments on commit dfa6759

Please sign in to comment.