diff --git a/Gemfile.lock b/Gemfile.lock index 413fd8c..b147d35 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -2,7 +2,7 @@ PATH remote: . specs: ontologies_api_client (2.2.0) - activesupport + activesupport (~> 7.0.4) excon faraday faraday-excon (~> 2.0.0) @@ -10,12 +10,14 @@ PATH lz4-ruby multi_json oj + parallel + request_store spawnling (= 2.1.5) GEM remote: https://rubygems.org/ specs: - activesupport (7.0.4) + activesupport (7.0.8.1) concurrent-ruby (~> 1.0, >= 1.0.2) i18n (>= 1.6, < 2) minitest (>= 5.1) @@ -24,11 +26,11 @@ GEM public_suffix (>= 2.0.2, < 6.0) bigdecimal (3.1.7) coderay (1.1.3) - concurrent-ruby (1.1.10) + concurrent-ruby (1.2.3) crack (1.0.0) bigdecimal rexml - excon (0.95.0) + excon (0.110.0) faraday (2.0.1) faraday-net_http (~> 2.0) ruby2_keywords (>= 0.0.4) @@ -39,26 +41,31 @@ GEM multipart-post (~> 2) faraday-net_http (2.1.0) hashdiff (1.1.0) - i18n (1.12.0) + i18n (1.14.4) concurrent-ruby (~> 1.0) lz4-ruby (0.3.3) - method_source (1.0.0) - minitest (5.16.3) + method_source (1.1.0) + minitest (5.22.3) multi_json (1.15.0) - multipart-post (2.2.3) - oj (3.13.23) - power_assert (2.0.2) - pry (0.14.1) + multipart-post (2.4.0) + oj (3.16.3) + bigdecimal (>= 3.0) + parallel (1.24.0) + power_assert (2.0.3) + pry (0.14.2) coderay (~> 1.1) method_source (~> 1.0) public_suffix (5.0.5) - rake (13.0.6) + rack (3.0.10) + rake (13.2.1) + request_store (1.7.0) + rack (>= 1.4) rexml (3.2.6) ruby2_keywords (0.0.5) spawnling (2.1.5) - test-unit (3.5.7) + test-unit (3.6.2) power_assert - tzinfo (2.0.5) + tzinfo (2.0.6) concurrent-ruby (~> 1.0) webmock (3.23.0) addressable (>= 2.8.0) @@ -66,6 +73,7 @@ GEM hashdiff (>= 0.4.0, < 2.0.0) PLATFORMS + ruby x86_64-darwin-21 x86_64-darwin-23 x86_64-linux diff --git a/config/config.test.rb b/config/config.test.rb index e21b548..f259b60 100644 --- a/config/config.test.rb +++ b/config/config.test.rb @@ -1,12 +1,33 @@ -# config.rb is required for testing -# unit test makes calls to bioportal api so it needs a valid API key which can -# be set via ENV variable UT_APIKEY $API_CLIENT_INVALIDATE_CACHE = false $DEBUG_API_CLIENT = false + LinkedData::Client.config do |config| - config.rest_url = 'https://data.bioontology.org' + config.rest_url = 'https://data.bioontology.org/' config.apikey = '8b5b7825-538d-40e0-9e9e-5ab9274a9aeb' config.links_attr = 'links' config.cache = true config.debug_client = false + config.debug_client_keys = [] + config.federated_portals = { + bioportal: { + api: 'https://data.agroportal.lirmm.fr/', + apikey: '1de0a270-29c5-4dda-b043-7c3580628cd5', + color: '#234979', + }, + ecoportal: { + api: 'https://data.ecoportal.lifewatch.eu/', + apikey: "43a437ba-a437-4bf0-affd-ab520e584719", + color: '#0f4e8a', + }, + # earthportal: { + # api: 'https://earthportal.eu:8443/', + # apikey: "c9147279-954f-41bd-b068-da9b0c441288", + # color: '#1e2251', + # }, + biodivportal: { + api: 'https://data.biodivportal.gfbio.dev/', + apikey: "47a57aa3-7b54-4f34-b695-dbb5f5b7363e", + color: '#1e2251', + } + } end diff --git a/lib/ontologies_api_client.rb b/lib/ontologies_api_client.rb index 739639a..43c4f6d 100644 --- a/lib/ontologies_api_client.rb +++ b/lib/ontologies_api_client.rb @@ -1,6 +1,7 @@ require 'oj' require 'multi_json' require 'spawnling' +require 'request_store' require_relative 'ontologies_api_client/config' require_relative 'ontologies_api_client/http' diff --git a/lib/ontologies_api_client/analytics.rb b/lib/ontologies_api_client/analytics.rb index cbb85ac..b6a78cc 100644 --- a/lib/ontologies_api_client/analytics.rb +++ b/lib/ontologies_api_client/analytics.rb @@ -1,6 +1,9 @@ +require_relative 'request_federation' + module LinkedData::Client class Analytics HTTP = LinkedData::Client::HTTP + include LinkedData::Client::RequestFederation attr_accessor :onts, :date @@ -10,18 +13,32 @@ def self.all(params = {}) def self.last_month data = self.new - data.date = last_month = DateTime.now - 1.month + last_month = DateTime.now.prev_month year_num = last_month.year month_num = last_month.month - analytics = get(:analytics, {year: year_num, month: month_num}).to_h - analytics.delete(:links) - analytics.delete(:context) + params = { year: year_num, month: month_num } + + responses = federated_get(params) do |url| + "#{url}/analytics" + end + + portals = request_portals onts = [] - analytics.keys.each do |ont| - views = analytics[ont][:"#{year_num}"][:"#{month_num}"] - onts << {ont: ont, views: views} + responses.each_with_index do |portal_views, index| + next nil if portal_views&.errors + + portal_views = portal_views.to_h + + url = portals[index].url_prefix.to_s.chomp('/') + portal_views.delete(:links) + portal_views.delete(:context) + portal_views.keys.map do |ont| + views = portal_views[ont][:"#{year_num}"][:"#{month_num}"] + onts << { ont: "#{url}/ontologies/#{ont}", views: views } + end end - data.onts = onts + + data.onts = onts.flatten.compact data end @@ -29,7 +46,7 @@ def self.last_month def self.get(path, params = {}) path = path.to_s - path = "/"+path unless path.start_with?("/") + path = "/" + path unless path.start_with?("/") HTTP.get(path, params) end diff --git a/lib/ontologies_api_client/base.rb b/lib/ontologies_api_client/base.rb index aedd782..f7a65d9 100644 --- a/lib/ontologies_api_client/base.rb +++ b/lib/ontologies_api_client/base.rb @@ -132,11 +132,16 @@ def create_attributes(attributes) attr_exists = self.public_methods(false).include?(attr) unless attr_exists self.class.class_eval do - define_method attr.to_sym do - instance_variable_get("@#{attr}") + unless method_defined?(attr.to_sym) + define_method attr.to_sym do + instance_variable_get("@#{attr}") + end end - define_method "#{attr}=" do |val| - instance_variable_set("@#{attr}", val) + + unless method_defined?("#{attr}=".to_sym) + define_method "#{attr}=" do |val| + instance_variable_set("@#{attr}", val) + end end end end diff --git a/lib/ontologies_api_client/collection.rb b/lib/ontologies_api_client/collection.rb index 77572bf..a4d34c2 100644 --- a/lib/ontologies_api_client/collection.rb +++ b/lib/ontologies_api_client/collection.rb @@ -1,11 +1,15 @@ require_relative 'config' require_relative 'http' +require_relative 'request_federation' +require 'parallel' module LinkedData module Client module Collection + def self.included(base) + base.include LinkedData::Client::RequestFederation base.extend(ClassMethods) end @@ -24,8 +28,8 @@ def method_missing(meth, *args, &block) ## # Get all top-level links for the API - def top_level_links - @top_level_links||= HTTP.get(LinkedData::Client.settings.rest_url) + def top_level_links(link = LinkedData::Client.settings.rest_url) + HTTP.get(link) end ## @@ -36,11 +40,14 @@ def uri_from_context(object, media_type) end end + ## # Get the first collection of resources for a given type def entry_point(media_type, params = {}) - params = {include: @include_attrs}.merge(params) - HTTP.get(uri_from_context(top_level_links, media_type), params) + params = { include: @include_attrs, display_links: false, display_context: false}.merge(params) + federated_get(params) do |url| + uri_from_context(top_level_links(url), media_type) rescue nil + end end ## diff --git a/lib/ontologies_api_client/config.rb b/lib/ontologies_api_client/config.rb index 25805a8..e2d063e 100644 --- a/lib/ontologies_api_client/config.rb +++ b/lib/ontologies_api_client/config.rb @@ -37,21 +37,38 @@ def config(&block) def config_connection(options = {}) return if @settings_run_connection - store = options[:cache_store] - @settings.conn = Faraday.new(@settings.rest_url) do |faraday| + store = options[:cache_store] || ActiveSupport::Cache::MemoryStore.new + @settings.conn = faraday_connection(@settings.rest_url, @settings.apikey, store, current_portal: true) + @settings.federated_conn = @settings.federated_portals.map do |portal_name, portal_info| + [portal_name, faraday_connection(portal_info[:api], portal_info[:apikey], store)] + end.to_h + + @settings_run_connection = true + end + + def connection_configured? + @settings_run_connection + end + + private + def faraday_connection(url, apikey, store, current_portal: false) + Faraday.new(url.to_s.chomp('/')) do |faraday| + if @settings.enable_long_request_log require_relative 'middleware/faraday-long-requests' faraday.use :long_requests end - require_relative 'middleware/faraday-user-apikey' - faraday.use :user_apikey + if current_portal + require_relative 'middleware/faraday-user-apikey' + faraday.use :user_apikey - require_relative 'middleware/faraday-slices' - faraday.use :ncbo_slices + require_relative 'middleware/faraday-slices' + faraday.use :ncbo_slices - require_relative 'middleware/faraday-last-updated' - faraday.use :last_updated + require_relative 'middleware/faraday-last-updated' + faraday.use :last_updated + end if @settings.cache begin @@ -69,15 +86,10 @@ def config_connection(options = {}) faraday.adapter :excon faraday.headers = { "Accept" => "application/json", - "Authorization" => "apikey token=#{@settings.apikey}", + "Authorization" => "apikey token=#{apikey}", "User-Agent" => "NCBO API Ruby Client v0.1.0" } end - @settings_run_connection = true - end - - def connection_configured? - @settings_run_connection end end end \ No newline at end of file diff --git a/lib/ontologies_api_client/http.rb b/lib/ontologies_api_client/http.rb index cf42bb0..775237e 100644 --- a/lib/ontologies_api_client/http.rb +++ b/lib/ontologies_api_client/http.rb @@ -3,6 +3,7 @@ require 'digest' require 'ostruct' require 'benchmark' +require 'active_support/cache' ## # This monkeypatch makes OpenStruct act like Struct objects class OpenStruct @@ -48,22 +49,27 @@ def self.conn rails = Kernel.const_get("Rails") store = rails.cache if rails.cache end - LinkedData::Client.config_connection(cache_store: store) + LinkedData::Client.config_connection(cache_store: store || ActiveSupport::Cache::MemoryStore.new) end LinkedData::Client.settings.conn end + def self.federated_conn + LinkedData::Client.settings.federated_conn + end + def self.get(path, params = {}, options = {}) headers = options[:headers] || {} raw = options[:raw] || false # return the unparsed body of the request params = params.delete_if { |k, v| v == nil || v.to_s.empty? } params[:ncbo_cache_buster] = Time.now.to_f if raw # raw requests don't get cached to ensure body is available invalidate_cache = params.delete(:invalidate_cache) || $API_CLIENT_INVALIDATE_CACHE || false + connection = options[:connection] || conn begin begin response = nil time = Benchmark.realtime do - response = conn.get do |req| + response = connection.get do |req| req.url path req.params = params.dup req.options[:timeout] = 60 @@ -71,7 +77,7 @@ def self.get(path, params = {}, options = {}) req.headers[:invalidate_cache] = invalidate_cache end end - puts "Getting: #{path} with #{params} (#{time}s)" if $DEBUG_API_CLIENT + puts "Getting: #{path} with #{params} (t: #{time}s - cache: #{response.headers["X-Rack-Cache"]})" if $DEBUG_API_CLIENT rescue Exception => e params = Faraday::Utils.build_query(params) path << "?" unless params.empty? || path.include?("?") diff --git a/lib/ontologies_api_client/middleware/faraday-object-cache.rb b/lib/ontologies_api_client/middleware/faraday-object-cache.rb index 080c416..f8ea13f 100644 --- a/lib/ontologies_api_client/middleware/faraday-object-cache.rb +++ b/lib/ontologies_api_client/middleware/faraday-object-cache.rb @@ -1,4 +1,5 @@ require 'digest/sha1' +require 'active_support' require 'active_support/cache' require 'lz4-ruby' require_relative '../http' @@ -70,7 +71,7 @@ def retrieve_cached_response(request_key) env = { status: 304 } cached_response = ObjectCacheResponse.new(env) cached_response.parsed_body = ld_obj - cached_response.env.response_headers = { "x-rack-cache" => 'hit' } + cached_response.env.response_headers = { "X-Rack-Cache" => 'hit' } cached_response end @@ -88,7 +89,7 @@ def process_response(response_env, request_key) response = ObjectCacheResponse.new(response_env) response.parsed_body = ld_obj - response.env.response_headers["x-rack-cache"] = cache_state + response.env.response_headers["X-Rack-Cache"] = cache_state response end diff --git a/lib/ontologies_api_client/models/class.rb b/lib/ontologies_api_client/models/class.rb index e249887..41895d5 100644 --- a/lib/ontologies_api_client/models/class.rb +++ b/lib/ontologies_api_client/models/class.rb @@ -1,11 +1,14 @@ require "cgi" require_relative "../base" +require_relative "../request_federation" module LinkedData module Client module Models + class Class < LinkedData::Client::Base HTTP = LinkedData::Client::HTTP + include LinkedData::Client::RequestFederation @media_type = %w[http://www.w3.org/2002/07/owl#Class http://www.w3.org/2004/02/skos/core#Concept] @include_attrs = "prefLabel,definition,synonym,obsolete,hasChildren,inScheme,memberOf" @include_attrs_full = "prefLabel,definition,synonym,obsolete,properties,hasChildren,childre,inScheme,memberOf" @@ -61,10 +64,27 @@ def self.find(id, ontology, params = {}) def self.search(*args) query = args.shift + params = args.shift || {} + params[:q] = query + raise ArgumentError, "You must provide a search query: Class.search(query: 'melanoma')" if query.nil? || !query.is_a?(String) - HTTP.post("/search", params) + + + search_result = federated_get(params) do |url| + "#{url}/search" + end + merged_collections = {collection: [], errors: []} + search_result.each do |result| + if result.collection + merged_collections[:collection].concat(result.collection) + elsif result.errors + merged_collections[:errors] << result.errors + end + end + OpenStruct.new(merged_collections) + end def expanded? diff --git a/lib/ontologies_api_client/request_federation.rb b/lib/ontologies_api_client/request_federation.rb new file mode 100644 index 0000000..b57d3db --- /dev/null +++ b/lib/ontologies_api_client/request_federation.rb @@ -0,0 +1,49 @@ +require 'active_support/core_ext/hash' + +module LinkedData + module Client + module RequestFederation + + def self.included(base) + base.extend(ClassMethods) + end + + module ClassMethods + def federated_get(params = {}, &link) + portals = request_portals(params) + main_thread_locals = Thread.current.keys.map { |key| [key, Thread.current[key]] }.to_h + + connections = Parallel.map(portals, in_threads: portals.size) do |conn| + main_thread_locals.each { |key, value| Thread.current[key] = value } + begin + HTTP.get(link.call(conn.url_prefix.to_s.chomp('/')), params, connection: conn) + rescue Exception => e + [OpenStruct.new(errors: "Problem retrieving #{link.call(conn.url_prefix.to_s.chomp('/')) || conn.url_prefix}")] + end + end + + connections.flatten + end + + + + def request_portals(params = {}) + federate = params.delete(:federate) || ::RequestStore.store[:federated_portals] + + portals = [LinkedData::Client::HTTP.conn] + + if federate.is_a?(Array) + portals += LinkedData::Client::HTTP.federated_conn + .select { |portal_name, _| federate.include?(portal_name) || federate.include?(portal_name.to_s) } + .values + elsif !federate.blank? # all + portals += LinkedData::Client::HTTP.federated_conn.values + end + + portals + end + end + + end + end +end diff --git a/ontologies_api_client.gemspec b/ontologies_api_client.gemspec index fad13e4..b2ae8c2 100644 --- a/ontologies_api_client.gemspec +++ b/ontologies_api_client.gemspec @@ -12,7 +12,7 @@ Gem::Specification.new do |gem| gem.require_paths = ["lib"] gem.version = "2.2.0" - gem.add_dependency('activesupport') + gem.add_dependency('activesupport', '~> 7.0.4') gem.add_dependency('excon') gem.add_dependency('faraday') gem.add_dependency('faraday-excon', '~> 2.0.0') @@ -20,5 +20,7 @@ Gem::Specification.new do |gem| gem.add_dependency('lz4-ruby') gem.add_dependency('multi_json') gem.add_dependency('oj') + gem.add_dependency('parallel') + gem.add_dependency('request_store') gem.add_dependency('spawnling', '2.1.5') end diff --git a/test/middleware/test_cache.rb b/test/middleware/test_cache.rb index ddea7a9..943b55c 100644 --- a/test/middleware/test_cache.rb +++ b/test/middleware/test_cache.rb @@ -20,6 +20,10 @@ def setup end end + def teardown + WebMock.disable! + end + def test_cache_hit_for_get_request body1, body2 = nil # First request should not hit the cache @@ -128,14 +132,14 @@ def test_cache_last_modified private def cached?(response) - response.env.response_headers['x-rack-cache'].eql?('hit') + response.env.response_headers['X-Rack-Cache'].eql?('hit') end def uncached?(response) - response.env.response_headers['x-rack-cache'].eql?('miss') + response.env.response_headers['X-Rack-Cache'].eql?('miss') end def refreshed?(response) - response.env.response_headers['x-rack-cache'].eql?('fresh') + response.env.response_headers['X-Rack-Cache'].eql?('fresh') end end diff --git a/test/models/test_federation.rb b/test/models/test_federation.rb new file mode 100644 index 0000000..5a9a147 --- /dev/null +++ b/test/models/test_federation.rb @@ -0,0 +1,139 @@ +require_relative '../test_case' +require 'pry' +require 'benchmark' +require 'webmock' +require 'request_store' + +class FederationTest < LinkedData::Client::TestCase + + def test_federated_ontologies_all + ontologies = [] + time1 = Benchmark.realtime do + ontologies = LinkedData::Client::Models::Ontology.all(display_links: false, display_context: false) + end + + ontologies_federate_all = [] + time2 = Benchmark.realtime do + ontologies_federate_all = LinkedData::Client::Models::Ontology.all(federate: true, display_links: false, display_context: false) + end + + puts "" + puts "AgroPortal ontologies: #{ontologies.length} in #{time1}s" + puts "Federated ontologies: #{ontologies_federate_all.length} in #{time2}s" + + refute_equal ontologies.length, ontologies_federate_all.length + + ontologies_federate_all.group_by{|x| x.id.split('/')[0..-2].join('/')}.each do |portal, onts| + puts "#{portal} ontologies: #{onts.length}" + end + + ontologies_federate_all_cache = [] + time2 = Benchmark.realtime do + ontologies_federate_all_cache = LinkedData::Client::Models::Ontology.all(federate: true, display_links: false, display_context: false) + end + + + puts "Federated ontologies with cache: #{ontologies_federate_all_cache.length} in #{time2}s" + + assert_equal ontologies_federate_all_cache.size, ontologies_federate_all.size + + ontologies_federate_two = [] + time2 = Benchmark.realtime do + ontologies_federate_two = LinkedData::Client::Models::Ontology.all(federate: [:ecoportal, :biodivportal], display_links: false, display_context: false) + end + + puts "Federated ontologies with two portal only with cache: #{ontologies_federate_two.length} in #{time2}s" + + refute_equal ontologies_federate_two.size, ontologies_federate_all.size + + federated_portals = ontologies_federate_two.map{|x| x.id.split('/')[0..-2].join('/')}.uniq + assert_equal 3, federated_portals.size + assert %w[bioontology ecoportal biodivportal].all? { |p| federated_portals.any?{|id| id[p]} } + end + + def test_federated_submissions_all + onts = [] + time1 = Benchmark.realtime do + onts = LinkedData::Client::Models::OntologySubmission.all + end + + onts_federate = [] + time2 = Benchmark.realtime do + onts_federate = LinkedData::Client::Models::OntologySubmission.all(federate: true) + end + + puts "" + puts "AgroPortal submissions: #{onts.length} in #{time1}s" + puts "Federated submissions: #{onts_federate.length} in #{time2}s" + + refute_equal onts.length, onts_federate.length + + onts_federate.group_by{|x| x.id.split('/')[0..-4].join('/')}.each do |portal, onts| + puts "#{portal} submissions: #{onts.length}" + end + + onts_federate = [] + time2 = Benchmark.realtime do + onts_federate = LinkedData::Client::Models::OntologySubmission.all(federate: true) + end + puts "Federated submissions with cache: #{onts_federate.length} in #{time2}s" + + end + + def test_federation_middleware + ontologies_federate_one = LinkedData::Client::Models::Ontology.all(federate: [:ecoportal, :biodivportal], display_links: false, display_context: false) + + RequestStore.store[:federated_portals] = [:ecoportal, :biodivportal] #saved globally + + ontologies_federate_two = LinkedData::Client::Models::Ontology.all(display_links: false, display_context: false) + assert_equal ontologies_federate_one.size, ontologies_federate_two.size + end + + + def test_federation_error + WebMock.enable! + LinkedData::Client::Models::Ontology.all(invalidate_cache: true) + WebMock.stub_request(:get, "#{LinkedData::Client.settings.rest_url.chomp('/')}/ontologies?include=all&display_links=false&display_context=false") + .to_return(body: "Internal server error", status: 500) + + ontologies_federate_one = LinkedData::Client::Models::Ontology.all(federate: [:ecoportal, :biodivportal], display_links: false, display_context: false, invalidate_cache: true) + + assert_equal "Problem retrieving #{LinkedData::Client.settings.rest_url}/ontologies", ontologies_federate_one.first.errors + + WebMock.disable! + end + + def test_federated_analytics + RequestStore.store[:federated_portals] = [:ecoportal,:biodivportal] + analytics = LinkedData::Client::Analytics.last_month + refute_empty analytics.onts + end + + + def test_federation_ssl_error + WebMock.enable! + WebMock.stub_request(:get, "#{LinkedData::Client.settings.rest_url.chomp('/')}") + .to_raise(Faraday::SSLError) + ontologies_federate_one = LinkedData::Client::Models::Ontology.all(display_links: false, display_context: false, invalidate_cache: true) + + refute_nil ontologies_federate_one.first.errors + WebMock.disable! + end + + def test_federated_search + query = 'test' + + time1 = Benchmark.realtime do + @search_results = LinkedData::Client::Models::Class.search(query).collection + end + + time2 = Benchmark.realtime do + @federated_search_results = LinkedData::Client::Models::Class.search(query, {federate: 'true'}).collection + end + + puts "Search results: #{@search_results.length} in #{time1}s" + puts "Federated search results: #{@federated_search_results.length} in #{time2}s" + + refute_equal @search_results.length, @federated_search_results.length + end +end diff --git a/test/test_case.rb b/test/test_case.rb index cd9fe2e..0f2f907 100644 --- a/test/test_case.rb +++ b/test/test_case.rb @@ -1,7 +1,9 @@ require 'test-unit' require_relative '../lib/ontologies_api_client' require_relative '../config/config' +require 'webmock' +WebMock.allow_net_connect! module LinkedData module Client class TestCase < Test::Unit::TestCase