diff --git a/Gemfile.lock b/Gemfile.lock index 413fd8c..8a4b0e7 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -2,7 +2,7 @@ PATH remote: . specs: ontologies_api_client (2.2.0) - activesupport + activesupport (~> 7.0.4) excon faraday faraday-excon (~> 2.0.0) @@ -10,12 +10,13 @@ PATH lz4-ruby multi_json oj + parallel spawnling (= 2.1.5) GEM remote: https://rubygems.org/ specs: - activesupport (7.0.4) + activesupport (7.0.8.1) concurrent-ruby (~> 1.0, >= 1.0.2) i18n (>= 1.6, < 2) minitest (>= 5.1) @@ -24,11 +25,12 @@ GEM public_suffix (>= 2.0.2, < 6.0) bigdecimal (3.1.7) coderay (1.1.3) - concurrent-ruby (1.1.10) + concurrent-ruby (1.2.3) + connection_pool (2.4.1) crack (1.0.0) bigdecimal rexml - excon (0.95.0) + excon (0.110.0) faraday (2.0.1) faraday-net_http (~> 2.0) ruby2_keywords (>= 0.0.4) @@ -39,26 +41,34 @@ GEM multipart-post (~> 2) faraday-net_http (2.1.0) hashdiff (1.1.0) - i18n (1.12.0) + i18n (1.14.4) concurrent-ruby (~> 1.0) lz4-ruby (0.3.3) - method_source (1.0.0) - minitest (5.16.3) + method_source (1.1.0) + minitest (5.22.3) multi_json (1.15.0) - multipart-post (2.2.3) - oj (3.13.23) - power_assert (2.0.2) - pry (0.14.1) + multipart-post (2.4.0) + oj (3.16.3) + bigdecimal (>= 3.0) + parallel (1.24.0) + power_assert (2.0.3) + pry (0.14.2) coderay (~> 1.1) method_source (~> 1.0) public_suffix (5.0.5) - rake (13.0.6) + rake (13.2.1) + redis (5.2.0) + redis-client (>= 0.22.0) + redis-client (0.22.1) + connection_pool + redis-namespace (1.11.0) + redis (>= 4) rexml (3.2.6) ruby2_keywords (0.0.5) spawnling (2.1.5) - test-unit (3.5.7) + test-unit (3.6.2) power_assert - tzinfo (2.0.5) + tzinfo (2.0.6) concurrent-ruby (~> 1.0) webmock (3.23.0) addressable (>= 2.8.0) @@ -74,6 +84,8 @@ DEPENDENCIES ontologies_api_client! pry rake + redis (~> 5.2) + redis-namespace (~> 1.11) test-unit webmock diff --git a/config/config.test.rb b/config/config.test.rb index a5d6166..2771e77 100644 --- a/config/config.test.rb +++ b/config/config.test.rb @@ -1,14 +1,34 @@ -# config.rb is required for testing -# unit test makes calls to bioportal api so it needs a valid API key which can -# be set via ENV variable UT_APIKEY -abort('UT_APIKEY env variable is not set. Canceling tests') unless ENV.include?('UT_APIKEY') -abort('UT_APIKEY env variable is set to an empty value. Canceling tests') unless ENV['UT_APIKEY'].size > 5 +ENV['UT_APIKEY'] = '1de0a270-29c5-4dda-b043-7c3580628cd5' $API_CLIENT_INVALIDATE_CACHE = false $DEBUG_API_CLIENT = false + LinkedData::Client.config do |config| - config.rest_url = 'https://data.bioontology.org' - config.apikey = ENV['UT_APIKEY'] -# config.apikey = 'xxxxx-xxxxx-xxxxxxxxxx' + config.rest_url = 'https://data.agroportal.lirmm.fr/' + config.apikey = '1de0a270-29c5-4dda-b043-7c3580628cd5' config.links_attr = 'links' - config.cache = false + config.cache = true + config.debug_client = true + config.debug_client_keys = [] + config.federated_portals = { + bioportal: { + api: 'https://data.bioontology.org/', + apikey: '8b5b7825-538d-40e0-9e9e-5ab9274a9aeb', + color: '#234979', + }, + ecoportal: { + api: 'https://data.ecoportal.lifewatch.eu/', + apikey: "43a437ba-a437-4bf0-affd-ab520e584719", + color: '#0f4e8a', + }, + earthportal: { + api: 'https://earthportal.eu:8443/', + apikey: "c9147279-954f-41bd-b068-da9b0c441288", + color: '#1e2251', + }, + biodivportal: { + api: 'https://data.biodivportal.gfbio.org/', + apikey: "47a57aa3-7b54-4f34-b695-dbb5f5b7363e", + color: '#1e2251', + } + } end diff --git a/lib/ontologies_api_client/base.rb b/lib/ontologies_api_client/base.rb index ff4aba6..a4404df 100644 --- a/lib/ontologies_api_client/base.rb +++ b/lib/ontologies_api_client/base.rb @@ -132,11 +132,16 @@ def create_attributes(attributes) attr_exists = self.public_methods(false).include?(attr) unless attr_exists self.class.class_eval do - define_method attr.to_sym do - instance_variable_get("@#{attr}") + unless method_defined?(attr.to_sym) + define_method attr.to_sym do + instance_variable_get("@#{attr}") + end end - define_method "#{attr}=" do |val| - instance_variable_set("@#{attr}", val) + + unless method_defined?("#{attr}=".to_sym) + define_method "#{attr}=" do |val| + instance_variable_set("@#{attr}", val) + end end end end diff --git a/lib/ontologies_api_client/collection.rb b/lib/ontologies_api_client/collection.rb index 77572bf..0e38d0a 100644 --- a/lib/ontologies_api_client/collection.rb +++ b/lib/ontologies_api_client/collection.rb @@ -24,8 +24,8 @@ def method_missing(meth, *args, &block) ## # Get all top-level links for the API - def top_level_links - @top_level_links||= HTTP.get(LinkedData::Client.settings.rest_url) + def top_level_links(link = LinkedData::Client.settings.rest_url) + HTTP.get(link) end ## @@ -36,11 +36,25 @@ def uri_from_context(object, media_type) end end + require 'parallel' + ## # Get the first collection of resources for a given type def entry_point(media_type, params = {}) - params = {include: @include_attrs}.merge(params) - HTTP.get(uri_from_context(top_level_links, media_type), params) + params = { include: @include_attrs }.merge(params) + federate = params.delete(:federate) + + collections = [HTTP.get(uri_from_context(top_level_links, media_type), params)] + + return collections.first unless federate + + threads_count = LinkedData::Client.settings.federated_conn.size + + connections = Parallel.map(LinkedData::Client.settings.federated_conn, in_threads: threads_count) do |conn| + HTTP.get(uri_from_context(top_level_links(conn.url_prefix.to_s), media_type), params, connection: conn) + end + + connections.flatten end ## diff --git a/lib/ontologies_api_client/config.rb b/lib/ontologies_api_client/config.rb index 25805a8..21ca101 100644 --- a/lib/ontologies_api_client/config.rb +++ b/lib/ontologies_api_client/config.rb @@ -37,8 +37,21 @@ def config(&block) def config_connection(options = {}) return if @settings_run_connection - store = options[:cache_store] - @settings.conn = Faraday.new(@settings.rest_url) do |faraday| + store = options[:cache_store] || ActiveSupport::Cache::MemoryStore.new + @settings.conn = faraday_connection(@settings.rest_url, @settings.apikey, store) + @settings.federated_conn = @settings.federated_portals.map do |portal_name, portal_info| + faraday_connection(portal_info[:api], portal_info[:apikey], store) + end + @settings_run_connection = true + end + + def connection_configured? + @settings_run_connection + end + + private + def faraday_connection(url, apikey, store) + Faraday.new(url) do |faraday| if @settings.enable_long_request_log require_relative 'middleware/faraday-long-requests' faraday.use :long_requests @@ -69,15 +82,10 @@ def config_connection(options = {}) faraday.adapter :excon faraday.headers = { "Accept" => "application/json", - "Authorization" => "apikey token=#{@settings.apikey}", + "Authorization" => "apikey token=#{apikey}", "User-Agent" => "NCBO API Ruby Client v0.1.0" } end - @settings_run_connection = true - end - - def connection_configured? - @settings_run_connection end end end \ No newline at end of file diff --git a/lib/ontologies_api_client/http.rb b/lib/ontologies_api_client/http.rb index d3df241..fb8bf20 100644 --- a/lib/ontologies_api_client/http.rb +++ b/lib/ontologies_api_client/http.rb @@ -3,6 +3,7 @@ require 'digest' require 'ostruct' require 'benchmark' +require 'active_support/cache' ## # This monkeypatch makes OpenStruct act like Struct objects class OpenStruct @@ -49,21 +50,27 @@ def self.conn store = rails.cache if rails.cache end LinkedData::Client.config_connection(cache_store: store) + LinkedData::Client.config_connection(cache_store: store || ActiveSupport::Cache::MemoryStore.new) end LinkedData::Client.settings.conn end + def self.federated_conn + LinkedData::Client.settings.federated_conn + end + def self.get(path, params = {}, options = {}) headers = options[:headers] || {} raw = options[:raw] || false # return the unparsed body of the request params = params.delete_if { |k, v| v == nil || v.to_s.empty? } params[:ncbo_cache_buster] = Time.now.to_f if raw # raw requests don't get cached to ensure body is available invalidate_cache = params.delete(:invalidate_cache) || $API_CLIENT_INVALIDATE_CACHE || false + connection = options[:connection] || conn begin begin response = nil time = Benchmark.realtime do - response = conn.get do |req| + response = connection.get do |req| req.url path req.params = params.dup req.options[:timeout] = 60 @@ -71,7 +78,7 @@ def self.get(path, params = {}, options = {}) req.headers[:invalidate_cache] = invalidate_cache end end - puts "Getting: #{path} with #{params} (#{time}s)" if $DEBUG_API_CLIENT + puts "Getting: #{path} with #{params} (t: #{time}s - cache: #{response.headers["X-Rack-Cache"]})" if $DEBUG_API_CLIENT rescue Exception => e params = Faraday::Utils.build_query(params) path << "?" unless params.empty? || path.include?("?") diff --git a/lib/ontologies_api_client/middleware/faraday-object-cache.rb b/lib/ontologies_api_client/middleware/faraday-object-cache.rb index 4e9eb65..7b30722 100644 --- a/lib/ontologies_api_client/middleware/faraday-object-cache.rb +++ b/lib/ontologies_api_client/middleware/faraday-object-cache.rb @@ -1,4 +1,5 @@ require 'digest/sha1' +require 'active_support' require 'active_support/cache' require 'lz4-ruby' require_relative '../http' @@ -70,7 +71,7 @@ def retrieve_cached_response(request_key) env = { status: 304 } cached_response = ObjectCacheResponse.new(env) cached_response.parsed_body = ld_obj - cached_response.env.response_headers = { "x-rack-cache" => 'hit' } + cached_response.env.response_headers = { "X-Rack-Cache" => 'hit' } cached_response end @@ -88,7 +89,7 @@ def process_response(response_env, request_key) response = ObjectCacheResponse.new(response_env) response.parsed_body = ld_obj - response.env.response_headers["x-rack-cache"] = cache_state + response.env.response_headers["X-Rack-Cache"] = cache_state response end diff --git a/ontologies_api_client.gemspec b/ontologies_api_client.gemspec index fad13e4..af30944 100644 --- a/ontologies_api_client.gemspec +++ b/ontologies_api_client.gemspec @@ -12,7 +12,7 @@ Gem::Specification.new do |gem| gem.require_paths = ["lib"] gem.version = "2.2.0" - gem.add_dependency('activesupport') + gem.add_dependency('activesupport', '~> 7.0.4') gem.add_dependency('excon') gem.add_dependency('faraday') gem.add_dependency('faraday-excon', '~> 2.0.0') @@ -20,5 +20,6 @@ Gem::Specification.new do |gem| gem.add_dependency('lz4-ruby') gem.add_dependency('multi_json') gem.add_dependency('oj') + gem.add_dependency('parallel') gem.add_dependency('spawnling', '2.1.5') end diff --git a/test/middleware/test_cache.rb b/test/middleware/test_cache.rb index ddea7a9..bdd97b1 100644 --- a/test/middleware/test_cache.rb +++ b/test/middleware/test_cache.rb @@ -20,6 +20,10 @@ def setup end end + def teardown + WebMock.disable! + end + def test_cache_hit_for_get_request body1, body2 = nil # First request should not hit the cache diff --git a/test/models/test_federation.rb b/test/models/test_federation.rb new file mode 100644 index 0000000..1720bff --- /dev/null +++ b/test/models/test_federation.rb @@ -0,0 +1,69 @@ +require_relative '../test_case' +require 'pry' +require 'benchmark' + +class FederationTest < LinkedData::Client::TestCase + + def test_cache + onts = LinkedData::Client::Models::Ontology.all + onts = LinkedData::Client::Models::Ontology.all + end + + def test_federated_ontologies_all + onts = [] + time1 = Benchmark.realtime do + onts = LinkedData::Client::Models::Ontology.all + end + + onts_federate = [] + time2 = Benchmark.realtime do + onts_federate = LinkedData::Client::Models::Ontology.all(federate: true) + end + + puts "" + puts "AgroPortal ontologies: #{onts.length} in #{time1}s" + puts "Federated ontologies: #{onts_federate.length} in #{time2}s" + + refute_equal onts.length, onts_federate.length + + onts_federate.group_by{|x| x.id.split('/')[0..-2].join('/')}.each do |portal, onts| + puts "#{portal} ontologies: #{onts.length}" + end + + onts_federate = [] + time2 = Benchmark.realtime do + onts_federate = LinkedData::Client::Models::Ontology.all(federate: true) + end + puts "Federated ontologies with cache: #{onts_federate.length} in #{time2}s" + end + + def test_federated_submissions_all + onts = [] + time1 = Benchmark.realtime do + onts = LinkedData::Client::Models::OntologySubmission.all + end + + onts_federate = [] + time2 = Benchmark.realtime do + onts_federate = LinkedData::Client::Models::OntologySubmission.all(federate: true) + end + + puts "" + puts "AgroPortal submissions: #{onts.length} in #{time1}s" + puts "Federated submissions: #{onts_federate.length} in #{time2}s" + + refute_equal onts.length, onts_federate.length + + onts_federate.group_by{|x| x.id.split('/')[0..-4].join('/')}.each do |portal, onts| + puts "#{portal} submissions: #{onts.length}" + end + + onts_federate = [] + time2 = Benchmark.realtime do + onts_federate = LinkedData::Client::Models::OntologySubmission.all(federate: true) + end + puts "Federated submissions with cache: #{onts_federate.length} in #{time2}s" + + end + +end \ No newline at end of file diff --git a/test/test_case.rb b/test/test_case.rb index cd9fe2e..0f2f907 100644 --- a/test/test_case.rb +++ b/test/test_case.rb @@ -1,7 +1,9 @@ require 'test-unit' require_relative '../lib/ontologies_api_client' require_relative '../config/config' +require 'webmock' +WebMock.allow_net_connect! module LinkedData module Client class TestCase < Test::Unit::TestCase