Skip to content

Commit

Permalink
Merge branch 'master' into ecoportal-ontoportal-reset
Browse files Browse the repository at this point in the history
  • Loading branch information
syphax-bouazzouni committed May 27, 2024
2 parents 877f1e3 + 24fb254 commit b369660
Show file tree
Hide file tree
Showing 6 changed files with 263 additions and 121 deletions.
3 changes: 2 additions & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ gemspec

gem 'rake'
gem 'pry'
gem 'test-unit'
gem 'test-unit'
gem 'webmock', require: false
14 changes: 14 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,14 @@ GEM
i18n (>= 1.6, < 2)
minitest (>= 5.1)
tzinfo (~> 2.0)
addressable (2.8.6)
public_suffix (>= 2.0.2, < 6.0)
bigdecimal (3.1.7)
coderay (1.1.3)
concurrent-ruby (1.1.10)
crack (1.0.0)
bigdecimal
rexml
excon (0.95.0)
faraday (2.0.1)
faraday-net_http (~> 2.0)
Expand All @@ -32,6 +38,7 @@ GEM
faraday-multipart (1.0.4)
multipart-post (~> 2)
faraday-net_http (2.1.0)
hashdiff (1.1.0)
i18n (1.12.0)
concurrent-ruby (~> 1.0)
lz4-ruby (0.3.3)
Expand All @@ -44,13 +51,19 @@ GEM
pry (0.14.1)
coderay (~> 1.1)
method_source (~> 1.0)
public_suffix (5.0.5)
rake (13.0.6)
rexml (3.2.6)
ruby2_keywords (0.0.5)
spawnling (2.1.5)
test-unit (3.5.7)
power_assert
tzinfo (2.0.5)
concurrent-ruby (~> 1.0)
webmock (3.23.0)
addressable (>= 2.8.0)
crack (>= 0.3.2)
hashdiff (>= 0.4.0, < 2.0.0)

PLATFORMS
x86_64-darwin-21
Expand All @@ -62,6 +75,7 @@ DEPENDENCIES
pry
rake
test-unit
webmock

BUNDLED WITH
2.4.21
11 changes: 5 additions & 6 deletions config/config.test.rb
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
# config.rb is required for testing
# unit test makes calls to bioportal api so it needs a valid API key which can
# be set via ENV variable UT_APIKEY
abort('UT_APIKEY env variable is not set. Canceling tests') unless ENV.include?('UT_APIKEY')
abort('UT_APIKEY env variable is set to an empty value. Canceling tests') unless ENV['UT_APIKEY'].size > 5

$API_CLIENT_INVALIDATE_CACHE = false
$DEBUG_API_CLIENT = false
LinkedData::Client.config do |config|
config.rest_url = 'https://data.bioontology.org'
config.apikey = ENV['UT_APIKEY']
# config.apikey = 'xxxxx-xxxxx-xxxxxxxxxx'
config.apikey = '8b5b7825-538d-40e0-9e9e-5ab9274a9aeb'
config.links_attr = 'links'
config.cache = false
config.cache = true
config.debug_client = false
end
2 changes: 1 addition & 1 deletion lib/ontologies_api_client/collection.rb
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def where(params = {}, &block)
# Find a resource by id
# @deprecated replace with "get"
def find(id, params = {})
[get(id, params)]
get(id, params)
end

##
Expand Down
213 changes: 100 additions & 113 deletions lib/ontologies_api_client/middleware/faraday-object-cache.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,107 +8,47 @@ class ObjectCacheResponse < Faraday::Response
attr_accessor :parsed_body
end

##
# This middleware causes Faraday to return an actual object instead of a response
# This is done so that the object is cached instead of the unparsed json body.
# Otherwise, we have to re-parse the json on every cache hit, which is extrememly costly
# when compared to unmarshaling an object.
class ObjectCache < Faraday::Middleware
def initialize(app, *arguments)
super(app)
options = arguments.last.is_a?(Hash) ? arguments.pop : {}
@logger = options.delete(:logger)
@store = options[:store] || ActiveSupport::Cache.lookup_store(nil, options)
end

if arguments.last.is_a? Hash
options = arguments.pop
@logger = options.delete(:logger)
else
options = arguments
end

@store = options[:store] || ActiveSupport::Cache.lookup_store(store, options)
def last_modified_key_id(request_key)
"LM::#{request_key}"
end

def last_retrieved_key_id(request_key)
"LR::#{request_key}"
end

def call(env)
invalidate_cache = env[:request_headers].delete(:invalidate_cache)

# Add if newer than last modified statement to headers
request_key = cache_key_for(create_request(env))
last_modified_key = "LM::#{request_key}"
last_retrieved_key = "LR::#{request_key}"
last_modified_key = last_modified_key_id(request_key)
last_retrieved_key = last_retrieved_key_id(request_key)

# If we invalidate the cache, then it forces a clean request
if invalidate_cache
cache_delete(request_key)
cache_delete(last_modified_key)
cache_delete(last_retrieved_key)
delete_cache_entries(request_key, last_modified_key, last_retrieved_key)
env[:request_headers]["Cache-Control"] = "no-cache"
puts "Invalidated key #{request_key}" if enable_debug(request_key)
end

# If we made the last request within the expiry
if cache_exist?(last_retrieved_key) && cache_exist?(request_key)
puts "Not expired: #{env[:url].to_s}, key #{request_key}" if enable_debug(request_key)
cached_item = cache_read(request_key)
ld_obj = cached_item.is_a?(Hash) && cached_item.key?(:ld_obj) ? cached_item[:ld_obj] : cached_item
env[:status] = 304
cached_response = ObjectCacheResponse.new(env)
cached_response.parsed_body = ld_obj
return cached_response
return retrieve_cached_response(request_key)
end

last_modified = cache_read(last_modified_key)
headers = env[:request_headers]
puts "last modified: " + last_modified.to_s if last_modified && enable_debug(request_key)
headers['If-Modified-Since'] = last_modified if last_modified
headers['If-Modified-Since'] = cache_read(last_modified_key) if cache_read(last_modified_key)

@app.call(env).on_complete do |response_env|
# Only cache get and head requests
if [:get, :head].include?(response_env[:method])
puts "Response status for key #{request_key}: " + response_env[:status].to_s if enable_debug(request_key)
last_modified = response_env[:response_headers]["Last-Modified"]
# Generate key using header hash
key = request_key

# If the last retrieve time is less than expiry
if response_env[:status] == 304 && cache_exist?(key)
stored_obj = cache_read(key)

# Update if last modified is different
if stored_obj[:last_modified] != last_modified
puts "Updating cache #{response_env[:url].to_s}, key #{request_key}" if enable_debug(request_key)
stored_obj[:last_modified] = last_modified
cache_write(last_modified_key, last_modified)
cache_write(key, stored_obj)
end

ld_obj = stored_obj[:ld_obj]
else
if response_env[:body].nil? || response_env[:body].empty?
# We got here with an empty body, meaning the object wasn't
# in the cache (weird). So re-do the request.
puts "REDOING REQUEST, NO CACHE ENTRY for #{response_env[:url].to_s}, key #{request_key}" if enable_debug(request_key)
env[:request_headers].delete("If-Modified-Since")
response_env = @app.call(env).env
puts "REDOING REQUEST expiry: #{response_env[:response_headers]["Cache-Control"]}, last_modified: #{last_modified} for key #{request_key}" if enable_debug(request_key)
end

ld_obj = LinkedData::Client::HTTP.object_from_json(response_env[:body])
# This stmt was missing in the old code, resulting in repeated calls to REST because object failed to cache
last_modified = response_env[:response_headers]["Last-Modified"]
expiry = response_env[:response_headers]["Cache-Control"].to_s.split("max-age=").last.to_i
puts "Before storing object: expiry: #{expiry}, last_modified: #{last_modified} for key #{request_key}" if enable_debug(request_key)

if expiry > 0 && last_modified
# This request is cacheable, store it
puts "Storing object: #{response_env[:url].to_s}, key #{request_key}" if enable_debug(request_key)
stored_obj = {last_modified: last_modified, ld_obj: ld_obj}
cache_write(last_modified_key, last_modified)
cache_write(last_retrieved_key, true, expires_in: expiry)
cache_write(key, stored_obj)
end
end

response = ObjectCacheResponse.new(response_env)
response.parsed_body = ld_obj
response = process_response(response_env, request_key)
return response
end
end
Expand All @@ -117,8 +57,83 @@ def call(env)
private

def enable_debug(key)
return true if LinkedData::Client.settings.debug_client && (LinkedData::Client.settings.debug_client_keys.empty? || LinkedData::Client.settings.debug_client_keys.include?(key))
false
LinkedData::Client.settings.debug_client && (LinkedData::Client.settings.debug_client_keys.empty? || LinkedData::Client.settings.debug_client_keys.include?(key))
end

def delete_cache_entries(*keys)
keys.each { |key| cache_delete(key) }
end

def retrieve_cached_response(request_key)
cached_item = cache_read(request_key)
ld_obj = cached_item.is_a?(Hash) && cached_item.key?(:ld_obj) ? cached_item[:ld_obj] : cached_item
env = { status: 304 }
cached_response = ObjectCacheResponse.new(env)
cached_response.parsed_body = ld_obj
cached_response.env.response_headers = { "x-rack-cache" => 'hit' }
cached_response
end

def process_response(response_env, request_key)
last_modified = response_env[:response_headers]["Last-Modified"]
key = request_key
cache_state = "miss"

if response_env[:status] == 304 && cache_exist?(key)
cache_state = "fresh"
ld_obj = update_cache(request_key, last_modified)
else
ld_obj = cache_response(response_env, request_key)
end

response = ObjectCacheResponse.new(response_env)
response.parsed_body = ld_obj
response.env.response_headers["x-rack-cache"] = cache_state
response
end

def update_cache(request_key, last_modified)
stored_obj = cache_read(request_key)
if stored_obj[:last_modified] != last_modified
stored_obj[:last_modified] = last_modified
cache_write(last_modified_key_id(request_key), last_modified)
cache_write(request_key, stored_obj)
end
stored_obj.is_a?(Hash) && stored_obj.key?(:ld_obj) ? stored_obj[:ld_obj] : stored_obj
end

def cache_response(response_env, request_key)
last_modified = response_env[:response_headers]["Last-Modified"]

if response_env[:body].nil? || response_env[:body].empty?
# We got here with an empty body, meaning the object wasn't
# in the cache (weird). So re-do the request.
puts "REDOING REQUEST, NO CACHE ENTRY for #{response_env[:url].to_s}, key #{request_key}" if enable_debug(request_key)
response_env[:request_headers].delete("If-Modified-Since")

response_env = @app.call(response_env).env
puts "REDOING REQUEST expiry: #{response_env[:response_headers]["Cache-Control"]}, last_modified: #{last_modified} for key #{request_key}" if enable_debug(request_key)
end


return nil if response_env[:body].nil? || response_env[:body].empty?

ld_obj = LinkedData::Client::HTTP.object_from_json(response_env[:body])

expiry = response_env[:response_headers]["Cache-Control"].to_s.split("max-age=").last.to_i

if expiry > 0 && last_modified
store_cache(request_key, ld_obj, last_modified, expiry)
end

ld_obj
end

def store_cache(request_key, ld_obj, last_modified, expiry)
stored_obj = { last_modified: last_modified, ld_obj: ld_obj }
cache_write(last_modified_key_id(request_key), last_modified)
cache_write(last_retrieved_key_id(request_key), true, expires_in: expiry)
cache_write(request_key, stored_obj)
end

def cache_write(key, obj, *args)
Expand All @@ -132,12 +147,6 @@ def cache_write(key, obj, *args)
if result
return result
else
# This should still get stored in memcache
# keep it in memory, though, because
# marshal/unmarshal is too slow.
# This way memcache will act as a backup
# and you load from there if it isn't
# in memory yet.
@large_object_cache ||= {}
@large_object_cache[key] = obj
cache_write_compressed(key, obj, *args)
Expand All @@ -147,11 +156,10 @@ def cache_write(key, obj, *args)

def cache_read(key)
obj = @store.read(key)
return if obj.nil?
return unless obj

if obj.is_a?(CompressedMemcache)
# Try to get from the large object cache
large_obj = @large_object_cache[key] if @large_object_cache
# Fallback to the memcache version
large_obj ||= cache_read_compressed(key)
obj = large_obj
end
Expand All @@ -162,13 +170,14 @@ def cache_exist?(key)
@store.exist?(key)
end

class CompressedMemcache; attr_accessor :key; end
class CompressedMemcache
attr_accessor :key
end

##
# Compress cache entry
def cache_write_compressed(key, obj, *args)
compressed = LZ4::compress(Marshal.dump(obj))
return if compressed.nil?
return unless compressed

placeholder = CompressedMemcache.new
placeholder.key = "#{key}::#{(Time.now.to_f * 1000).to_i}::LZ4"
begin
Expand All @@ -181,16 +190,13 @@ def cache_write_compressed(key, obj, *args)
end
end

##
# Read compressed cache entry
def cache_read_compressed(key)
obj = @store.read(key)
if obj.is_a?(CompressedMemcache)
begin
uncompressed = LZ4::uncompress(@store.read(obj.key))
obj = Marshal.load(uncompressed)
rescue StandardError => e
# There is a problem with the stored value, let's remove it so we don't get the error again
@store.delete(key)
@store.delete(obj.key)
raise e
Expand All @@ -203,35 +209,16 @@ def cache_delete(key)
@store.delete(key)
end

# Internal: Generates a String key for a given request object.
# The request object is folded into a sorted Array (since we can't count
# on hashes order on Ruby 1.8), encoded as JSON and digested as a `SHA1`
# string.
#
# Returns the encoded String.
def cache_key_for(request)
array = request.stringify_keys.to_a.sort
Digest::SHA1.hexdigest(Marshal.dump(array))
end

# Internal: Creates a new 'Hash' containing the request information.
#
# env - the environment 'Hash' from the Faraday stack.
#
# Returns a 'Hash' containing the ':method', ':url' and 'request_headers'
# entries.
def create_request(env)
request = env.to_hash.slice(:method, :url, :request_headers)
request[:request_headers] = request[:request_headers].dup
request
end

def clean_request_headers(request)
request[:request_headers].delete("If-Modified-Since")
request[:request_headers].delete("Expect")
request
end

end
end

Expand Down
Loading

0 comments on commit b369660

Please sign in to comment.