From a16d626e339d0a4ed9f91daeb3b3b174d1275a02 Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Sun, 14 Apr 2024 15:08:03 +0200 Subject: [PATCH] refactor the cache middleware code --- .../middleware/faraday-object-cache.rb | 206 +++++++++--------- 1 file changed, 100 insertions(+), 106 deletions(-) diff --git a/lib/ontologies_api_client/middleware/faraday-object-cache.rb b/lib/ontologies_api_client/middleware/faraday-object-cache.rb index f5e06d3..32d6105 100644 --- a/lib/ontologies_api_client/middleware/faraday-object-cache.rb +++ b/lib/ontologies_api_client/middleware/faraday-object-cache.rb @@ -8,107 +8,47 @@ class ObjectCacheResponse < Faraday::Response attr_accessor :parsed_body end - ## - # This middleware causes Faraday to return an actual object instead of a response - # This is done so that the object is cached instead of the unparsed json body. - # Otherwise, we have to re-parse the json on every cache hit, which is extrememly costly - # when compared to unmarshaling an object. class ObjectCache < Faraday::Middleware def initialize(app, *arguments) super(app) + options = arguments.last.is_a?(Hash) ? arguments.pop : {} + @logger = options.delete(:logger) + @store = options[:store] || ActiveSupport::Cache.lookup_store(nil, options) + end - if arguments.last.is_a? Hash - options = arguments.pop - @logger = options.delete(:logger) - else - options = arguments - end - @store = options[:store] || ActiveSupport::Cache.lookup_store(store, options) + def last_modified_key_id(request_key) + "LM::#{request_key}" + end + + def last_retrieved_key_id(request_key) + "LR::#{request_key}" end def call(env) invalidate_cache = env[:request_headers].delete(:invalidate_cache) - # Add if newer than last modified statement to headers request_key = cache_key_for(create_request(env)) - last_modified_key = "LM::#{request_key}" - last_retrieved_key = "LR::#{request_key}" + last_modified_key = last_modified_key_id(request_key) + last_retrieved_key = last_retrieved_key_id(request_key) - # If we invalidate the cache, then it forces a clean request if invalidate_cache - cache_delete(request_key) - cache_delete(last_modified_key) - cache_delete(last_retrieved_key) + delete_cache_entries(request_key, last_modified_key, last_retrieved_key) env[:request_headers]["Cache-Control"] = "no-cache" puts "Invalidated key #{request_key}" if enable_debug(request_key) end - # If we made the last request within the expiry if cache_exist?(last_retrieved_key) && cache_exist?(request_key) puts "Not expired: #{env[:url].to_s}, key #{request_key}" if enable_debug(request_key) - cached_item = cache_read(request_key) - ld_obj = cached_item.is_a?(Hash) && cached_item.key?(:ld_obj) ? cached_item[:ld_obj] : cached_item - env[:status] = 304 - cached_response = ObjectCacheResponse.new(env) - cached_response.parsed_body = ld_obj - return cached_response + return retrieve_cached_response(request_key) end - last_modified = cache_read(last_modified_key) headers = env[:request_headers] - puts "last modified: " + last_modified.to_s if last_modified && enable_debug(request_key) - headers['If-Modified-Since'] = last_modified if last_modified + headers['If-Modified-Since'] = cache_read(last_modified_key) if cache_read(last_modified_key) @app.call(env).on_complete do |response_env| - # Only cache get and head requests if [:get, :head].include?(response_env[:method]) - puts "Response status for key #{request_key}: " + response_env[:status].to_s if enable_debug(request_key) - last_modified = response_env[:response_headers]["Last-Modified"] - # Generate key using header hash - key = request_key - - # If the last retrieve time is less than expiry - if response_env[:status] == 304 && cache_exist?(key) - stored_obj = cache_read(key) - - # Update if last modified is different - if stored_obj[:last_modified] != last_modified - puts "Updating cache #{response_env[:url].to_s}, key #{request_key}" if enable_debug(request_key) - stored_obj[:last_modified] = last_modified - cache_write(last_modified_key, last_modified) - cache_write(key, stored_obj) - end - - ld_obj = stored_obj[:ld_obj] - else - if response_env[:body].nil? || response_env[:body].empty? - # We got here with an empty body, meaning the object wasn't - # in the cache (weird). So re-do the request. - puts "REDOING REQUEST, NO CACHE ENTRY for #{response_env[:url].to_s}, key #{request_key}" if enable_debug(request_key) - env[:request_headers].delete("If-Modified-Since") - response_env = @app.call(env).env - puts "REDOING REQUEST expiry: #{response_env[:response_headers]["Cache-Control"]}, last_modified: #{last_modified} for key #{request_key}" if enable_debug(request_key) - end - - ld_obj = LinkedData::Client::HTTP.object_from_json(response_env[:body]) - # This stmt was missing in the old code, resulting in repeated calls to REST because object failed to cache - last_modified = response_env[:response_headers]["Last-Modified"] - expiry = response_env[:response_headers]["Cache-Control"].to_s.split("max-age=").last.to_i - puts "Before storing object: expiry: #{expiry}, last_modified: #{last_modified} for key #{request_key}" if enable_debug(request_key) - - if expiry > 0 && last_modified - # This request is cacheable, store it - puts "Storing object: #{response_env[:url].to_s}, key #{request_key}" if enable_debug(request_key) - stored_obj = {last_modified: last_modified, ld_obj: ld_obj} - cache_write(last_modified_key, last_modified) - cache_write(last_retrieved_key, true, expires_in: expiry) - cache_write(key, stored_obj) - end - end - - response = ObjectCacheResponse.new(response_env) - response.parsed_body = ld_obj + response = process_response(response_env, request_key) return response end end @@ -117,8 +57,83 @@ def call(env) private def enable_debug(key) - return true if LinkedData::Client.settings.debug_client && (LinkedData::Client.settings.debug_client_keys.empty? || LinkedData::Client.settings.debug_client_keys.include?(key)) - false + LinkedData::Client.settings.debug_client && (LinkedData::Client.settings.debug_client_keys.empty? || LinkedData::Client.settings.debug_client_keys.include?(key)) + end + + def delete_cache_entries(*keys) + keys.each { |key| cache_delete(key) } + end + + def retrieve_cached_response(request_key) + cached_item = cache_read(request_key) + ld_obj = cached_item.is_a?(Hash) && cached_item.key?(:ld_obj) ? cached_item[:ld_obj] : cached_item + env = { status: 304 } + cached_response = ObjectCacheResponse.new(env) + cached_response.parsed_body = ld_obj + cached_response.env.response_headers = { "x-rack-cache" => 'hit' } + cached_response + end + + def process_response(response_env, request_key) + last_modified = response_env[:response_headers]["Last-Modified"] + key = request_key + cache_state = "miss" + + if response_env[:status] == 304 && cache_exist?(key) + cache_state = "fresh" + ld_obj = update_cache(request_key, last_modified) + else + ld_obj = cache_response(response_env, request_key) + end + + response = ObjectCacheResponse.new(response_env) + response.parsed_body = ld_obj + response.env.response_headers["x-rack-cache"] = cache_state + response + end + + def update_cache(request_key, last_modified) + stored_obj = cache_read(request_key) + if stored_obj[:last_modified] != last_modified + stored_obj[:last_modified] = last_modified + cache_write(last_modified_key_id(request_key), last_modified) + cache_write(request_key, stored_obj) + end + stored_obj + end + + def cache_response(response_env, request_key) + last_modified = response_env[:response_headers]["Last-Modified"] + + if response_env[:body].nil? || response_env[:body].empty? + # We got here with an empty body, meaning the object wasn't + # in the cache (weird). So re-do the request. + puts "REDOING REQUEST, NO CACHE ENTRY for #{response_env[:url].to_s}, key #{request_key}" if enable_debug(request_key) + response_env[:request_headers].delete("If-Modified-Since") + + response_env = @app.call(response_env).env + puts "REDOING REQUEST expiry: #{response_env[:response_headers]["Cache-Control"]}, last_modified: #{last_modified} for key #{request_key}" if enable_debug(request_key) + end + + + return nil if response_env[:body].nil? || response_env[:body].empty? + + ld_obj = LinkedData::Client::HTTP.object_from_json(response_env[:body]) + + expiry = response_env[:response_headers]["Cache-Control"].to_s.split("max-age=").last.to_i + + if expiry > 0 && last_modified + store_cache(request_key, ld_obj, last_modified, expiry) + end + + ld_obj + end + + def store_cache(request_key, ld_obj, last_modified, expiry) + stored_obj = { last_modified: last_modified, ld_obj: ld_obj } + cache_write(last_modified_key_id(request_key), last_modified) + cache_write(last_retrieved_key_id(request_key), true, expires_in: expiry) + cache_write(request_key, stored_obj) end def cache_write(key, obj, *args) @@ -132,12 +147,6 @@ def cache_write(key, obj, *args) if result return result else - # This should still get stored in memcache - # keep it in memory, though, because - # marshal/unmarshal is too slow. - # This way memcache will act as a backup - # and you load from there if it isn't - # in memory yet. @large_object_cache ||= {} @large_object_cache[key] = obj cache_write_compressed(key, obj, *args) @@ -147,11 +156,10 @@ def cache_write(key, obj, *args) def cache_read(key) obj = @store.read(key) - return if obj.nil? + return unless obj + if obj.is_a?(CompressedMemcache) - # Try to get from the large object cache large_obj = @large_object_cache[key] if @large_object_cache - # Fallback to the memcache version large_obj ||= cache_read_compressed(key) obj = large_obj end @@ -162,13 +170,14 @@ def cache_exist?(key) @store.exist?(key) end - class CompressedMemcache; attr_accessor :key; end + class CompressedMemcache + attr_accessor :key + end - ## - # Compress cache entry def cache_write_compressed(key, obj, *args) compressed = LZ4::compress(Marshal.dump(obj)) - return if compressed.nil? + return unless compressed + placeholder = CompressedMemcache.new placeholder.key = "#{key}::#{(Time.now.to_f * 1000).to_i}::LZ4" begin @@ -181,8 +190,6 @@ def cache_write_compressed(key, obj, *args) end end - ## - # Read compressed cache entry def cache_read_compressed(key) obj = @store.read(key) if obj.is_a?(CompressedMemcache) @@ -190,7 +197,6 @@ def cache_read_compressed(key) uncompressed = LZ4::uncompress(@store.read(obj.key)) obj = Marshal.load(uncompressed) rescue StandardError => e - # There is a problem with the stored value, let's remove it so we don't get the error again @store.delete(key) @store.delete(obj.key) raise e @@ -203,23 +209,11 @@ def cache_delete(key) @store.delete(key) end - # Internal: Generates a String key for a given request object. - # The request object is folded into a sorted Array (since we can't count - # on hashes order on Ruby 1.8), encoded as JSON and digested as a `SHA1` - # string. - # - # Returns the encoded String. def cache_key_for(request) array = request.stringify_keys.to_a.sort Digest::SHA1.hexdigest(Marshal.dump(array)) end - # Internal: Creates a new 'Hash' containing the request information. - # - # env - the environment 'Hash' from the Faraday stack. - # - # Returns a 'Hash' containing the ':method', ':url' and 'request_headers' - # entries. def create_request(env) request = env.to_hash.slice(:method, :url, :request_headers) request[:request_headers] = request[:request_headers].dup