diff --git a/.gemspec b/.gemspec index 318e31c..e6ece7c 100644 --- a/.gemspec +++ b/.gemspec @@ -34,16 +34,25 @@ Gem::Specification.new do |spec| # Dependencies ## bel.rb - spec.add_runtime_dependency 'bel', '0.4.1' + spec.add_runtime_dependency 'bel', '0.6.0' + + ## bel.rb translator dependencies + spec.add_runtime_dependency 'json-ld', '1.99.0' + spec.add_runtime_dependency 'rdf-json', '1.99.0' + spec.add_runtime_dependency 'rdf-rdfa', '1.99.0' + spec.add_runtime_dependency 'rdf-rdfxml', '1.99.0' + spec.add_runtime_dependency 'rdf-trig', '1.99.0.1' + spec.add_runtime_dependency 'rdf-trix', '1.99.0' + spec.add_runtime_dependency 'rdf-turtle', '1.99.0' ## bel.rb plugin - annotation/namespace search spec.add_runtime_dependency 'bel-search-sqlite', '0.4.2' ## bel.rb plugin - RDF repository using Apache Jena - spec.add_runtime_dependency 'bel-rdf-jena', '0.4.1' + spec.add_runtime_dependency 'bel-rdf-jena', '0.4.2' ## RDF - RDF abstraction - spec.add_runtime_dependency 'rdf', '1.99.0' + spec.add_runtime_dependency 'rdf', '1.99.1' ## Mongo - Faceted search of evidence. spec.add_runtime_dependency 'mongo', '1.12.5' @@ -55,14 +64,13 @@ Gem::Specification.new do |spec| spec.add_runtime_dependency 'json_schema', '0.10.0' spec.add_runtime_dependency 'multi_json', '1.11.2' spec.add_runtime_dependency 'oat', '0.4.6' - spec.add_runtime_dependency 'puma', '2.15.3' + spec.add_runtime_dependency 'puma', '3.1.0' spec.add_runtime_dependency 'rack', '1.6.4' spec.add_runtime_dependency 'rack-cors', '0.4.0' spec.add_runtime_dependency 'rack-handlers', '0.7.0' spec.add_runtime_dependency 'sinatra', '1.4.6' spec.add_runtime_dependency 'sinatra-contrib', '1.4.6' spec.add_runtime_dependency 'jwt', '1.5.2' - spec.add_runtime_dependency 'rest-client', '1.8.0' end # vim: ts=2 sw=2: # encoding: utf-8 diff --git a/CHANGELOG.md b/CHANGELOG.md index bb6b4df..d673353 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,10 +3,25 @@ All notable changes to openbel-api will be documented in this file. 
This project adheres to [Semantic Versioning][Semantic Versioning]. +## [0.6.0][0.6.0] - 2016-02-03 +### Added +- Retrieve evidence in a format supported by BEL translator plugins ([Issue 44][44]). +- Retrieve dataset evidence in a format supported by BEL translator plugins ([Issue 99][99]). +### Fixed +- Dataset evidence collection is missing annotation/namespace URIs ([Issue 95][95]). +- Facets are not created for evidence uploaded through a dataset. +### Changed +- MongoDB version 3.2.0 is now required due to use of `$slice` operator in Aggregation queries. + +----- + ## [0.5.1][0.5.1] - 2015-12-18 ### Fixed - Authentication error for MongoDB user when faceting on `GET /api/evidence` ([Issue #93][93]). +### Changed +- MongoDB version 3.2.0 is now required due to use of `$slice` operator in Aggregation queries. + ----- ## [0.5.0][0.5.0] - 2015-12-17 @@ -44,6 +59,9 @@ This project adheres to [Semantic Versioning][Semantic Versioning]. [0.5.0]: https://github.com/OpenBEL/openbel-api/compare/0.4.0...0.5.0 [Semantic Versioning]: http://semver.org [MongoDB User Authentication]: https://github.com/OpenBEL/openbel-api/wiki/Configuring-the-Evidence-Store#mongodb-user-authentication +[44]: https://github.com/OpenBEL/openbel-api/issues/44 [91]: https://github.com/OpenBEL/openbel-api/issues/91 [92]: https://github.com/OpenBEL/openbel-api/issues/92 [93]: https://github.com/OpenBEL/openbel-api/issues/93 +[95]: https://github.com/OpenBEL/openbel-api/issues/95 +[99]: https://github.com/OpenBEL/openbel-api/issues/99 diff --git a/README.md b/README.md index 2b13e34..d253438 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ The OpenBEL API is built to run with [JRuby][JRuby] and [Java 8][Java 8]. - [JRuby][JRuby], 9.x series (9.0.x.0 is recommended) - The 9.x series is required due to a Ruby language 2.0 requirement. - See "Installation" below for configuring JRuby and isolating the openbel-api application. 
-- [MongoDB][MongoDB], version 3.0 or greater +- [MongoDB][MongoDB], version 3.2 or greater - Follow [MongoDB download][MongoDB download] page for download and installation instructions. - [SQLite][SQLite], version 3.8.0 or greater - Follow [SQLite download][SQLite download] page for download and installation instructions. @@ -236,7 +236,7 @@ API documentation with *Try it* functionality is available [here][OpenBEL API do ----- -Built with collaboration and :heart: by the [OpenBEL][OpenBEL] community. +Built with collaboration and a lot of :heart: by the [OpenBEL][OpenBEL] community. [OpenBEL]: http://www.openbel.org [OpenBEL Platform]: https://github.com/OpenBEL/openbel-platform diff --git a/UPGRADING.md b/UPGRADING.md new file mode 100644 index 0000000..051389e --- /dev/null +++ b/UPGRADING.md @@ -0,0 +1,75 @@ +# Upgrading openbel-api + +This file contains documentation for upgrading to specific versions of OpenBEL API. + +## 0.6.0 Upgrade (2016-03-15) + +### MongoDB 3.2 + +This release requires MongoDB >= 3.2. The latest MongoDB release is version [3.2.3](https://www.mongodb.com/mongodb-3.2) as of March 15th, 2016. OpenBEL API will fail to start (with a message) if MongoDB's version is less than 3.2. + +Note: MongoDB 3.2 uses the *wiredTiger* storage engine by default. If you previously used the *mmapv1* storage engine for OpenBEL API then do not set *storage.engine* in your MongoDB configuration. MongoDB will determine the *storage.engine* by the data in your *dbPath*. See this [MongoDB article](https://docs.mongodb.org/manual/core/wiredtiger/) for details. + +----- + +### MongoDB Migration + +The 0.6.0 version of OpenBEL API introduces a change to how evidence facets are stored in MongoDB. + +#### Change Detail + +##### 0.5.1 + +Collections: + +- `evidence` + - Stores evidence.facets as strings. +- `evidence_facets` + - Stores evidence facet objects for all searches. 
+ +##### 0.6.0 + +Collections: + +- `evidence` + - Stores evidence.facets as JSON objects for use in Mongo aggregation operations. +- `evidence_facet_cache` + - Stores the facet collection name for each unique evidence search. +- `evidence_facet_cache_{UUID}` + - Stores evidence facet objects for a specific evidence search. + +#### Migration Procedure + +Migrations are JRuby scripts that can be run directly as scripts (i.e. includes `#!/usr/bin/env jruby` shebang). You will need the OpenBEL API repository on GitHub as well as your OpenBEL API configuration file. + +It is recommended to stop OpenBEL API and MongoDB before migrating. + +1. Stop OpenBEL API. +2. Stop MongoDB daemon. +3. Clone OpenBEL API repository. + - `git clone https://github.com/OpenBEL/openbel-api.git` +4. Change directory to the 0.6.0 migrations directory. + - `cd openbel-api/tools/migrations/0.6.0` +5. Run *migrate_evidence_facets.rb* to update evidence.facets to JSON objects. + - `./migrate_evidence_facets.rb YOUR_CONFIG.yml` or `jruby migrate_evidence_facets.rb YOUR_CONFIG.yml` +6. Run *drop_unused_collection.rb* to remove the old *evidence_facets* collection. + - `./drop_unused_collection.rb YOUR_CONFIG.yml` or `jruby drop_unused_collection.rb YOUR_CONFIG.yml` +7. Start MongoDB daemon. +8. Start OpenBEL API. + +----- + +### Conflicting gem versions. + +The *bel*, *puma*, and *rdf* gem dependencies have been upgraded. This may cause conflicting gem versions to exist in the same *GEM_HOME* location. + +If you wish to install into an existing *GEM_HOME* (versus an isolated *GEM_HOME*) then please uninstall these gems: + +- `gem uninstall bel puma rdf` +- Say yes to remove existing command scripts as well. + +----- + +### Installation + +Install OpenBEL API 0.6.0 with `gem install openbel-api --version 0.6.0`. 
diff --git a/VERSION b/VERSION index 4b9fcbe..a918a2a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.5.1 +0.6.0 diff --git a/app/openbel/api/config.rb b/app/openbel/api/config.rb index ffe061d..8b7f006 100644 --- a/app/openbel/api/config.rb +++ b/app/openbel/api/config.rb @@ -92,6 +92,18 @@ def self.validate_evidence_store(evidence_store) ] end + # Check Mongo server version >= 3.2. + # The aggregation framework's $slice operator is used which requires 3.2. + if mongo_client.server_version.to_s !~ /^3.2/ + return [ + true, <<-ERR + MongoDB version 3.2 or greater is required. + + MongoDB version: #{mongo_client.server_version} + ERR + ] + end + # Attempt access of database. db = mongo_client.db(mongo[:database]) diff --git a/app/openbel/api/helpers/base.rb b/app/openbel/api/helpers/base.rb new file mode 100644 index 0000000..b588b91 --- /dev/null +++ b/app/openbel/api/helpers/base.rb @@ -0,0 +1,17 @@ +module OpenBEL + module Helpers + + DEFAULT_CONTENT_TYPE = 'application/hal+json' + DEFAULT_CONTENT_TYPE_ID = :hal + + def wants_default? + if params[:format] + return params[:format] == DEFAULT_CONTENT_TYPE + end + + request.accept.any? { |accept_entry| + accept_entry.to_s == DEFAULT_CONTENT_TYPE + } + end + end +end diff --git a/app/openbel/api/helpers/evidence.rb b/app/openbel/api/helpers/evidence.rb new file mode 100644 index 0000000..324d4c2 --- /dev/null +++ b/app/openbel/api/helpers/evidence.rb @@ -0,0 +1,74 @@ +require 'bel/util' +require_relative 'base' +require_relative 'translators' + +module OpenBEL + module Helpers + + def render_evidence_collection( + name, page_results, start, size, filters, + filtered_total, collection_total, evidence_api + ) + # see if the user requested a BEL translator (Accept header or ?format) + translator = Translators.requested_translator(request, params) + translator_plugin = Translators.requested_translator_plugin(request, params) + + halt 404 unless page_results[:cursor].has_next? 
+ + # Serialize to HAL if they [Accept]ed it, specified it as ?format, or + # no translator was found to match request. + if wants_default? || !translator + facets = page_results[:facets] + pager = Pager.new(start, size, filtered_total) + evidence = page_results[:cursor].map { |item| + item.delete('facets') + item + }.to_a + + options = { + :facets => facets, + :start => start, + :size => size, + :filters => filters, + :metadata => { + :collection_paging => { + :total => collection_total, + :total_filtered => pager.total_size, + :total_pages => pager.total_pages, + :current_page => pager.current_page, + :current_page_size => evidence.size, + } + } + } + + # pager links + options[:previous_page] = pager.previous_page + options[:next_page] = pager.next_page + + render_collection(evidence, :evidence, options) + else + extension = translator_plugin.file_extensions.first + + response.headers['Content-Type'] = translator_plugin.media_types.first + status 200 + attachment "#{name}.#{extension}" + stream :keep_open do |response| + cursor = page_results[:cursor] + dataset_evidence = cursor.lazy.map { |evidence| + evidence.delete('facets') + evidence.delete('_id') + evidence = BEL::Model::Evidence.create(BEL.keys_to_symbols(evidence)) + evidence.bel_statement = BEL::Model::Evidence.parse_statement(evidence) + evidence + } + + translator.write( + dataset_evidence, response, + :annotation_reference_map => evidence_api.find_all_annotation_references, + :namespace_reference_map => evidence_api.find_all_namespace_references + ) + end + end + end + end +end diff --git a/app/openbel/api/helpers/filters.rb b/app/openbel/api/helpers/filters.rb new file mode 100644 index 0000000..3eb13be --- /dev/null +++ b/app/openbel/api/helpers/filters.rb @@ -0,0 +1,94 @@ +module OpenBEL + module Helpers + + # Parse filter query parameters and partition into an {Array}. The first + # index will contain the valid filters and the second index will contain + # the invalid filters. 
+ # + # @param [Array] filter_query_params an array of filter strings + # encoded in JSON + # @return [Array, Array] the first index holds the + # valid, filter {Hash hashes}; the second index holds the invalid, + # filter {String strings} + def parse_filters(filter_query_params) + filter_query_params.map { |filter_string| + begin + MultiJson.load filter_string + rescue MultiJson::ParseError => ex + "#{ex} (filter: #{filter_string})" + end + }.partition { |filter| + filter.is_a?(Hash) + } + end + + # Retrieve the filters that do not provide category, name, and value keys. + # + # The parsed, incomplete filters will contain an +:error+ key that provides + # an error message intended for the user. + # + # @param [Array] filters an array of filter {Hash hashes} + # @return [Array] an array of incomplete filter {Hash hashes} that + # contain a human-readable error at the +:error+ key + def incomplete_filters(filters) + filters.select { |filter| + ['category', 'name', 'value'].any? { |f| !filter.include? f } + }.map { |incomplete_filter| + category, name, value = incomplete_filter.values_at('category', 'name', 'value') + error = <<-MSG.gsub(/^\s+/, '').strip + Incomplete filter, category:"#{category}", name:"#{name}", and value:"#{value}". + MSG + incomplete_filter.merge(:error => error) + } + end + + # Retrieve the filters that represent invalid full-text search values. + # + # The parsed, invalid full-text search filters will contain an +:error+ key + # that provides an error message intended for the user. 
+ # + # @param [Array] filters an array of filter {Hash hashes} + # @return [Array] an array of invalid full-text search filter + # {Hash hashes} that contain a human-readable error at the + # +:error+ key + def invalid_fts_filters(filters) + filters.select { |filter| + category, name, value = filter.values_at('category', 'name', 'value') + category == 'fts' && name == 'search' && value.to_s.length <= 1 + }.map { |invalid_fts_filter| + error = <<-MSG.gsub(/^\s+/, '').strip + Full-text search filter values must be larger than one. + MSG + invalid_fts_filter.merge(:error => error) + } + end + + # Validate the requested filter query strings. If all filters are valid + # then return them as {Hash hashes}, otherwise halt 400 Bad Request and + # return JSON error response. + def validate_filters! + filter_query_params = CGI::parse(env["QUERY_STRING"])['filter'] + valid_filters, invalid_filters = parse_filters(filter_query_params) + + invalid_filters |= incomplete_filters(valid_filters) + invalid_filters |= invalid_fts_filters(valid_filters) + + return valid_filters if invalid_filters.empty? + + halt(400, { 'Content-Type' => 'application/json' }, render_json({ + :status => 400, + :msg => "Bad Request", + :detail => + invalid_filters. + map { |invalid_filter| + if invalid_filter.is_a?(Hash) && invalid_filter[:error] + invalid_filter[:error] + else + invalid_filter + end + }. + map(&:to_s) + })) + end + end +end diff --git a/app/openbel/api/helpers/translators.rb b/app/openbel/api/helpers/translators.rb new file mode 100644 index 0000000..bece44f --- /dev/null +++ b/app/openbel/api/helpers/translators.rb @@ -0,0 +1,73 @@ +require 'bel' + +module OpenBEL + module Helpers + + # Helpers for translator functionality based on user's requested media + # type. + module Translators + + # Open {::Sinatra::Helpers::Stream} and add the +puts+, +write+, and + # +flush+ methods. 
This is necessary because the RDF.rb writers will call + # these methods on the IO (in this case {::Sinatra::Helpers::Stream}). + class ::Sinatra::Helpers::Stream + + # Write each string in +args*, new-line delimited, to the stream. + def puts(*args) + self << ( + args.map { |string| "#{string.encode(Encoding::UTF_8)}\n" }.join + ) + end + + # Write the string to the stream. + def write(string) + self << string.encode(Encoding::UTF_8) + end + + # flush is a no-op; flushing is handled by sinatra/rack server + def flush; end + end + + # Find a bel.rb translator plugin by value. The value is commonly the + # id, file extension, or media type associated with the translator + # plugin. + # + # @param [#to_s] value used to look up translator plugin registered + # with bel.rb + # @return [BEL::Translator] the translator instance; or +nil+ if one + # cannot be found + def self.for(value) + BEL.translator(symbolize_value(value)) + end + + def self.plugin_for(value) + BEL::Translator::Plugins.for(symbolize_value(value)) + end + + def self.requested_translator_plugin(request, params) + if params && params[:format] + self.plugin_for(params[:format]) + else + request.accept.flat_map { |accept_entry| + self.plugin_for(accept_entry) + }.compact.first + end + end + + def self.requested_translator(request, params) + if params && params[:format] + self.for(params[:format]) + else + request.accept.flat_map { |accept_entry| + self.for(accept_entry) + }.compact.first + end + end + + def self.symbolize_value(value) + value.to_s.to_sym + end + private_class_method :symbolize_value + end + end +end diff --git a/app/openbel/api/resources/evidence_transform.rb b/app/openbel/api/resources/evidence_transform.rb index fff582b..3ff9eef 100644 --- a/app/openbel/api/resources/evidence_transform.rb +++ b/app/openbel/api/resources/evidence_transform.rb @@ -94,23 +94,11 @@ def structured_annotation(name, value, base_url) def free_annotation(name, value) { - :name => normalize_annotation_name(name), 
+ :name => name, :value => value } end - def normalize_annotation_name(name, options = {}) - name_s = name.to_s - - if name_s.empty? - nil - else - name_s. - split(%r{[^a-zA-Z0-9]+}). - map! { |word| word.capitalize }. - join - end - end end class AnnotationGroupingTransform diff --git a/app/openbel/api/routes/authenticate.rb b/app/openbel/api/routes/authenticate.rb index 1be9864..bbea86f 100644 --- a/app/openbel/api/routes/authenticate.rb +++ b/app/openbel/api/routes/authenticate.rb @@ -1,100 +1,8 @@ -require 'uri' -require 'rest-client' - -def current_host(env) - scheme = env['rack.url_scheme'] || 'http' - host = env['HTTP_HOST'] - "#{scheme}://#{host}" -end - -def current_path(env) - scheme = env['rack.url_scheme'] || 'http' - host = env['HTTP_HOST'] - path = env['PATH_INFO'] - "#{scheme}://#{host}#{path}" -end - module OpenBEL module Routes class Authenticate < Base - get '/api/authenticate' do - state = params[:state] - code = params[:code] - if code.nil? - default_connection = OpenBEL::Settings[:auth][:default_connection] - default_auth_url = current_path(env) + "/#{default_connection}" - if not state.nil? 
- default_auth_url += "?state=#{state}" - end - redirect to(default_auth_url) - end - - domain = OpenBEL::Settings[:auth][:domain] - id = OpenBEL::Settings[:auth][:id] - secret = OpenBEL::Settings[:auth][:secret] - - callback_url = current_path(env) - payload = { - client_id: id, - client_secret: secret, - redirect_uri: callback_url, - code: code, - grant_type: :authorization_code - } - - token_url = "https://#{domain}/oauth/token" - body = payload.to_json - - begin - token_response = RestClient.post token_url, body, - :content_type => :json, - :accept => :json - rescue => e - hdrs = {'Content-Type' => 'application/json'} - msg = {error: e.response } - return [401, hdrs, [msg.to_json]] - end - - token_response = JSON.parse(token_response) - access_token = token_response['access_token'] - jwt = token_response['id_token'] - - user_url = "https://#{domain}/userinfo?access_token=#{access_token}" - begin - user_response = RestClient.get user_url, :accept => :json - rescue => e - hdrs = {'Content-Type' => 'application/json'} - msg = {error: e.response } - return [401, hdrs, [msg.to_json]] - end - - email = JSON.parse(user_response)['email'] - hdrs = {'Content-Type' => 'application/json'} - msg = {success: email} - cookies[:jwt] = jwt - if not state.nil? - redirect to(state + "?jwt=#{jwt}") - else - [200, hdrs, [msg.to_json]] - end - end - - get '/api/authenticate/:connection' do - state = params[:state] - redirect_setting = OpenBEL::Settings[:auth][:redirect] - connection = params[:connection] - redirect_uri = current_host(env) + '/api/authenticate' - auth_url = "#{redirect_setting}" - auth_url += "&redirect_uri=#{redirect_uri}" - auth_url += "&connection=#{connection}" - if not state.nil? 
- auth_url += "&state=#{state}" - end - redirect to(auth_url) - end - get '/api/authentication-enabled' do enabled = OpenBEL::Settings[:auth][:enabled] hdrs = {'Content-Type' => 'application/json'} diff --git a/app/openbel/api/routes/base.rb b/app/openbel/api/routes/base.rb index 52dfeb1..3b9d7c2 100644 --- a/app/openbel/api/routes/base.rb +++ b/app/openbel/api/routes/base.rb @@ -21,14 +21,14 @@ class Base < Sinatra::Application include OpenBEL::Resource::Namespaces include OpenBEL::Schemas - DEFAULT_CONTENT_TYPE = 'application/hal+json' - SPOKEN_CONTENT_TYPES = %w[application/hal+json application/json] + DEFAULT_CONTENT_TYPE = 'application/hal+json' + DEFAULT_CONTENT_TYPE_ID = :hal + SPOKEN_CONTENT_TYPES = %w[application/hal+json application/json] SPOKEN_CONTENT_TYPES.concat( BEL::Translator.plugins.values.flat_map { |p| p.media_types.map(&:to_s) } ) - - SCHEMA_BASE_URL = 'http://next.belframework.org/schemas/' - RESOURCE_SERIALIZERS = { + SCHEMA_BASE_URL = 'http://next.belframework.org/schemas/' + RESOURCE_SERIALIZERS = { :annotation => AnnotationResourceSerializer, :annotation_collection => AnnotationCollectionSerializer, :annotation_value => AnnotationValueResourceSerializer, @@ -80,6 +80,16 @@ def schema_url(name) SCHEMA_BASE_URL + "#{name}.schema.json" end + def wants_default? + if params[:format] + return params[:format] == DEFAULT_CONTENT_TYPE + end + + request.accept.any? { |accept_entry| + accept_entry.to_s == DEFAULT_CONTENT_TYPE + } + end + def validate_media_type!(content_type, options = {}) ctype = request.content_type valid = ctype.start_with? 
content_type @@ -143,6 +153,10 @@ def read_filter(filter_json) end end + def write_filter(filter) + MultiJson.dump(filter) + end + def render_json(obj, media_type = 'application/hal+json', profile = nil) ctype = if profile diff --git a/app/openbel/api/routes/datasets.rb b/app/openbel/api/routes/datasets.rb index f890db5..ee6e5ba 100644 --- a/app/openbel/api/routes/datasets.rb +++ b/app/openbel/api/routes/datasets.rb @@ -1,10 +1,13 @@ require 'bel' +require 'bel/util' require 'rdf' require 'cgi' require 'multi_json' require 'openbel/api/evidence/mongo' require 'openbel/api/evidence/facet_filter' require_relative '../resources/evidence_transform' +require_relative '../helpers/evidence' +require_relative '../helpers/filters' require_relative '../helpers/pager' module OpenBEL @@ -23,11 +26,14 @@ class Datasets < Base :json => 'application/json', } - EVIDENCE_BATCH = 500 + MONGO_BATCH = 500 + FACET_THRESHOLD = 10000 def initialize(app) super + BEL.translator(:rdf) + # Evidence API using Mongo. mongo = OpenBEL::Settings[:evidence_store][:mongo] @api = OpenBEL::Evidence::Evidence.new(mongo) @@ -37,9 +43,6 @@ def initialize(app) :tdb_directory => OpenBEL::Settings[:resource_rdf][:jena][:tdb_directory] ) - # Load RDF monkeypatches. - BEL::Translator.plugins[:rdf].create_translator - # Annotations using RdfRepository annotations = BEL::Resource::Annotations.new(@rr) @annotation_transform = AnnotationTransform.new(annotations) @@ -239,7 +242,12 @@ def retrieve_dataset(uri) # Add batches of read evidence objects; save to Mongo and RDF. # TODO Add JRuby note regarding Enumerator threading. + evidence_count = 0 evidence_batch = [] + + # Clear out all facets before loading dataset. + @api.delete_facets + BEL.evidence(io, type).each do |ev| # Standardize annotations from experiment_context. 
@annotation_transform.transform_evidence!(ev, base_url) @@ -247,14 +255,16 @@ def retrieve_dataset(uri) ev.metadata[:dataset] = dataset_id facets = map_evidence_facets(ev) ev.bel_statement = ev.bel_statement.to_s - hash = ev.to_h + hash = BEL.object_convert(String, ev.to_h) { |str| + str.gsub(/\n/, "\\n").gsub(/\r/, "\\r") + } hash[:facets] = facets # Create dataset field for efficient removal. hash[:_dataset] = dataset_id evidence_batch << hash - if evidence_batch.size == EVIDENCE_BATCH + if evidence_batch.size == MONGO_BATCH _ids = @api.create_evidence(evidence_batch) dataset_parts = _ids.map { |object_id| @@ -263,6 +273,13 @@ def retrieve_dataset(uri) @rr.insert_statements(dataset_parts) evidence_batch.clear + + # Clear out all facets after FACET_THRESHOLD nanopubs have been seen. + evidence_count += MONGO_BATCH + if evidence_count >= FACET_THRESHOLD + @api.delete_facets + evidence_count = 0 + end end end @@ -277,6 +294,9 @@ def retrieve_dataset(uri) evidence_batch.clear end + # Clear out all facets after the dataset is completely loaded. + @api.delete_facets + status 201 headers 'Location' => void_dataset_uri.to_s end @@ -312,142 +332,19 @@ def retrieve_dataset(uri) start = (params[:start] || 0).to_i size = (params[:size] || 0).to_i faceted = as_bool(params[:faceted]) - max_values_per_facet = (params[:max_values_per_facet] || 0).to_i - - # check filters - filters = [] - filter_params = CGI::parse(env["QUERY_STRING"])['filter'] - filter_params.each do |filter| - filter = read_filter(filter) - halt 400 unless ['category', 'name', 'value'].all? { |f| filter.include? f} - - if filter['category'] == 'fts' && filter['name'] == 'search' - unless filter['value'].to_s.length > 1 - halt( - 400, - { 'Content-Type' => 'application/json' }, - render_json({ - :status => 400, - :msg => 'Full-text search filter values must be larger than one.' 
- }) - ) - end - end + max_values_per_facet = (params[:max_values_per_facet] || -1).to_i - # Remove dataset filters since we're filtering a specific one already. - next if filter.values_at('category', 'name') == ['metadata', 'dataset'] - - filters << filter - end + filters = validate_filters! collection_total = @api.count_evidence filtered_total = @api.count_evidence(filters) - page_results = @api.find_dataset_evidence(dataset, filters, start, size, faceted) - - accept_type = request.accept.find { |accept_entry| - ACCEPTED_TYPES.values.include?(accept_entry.to_s) - } - accept_type ||= DEFAULT_TYPE - - if params[:format] - translator = BEL::Translator.plugins[params[:format].to_sym] - halt 501 if !translator || translator.id == :rdf - accept_type = [translator.media_types].flatten.first - end - - if accept_type == DEFAULT_TYPE - evidence = page_results[:cursor].map { |item| - item.delete('facets') - item - }.to_a - - facets = page_results[:facets] - - halt 404 if evidence.empty? - - pager = Pager.new(start, size, filtered_total) - - options = { - :start => start, - :size => size, - :filters => filter_params, - :metadata => { - :collection_paging => { - :total => collection_total, - :total_filtered => pager.total_size, - :total_pages => pager.total_pages, - :current_page => pager.current_page, - :current_page_size => evidence.size, - } - } - } - - if facets - # group by category/name - hashed_values = Hash.new { |hash, key| hash[key] = [] } - facets.each { |facet| - filter = read_filter(facet['_id']) - category, name = filter.values_at('category', 'name') - next if !category || !name - - key = [category.to_sym, name.to_sym] - facet_obj = { - :value => filter['value'], - :filter => facet['_id'], - :count => facet['count'] - } - hashed_values[key] << facet_obj - } - - if max_values_per_facet == 0 - facet_hashes = hashed_values.map { |(category, name), value_objects| - { - :category => category, - :name => name, - :values => value_objects - } - } - else - facet_hashes = 
hashed_values.map { |(category, name), value_objects| - { - :category => category, - :name => name, - :values => value_objects.take(max_values_per_facet) - } - } - end + page_results = @api.find_dataset_evidence(dataset, filters, start, size, faceted, max_values_per_facet) + name = dataset[:identifier].gsub(/[^\w]/, '_') - options[:facets] = facet_hashes - end - - # pager links - options[:previous_page] = pager.previous_page - options[:next_page] = pager.next_page - - render_collection(evidence, :evidence, options) - else - out_translator = BEL.translator(accept_type) - extension = ACCEPTED_TYPES.key(accept_type.to_s) - - response.headers['Content-Type'] = accept_type - status 200 - attachment "#{dataset[:identifier].gsub(/[^\w]/, '_')}.#{extension}" - stream :keep_open do |response| - cursor = page_results[:cursor] - json_evidence_enum = cursor.lazy.map { |evidence| - evidence.delete('facets') - evidence.delete('_id') - evidence.keys.each do |key| - evidence[key.to_sym] = evidence.delete(key) - end - BEL::Model::Evidence.create(evidence) - } - - out_translator.write(json_evidence_enum) do |converted_evidence| - response << converted_evidence - end - end - end + render_evidence_collection( + name, page_results, start, size, filters, + filtered_total, collection_total, @api + ) end get '/api/datasets' do @@ -484,6 +381,7 @@ def retrieve_dataset(uri) halt 404 unless dataset_exists?(void_dataset_uri) dataset = retrieve_dataset(void_dataset_uri) + # XXX Removes all facets due to load of many evidence. @api.delete_dataset(dataset[:identifier]) @rr.delete_statement(RDF::Statement.new(void_dataset_uri, nil, nil)) @@ -500,6 +398,7 @@ def retrieve_dataset(uri) datasets.each do |void_dataset_uri| dataset = retrieve_dataset(void_dataset_uri) + # XXX Removes all facets due to load of many evidence. 
@api.delete_dataset(dataset[:identifier]) @rr.delete_statement(RDF::Statement.new(void_dataset_uri, nil, nil)) end diff --git a/app/openbel/api/routes/evidence.rb b/app/openbel/api/routes/evidence.rb index bd271be..e7021b7 100644 --- a/app/openbel/api/routes/evidence.rb +++ b/app/openbel/api/routes/evidence.rb @@ -3,6 +3,8 @@ require 'openbel/api/evidence/mongo' require 'openbel/api/evidence/facet_filter' require_relative '../resources/evidence_transform' +require_relative '../helpers/evidence' +require_relative '../helpers/filters' require_relative '../helpers/pager' module OpenBEL @@ -142,19 +144,7 @@ def keys_to_s_deep(hash) size = (params[:size] || 0).to_i group_as_array = as_bool(params[:group_as_array]) - # check filters - filters = [] - filter_params = CGI::parse(env["QUERY_STRING"])['filter'] - filter_params.each do |filter| - filter = read_filter(filter) - halt 400 unless ['category', 'name', 'value'].all? { |f| filter.include? f} - - if filter['category'] == 'fts' && filter['name'] == 'search' - halt 400 unless filter['value'].to_s.length > 1 - end - - filters << filter - end + filters = validate_filters! cursor = @api.find_evidence(filters, start, size, false)[:cursor] if group_as_array @@ -168,93 +158,18 @@ def keys_to_s_deep(hash) start = (params[:start] || 0).to_i size = (params[:size] || 0).to_i faceted = as_bool(params[:faceted]) - max_values_per_facet = (params[:max_values_per_facet] || 0).to_i + max_values_per_facet = (params[:max_values_per_facet] || -1).to_i - # check filters - filters = [] - filter_params = CGI::parse(env["QUERY_STRING"])['filter'] - filter_params.each do |filter| - filter = read_filter(filter) - halt 400 unless ['category', 'name', 'value'].all? { |f| filter.include? f} - - if filter['category'] == 'fts' && filter['name'] == 'search' - halt 400 unless filter['value'].to_s.length > 1 - end - - filters << filter - end + filters = validate_filters! 
collection_total = @api.count_evidence() filtered_total = @api.count_evidence(filters) - page_results = @api.find_evidence(filters, start, size, faceted) - evidence = page_results[:cursor].map { |item| - item.delete('facets') - item - }.to_a - facets = page_results[:facets] - - halt 404 if evidence.empty? - - pager = Pager.new(start, size, filtered_total) - - options = { - :start => start, - :size => size, - :filters => filter_params, - :metadata => { - :collection_paging => { - :total => collection_total, - :total_filtered => pager.total_size, - :total_pages => pager.total_pages, - :current_page => pager.current_page, - :current_page_size => evidence.size, - } - } - } - - if facets - # group by category/name - hashed_values = Hash.new { |hash, key| hash[key] = [] } - facets.each { |facet| - filter = read_filter(facet['_id']) - category, name = filter.values_at('category', 'name') - next if !category || !name - - key = [category.to_sym, name.to_sym] - facet_obj = { - :value => filter['value'], - :filter => facet['_id'], - :count => facet['count'] - } - hashed_values[key] << facet_obj - } - - if max_values_per_facet == 0 - facet_hashes = hashed_values.map { |(category, name), value_objects| - { - :category => category, - :name => name, - :values => value_objects - } - } - else - facet_hashes = hashed_values.map { |(category, name), value_objects| - { - :category => category, - :name => name, - :values => value_objects.take(max_values_per_facet) - } - } - end - - options[:facets] = facet_hashes - end - - # pager links - options[:previous_page] = pager.previous_page - options[:next_page] = pager.next_page + page_results = @api.find_evidence(filters, start, size, faceted, max_values_per_facet) - render_collection(evidence, :evidence, options) + render_evidence_collection( + 'evidence-export', page_results, start, size, filters, + filtered_total, collection_total, @api + ) end get '/api/evidence/:id' do diff --git a/config/config.yml b/config/config.yml index 
9bc44c7..13c729b 100644 --- a/config/config.yml +++ b/config/config.yml @@ -51,12 +51,10 @@ resource_search: # Set a secret used during session creation.... session_secret: 'changeme' -# User authentication using Auth0. +# User authentication via JWTs; see http://jwt.io/introduction/ for more auth: + # Controls whether the API requires authentication enabled: false - redirect: 'https://openbel.auth0.com/authorize?response_type=code&scope=openid%20profile&client_id=K4oAPUaROjbWWTCoAhf0nKYfTGsZWbHE' - default_connection: 'linkedin' - domain: 'openbel.auth0.com' - id: 'K4oAPUaROjbWWTCoAhf0nKYfTGsZWbHE' - # secret: 'auth0 client secret here' + # Used by the auth middleware to decode and verify the JWT + #secret: 'JWT secret here' diff --git a/lib/openbel/api/evidence/api.rb b/lib/openbel/api/evidence/api.rb index ff15c69..9cfb80b 100644 --- a/lib/openbel/api/evidence/api.rb +++ b/lib/openbel/api/evidence/api.rb @@ -15,6 +15,14 @@ def find_evidence(filters = [], offset = 0, length = 0, facet = false) fail NotImplementedError, "#{__method__} is not implemented" end + def find_all_namespace_references + fail NotImplementedError, "#{__method__} is not implemented" + end + + def find_all_annotation_references + fail NotImplementedError, "#{__method__} is not implemented" + end + def count_evidence(filters = []) fail NotImplementedError, "#{__method__} is not implemented" end diff --git a/lib/openbel/api/evidence/facet_filter.rb b/lib/openbel/api/evidence/facet_filter.rb index e9e8248..fd9c461 100644 --- a/lib/openbel/api/evidence/facet_filter.rb +++ b/lib/openbel/api/evidence/facet_filter.rb @@ -72,11 +72,11 @@ def map_metadata_facets(metadata) end def make_filter(category, name, value) - MultiJson.dump({ + { :category => category, :name => name, - :value => value, - }) + :value => value + } end end end diff --git a/lib/openbel/api/evidence/mongo.rb b/lib/openbel/api/evidence/mongo.rb index 64aceb7..85a5cf9 100644 --- a/lib/openbel/api/evidence/mongo.rb +++ 
b/lib/openbel/api/evidence/mongo.rb @@ -1,4 +1,5 @@ require 'bel' +require 'bel/evidence_model/util' require 'mongo' require_relative 'api' require_relative 'mongo_facet' @@ -11,10 +12,15 @@ class Evidence include Mongo def initialize(options = {}) - host = options[:host] - port = options[:port] - db = options[:database] - @db = MongoClient.new(host, port).db(db) + host = options[:host] + port = options[:port] + db = options[:database] + @db = MongoClient.new( + host, + port, + :op_timeout => nil, + :pool_size => 30 + ).db(db) # Authenticate user if provided. username = options[:username] @@ -25,15 +31,10 @@ def initialize(options = {}) end @collection = @db[:evidence] - @collection.ensure_index( - {:"bel_statement" => Mongo::ASCENDING }, - :background => true - ) - @collection.ensure_index( - {:"$**" => Mongo::TEXT }, - :background => true - ) @evidence_facets = EvidenceFacets.new(options) + + # ensure all indexes are created and maintained + ensure_all_indexes end def create_evidence(evidence) @@ -55,29 +56,16 @@ def find_evidence_by_id(value) @collection.find_one(to_id(value)) end - def find_evidence(filters = [], offset = 0, length = 0, facet = false) - query_hash = to_query(filters) - query_opts = query_options( - query_hash, - :skip => offset, - :limit => length, - :sort => [ - [:bel_statement, :asc] - ] - ) - - results = { - :cursor => @collection.find(query_hash, query_opts) - } - if facet - facets_doc = @evidence_facets.find_facets(query_hash, filters) - results[:facets] = facets_doc["facets"] + def find_evidence(filters = [], offset = 0, length = 0, facet = false, facet_value_limit = -1) + if includes_fts_search?(filters) + text_search = get_fts_search(filters) + evidence_aggregate(text_search, filters, offset, length, facet, facet_value_limit) + else + evidence_query(filters, offset, length, facet, facet_value_limit) end - - results end - def find_dataset_evidence(dataset, filters = [], offset = 0, length = 0, facet = false) + def 
find_dataset_evidence(dataset, filters = [], offset = 0, length = 0, facet = false, facet_value_limit = -1) query_hash = to_query(filters) query_hash[:$and] ||= [] query_hash[:$and].unshift({ @@ -97,13 +85,124 @@ def find_dataset_evidence(dataset, filters = [], offset = 0, length = 0, facet = :cursor => @collection.find(query_hash, query_opts) } if facet - facets_doc = @evidence_facets.find_facets(query_hash, filters) - results[:facets] = facets_doc["facets"] + facets_cursor = @evidence_facets.find_facets(query_hash, filters, facet_value_limit) + results[:facets] = facets_cursor.to_a end results end + def find_all_namespace_references + references = @collection.aggregate( + [ + { + :$project => { + "references.namespaces" => 1 + } + }, + { + :$unwind => "$references.namespaces" + }, + { + :$project => { + keyword: "$references.namespaces.keyword", + uri: "$references.namespaces.uri" + } + }, + { + :$group => { + _id: "$keyword", uri: { + :$addToSet => "$uri" + } + } + }, + { + :$unwind => "$uri" + }, + { + :$project => { + keyword: "$_id", uri: "$uri", _id: 0 + } + } + ], + { + allowDiskUse: true, + cursor: {} + } + ) + + union = [] + remap = {} + references.each do |obj| + obj = obj.to_h + obj[:keyword] = obj.delete("keyword") + obj[:uri] = obj.delete("uri") + union, new_remap = BEL::Model.union_namespace_references(union, [obj], 'incr') + remap.merge!(new_remap) + end + + remap + end + + def find_all_annotation_references + references = @collection.aggregate( + [ + { + :$project => {"references.annotations" => 1} + }, + { + :$unwind => "$references.annotations" + }, + { + :$project => { + keyword: "$references.annotations.keyword", + type: "$references.annotations.type", + domain: "$references.annotations.domain" + } + }, + { + :$group => { + _id: "$keyword", + type: { + :$addToSet => "$type" + }, + domain: { + :$addToSet => "$domain" + } + } + }, + { + :$unwind => "$type" + }, + { + :$unwind => "$domain" + }, + { + :$project => { + keyword: "$_id", type: 
"$type", domain: "$domain", _id: 0 + } + } + ], + { + allowDiskUse: true, + cursor: {} + } + ) + + union = [] + remap = {} + references.each do |obj| + obj = obj.to_h + obj[:keyword] = obj.delete("keyword") + obj[:type] = obj.delete("type") + obj[:domain] = obj.delete("domain") + union, new_remap = BEL::Model.union_annotation_references(union, [obj], 'incr') + remap.merge!(new_remap) + end + + remap + end + def count_evidence(filters = []) query_hash = to_query(filters) @collection.count(:query => query_hash) @@ -123,16 +222,16 @@ def update_evidence_by_id(value, evidence) nil end - def delete_dataset(identifier) - @collection.ensure_index( - {:"_dataset" => Mongo::ASCENDING }, - :background => true - ) + def delete_facets + @evidence_facets.delete_all_facets + end + def delete_dataset(identifier) @collection.remove( { :"_dataset" => identifier }, :j => true ) + @evidence_facets.delete_all_facets end def delete_evidence(value) @@ -173,8 +272,170 @@ def delete_evidence_by_id(value) ) end + def ensure_all_indexes + @collection.ensure_index( + { :bel_statement => Mongo::ASCENDING }, + :background => true + ) + @collection.ensure_index( + { :"$**" => Mongo::TEXT }, + :background => true + ) + @collection.ensure_index( + { :_dataset => Mongo::ASCENDING }, + :background => true + ) + @collection.ensure_index( + { :"experiment_context.name" => Mongo::ASCENDING }, + :background => true + ) + @collection.ensure_index( + { :"metadata.name" => Mongo::ASCENDING }, + :background => true + ) + end + private + def evidence_query(filters = [], offset = 0, length = 0, facet = false, facet_value_limit = -1) + query_hash = to_query(filters) + query_opts = query_options( + query_hash, + :skip => offset, + :limit => length, + :sort => [ + [:bel_statement, :asc] + ] + ) + + results = { + :cursor => @collection.find(query_hash, query_opts) + } + if facet + facets_cursor = @evidence_facets.find_facets(query_hash, filters, facet_value_limit) + results[:facets] = facets_cursor.to_a + end + 
+ results + end + + def evidence_aggregate(text_search, filters = [], offset = 0, length = 0, facet = false, facet_value_limit = -1) + match_filters = filters.select { |filter| + filter['category'] != 'fts' + } + match = build_filter_query(match_filters) + match[:$and].unshift({ + :$text => { + :$search => text_search + } + }) + + pipeline = [ + { + :$match => match + }, + { + :$project => { + :_id => 1, + :bel_statement => 1, + :score => { + :$meta => 'textScore' + } + } + }, + { + :$sort => { + :score => { + :$meta => 'textScore' + }, + :bel_statement => 1 + } + } + ] + + if offset > 0 + pipeline << { + :$skip => offset + } + end + + if length > 0 + pipeline << { + :$limit => length + } + end + + fts_cursor = @collection.aggregate(pipeline, { + :allowDiskUse => true, + :cursor => { + :batchSize => 0 + } + }) + _ids = fts_cursor.map { |doc| doc['_id'] } + + facets = + if facet + query_hash = to_query(filters) + facets_cursor = @evidence_facets.find_facets(query_hash, filters, facet_value_limit) + facets_cursor.to_a + else + nil + end + + { + :cursor => @collection.find({:_id => {:$in => _ids}}), + :facets => facets + } + end + + def includes_fts_search?(filters) + filters.any? 
{ |filter| + filter['category'] == 'fts' && filter['name'] == 'search' + } + end + + def get_fts_search(filters) + fts_filter = filters.find { |filter| + filter['category'] == 'fts' && filter['name'] == 'search' + } + fts_filter.fetch('value', '') + end + + def build_filter_query(filters = []) + { + :$and => filters.map { |filter| + category = filter['category'] + name = filter['name'] + value = filter['value'] + + case category + when 'experiment_context' + { + :experiment_context => { + :$elemMatch => { + :name => name.to_s, + :value => value.to_s + } + } + } + when 'metadata' + { + :metadata => { + :$elemMatch => { + :name => name.to_s, + :value => value.to_s + } + } + } + else + { + "#{filter['category']}.#{filter['name']}" => filter['value'].to_s + } + end + } + } + end + def to_query(filters = []) if !filters || filters.empty? return {} @@ -238,13 +499,12 @@ def to_id(value) end def remove_evidence_facets(_id) - evidence = @collection.find_one(_id, { - :fields => [ 'facets' ] + doc = @collection.find_one(_id, { + :fields => {:_id => 0, :facets => 1} }) - if evidence && evidence.has_key?('facets') - @evidence_facets.remove_facets_by_filters(evidence['facets']) - @evidence_facets.remove_facets_by_filters + if doc && doc.has_key?('facets') + @evidence_facets.delete_facets(doc['facets']) end end end diff --git a/lib/openbel/api/evidence/mongo_facet.rb b/lib/openbel/api/evidence/mongo_facet.rb index 7d7d3e0..d1bfc7c 100644 --- a/lib/openbel/api/evidence/mongo_facet.rb +++ b/lib/openbel/api/evidence/mongo_facet.rb @@ -12,10 +12,15 @@ class EvidenceFacets include FacetFilter def initialize(options = {}) - host = options[:host] - port = options[:port] - db = options[:database] - @db = MongoClient.new(host, port).db(db) + host = options[:host] + port = options[:port] + db = options[:database] + @db = MongoClient.new( + host, + port, + :op_timeout => nil, + :pool_size => 30 + ).db(db) # Authenticate user if provided. 
username = options[:username] @@ -25,90 +30,192 @@ def initialize(options = {}) @db.authenticate(username, password, nil, auth_db) end - @evidence = @db[:evidence] - @evidence_facets = @db[:evidence_facets] + @evidence = @db[:evidence] + @evidence_facet_cache = @db[:evidence_facet_cache] + + # ensure all indexes are created and maintained + ensure_all_indexes end - def create_facets(_id, query_hash) - # create and save facets, identified by query - facets_doc = _id.merge({ - :facets => evidence_facets(query_hash) - }) - @evidence_facets.save(facets_doc, :j => true) + def find_facets(query, filters, facet_value_limit = -1) + sorted_filters = sort_filters(filters) + cache_collection = facet_cache_collection(sorted_filters) + + if no_collection?(cache_collection) + cache_collection = "evidence_facet_cache_#{EvidenceFacets.generate_uuid}" + create_aggr = create_aggregation(cache_collection, query) + @evidence.aggregate(create_aggr[:pipeline], create_aggr[:options]) + @evidence_facet_cache.insert({ + :filters => sorted_filters, + :cache_collection => cache_collection + }) + end + + # set up field projection based on value limit + field_projection = { + :_id => 0, + :category => 1, + :name => 1, + :values => 1 + } + if facet_value_limit > 0 + field_projection[:values] = {:$slice => facet_value_limit} + end - # return facets document - facets_doc + # cursor facets and apply "filter" + @db[cache_collection].find({}, :fields => field_projection).map { |facet_doc| + category, name = facet_doc.values_at('category', 'name') + facet_doc['values'].each do |value| + value[:filter] = MultiJson.dump({ + :category => category, + :name => name, + :value => value['value'] + }) + end + facet_doc + } end - def find_facets(query_hash, filters) - _id = {:_id => to_facets_id(filters)} - @evidence_facets.find_one(_id) || create_facets(_id, query_hash) + def delete_facets(facets) + # Add zero-filter to facets; clears the default search + facets = facets.to_a + facets.unshift([]) + + # Drop 
facet cache collections + @evidence_facet_cache.find( + {:filters => {:$in => facets}}, + :fields => {:_id => 0, :cache_collection => 1} + ).each do |doc| + cache_collection = doc['cache_collection'] + @db[cache_collection].drop() + end + + # remove filter match entries in evidence_facet_cache + @evidence_facet_cache.remove({:filters => {:$in => facets}}) end - def remove_facets_by_filters(filters = [], options = {}) - remove_spec = - if filters.empty? - { :_id => "" } - else - { - :_id => { - :$in => filters.map { |filter| - to_regexp(MultiJson.load(filter)) - } - } - } - end - @evidence_facets.remove(remove_spec, :j => true) + def delete_all_facets + @evidence_facet_cache.find( + {}, + :fields => {:_id => 0, :cache_collection => 1} + ).each do |doc| + cache_collection = doc['cache_collection'] + @db[cache_collection].drop() + end + + # remove all entries in evidence_facet_cache + @evidence_facet_cache.remove({}) + end + + def ensure_all_indexes + @evidence_facet_cache.ensure_index([ + [:"filters.category", Mongo::ASCENDING], + [:"filters.name", Mongo::ASCENDING] + ], + :background => true + ) end private - def to_regexp(filter) - filter_s = "#{filter['category']}|#{filter['name']}|#{filter['value']}" - %r{.*#{Regexp.escape(filter_s)}.*} + def no_collection?(collection) + !collection || !@db.collection_names.include?(collection) end + def sort_filters(filters) + filters.sort { |f1, f2| + f1_array = f1.values_at(:category, :name, :value) + f2_array = f2.values_at(:category, :name, :value) - - def to_facets_id(filters) - filters.map { |filter| - "#{filter['category']}|#{filter['name']}|#{filter['value']}" - }.sort.join(',') + f1_array <=> f2_array + } end - def evidence_facets(query_hash = nil) - pipeline = - if query_hash.is_a?(Hash) && !query_hash.empty?
- [{:'$match' => query_hash}] + AGGREGATION_PIPELINE - else - AGGREGATION_PIPELINE - end - @evidence.aggregate(pipeline) + def facet_cache_collection(filters) + result = @evidence_facet_cache.find_one( + {:filters => filters}, + :fields => {:cache_collection => 1, :_id => 0} + ) + + result && result['cache_collection'] end - AGGREGATION_PIPELINE = [ - { - :'$project' => { - :_id => 0, - :facets => 1 - } - }, - { - :'$unwind' => '$facets' - }, + def create_aggregation(out_collection, match_query = {}, options = {}) + pipeline = CREATE_AGGREGATION[:pipeline] + [{ :$out => out_collection }] + unless match_query.empty? + pipeline.unshift({ :$match => match_query }) + end + { - :'$group' => { - :_id => '$facets', - :count => { - :'$sum' => 1 + :pipeline => pipeline, + :options => CREATE_AGGREGATION[:options].merge(options) + } + end + + # Define the aggregation pipeline + CREATE_AGGREGATION = { + :pipeline => [ + { + :$project => { + :_id => 0, + :facets => 1 + } + }, + { + :$unwind => '$facets' + }, + { + :$group => { + :_id => '$facets', + :count => { + :$sum => 1 + } + } + }, + { + :$sort => { + :count => -1 + } + }, + { + :$group => { + :_id => { + :category => '$_id.category', + :name => '$_id.name' + }, + :values => { + :$push => { + :value => '$_id.value', + :count => '$count' + } + } + } + }, + { + :$project => { + :category => '$_id.category', + :name => '$_id.name', + :values => { :$slice => ['$values', 1000] } } } - }, - { - :'$sort' => { - :count => -1 - } + ], + :options => { + :allowDiskUse => true } - ] + } + + # Define UUID implementation based on Ruby. 
+ if RUBY_ENGINE =~ /^jruby/i + java_import 'java.util.UUID' + define_singleton_method(:generate_uuid) do + Java::JavaUtil::UUID.random_uuid.to_s + end + else + require 'uuid' + define_singleton_method(:generate_uuid) do + UUID.generate + end + end end end end diff --git a/spec/evidence/api_evidence_spec.rb b/spec/evidence/api_evidence_spec.rb index 9257e55..d5f170c 100644 --- a/spec/evidence/api_evidence_spec.rb +++ b/spec/evidence/api_evidence_spec.rb @@ -12,7 +12,7 @@ expect(response.status).to eql(404) end - it 'returns an array when the resource collection is non-empty' do + it 'returns an object when the resource collection is non-empty' do # create response = api_conn.post('/api/evidence') { |req| req.headers['Content-Type'] = 'application/json; charset=utf-8' @@ -22,8 +22,9 @@ expect(response['Location']).not_to be_empty location = response['Location'] - expect(evidence_api._resource['evidence']).to be_an(Array) - expect(evidence_api._resource['evidence'].size).to eql(1) + expect(evidence_api._resource._response.body).to be_a(Hash) + expect(evidence_api._resource._response.body).to include('evidence_collection') + expect(evidence_api._resource['evidence_collection']).to be_an(Array) # clean up api_conn.delete location @@ -65,8 +66,8 @@ evidence_resource = api_conn.get { |req| req.url '/api/evidence' }.body - expect(evidence_resource['evidence']).not_to be_nil - expect(evidence_resource['evidence'].size).to eql(10) + expect(evidence_resource['evidence_collection']).not_to be_nil + expect(evidence_resource['evidence_collection'].size).to eql(10) expect(evidence_resource['metadata']).not_to be_nil expect(evidence_resource['metadata']['collection_paging']).not_to be_nil paging = evidence_resource['metadata']['collection_paging'] @@ -120,8 +121,8 @@ } ) }.body - expect(evidence_resource['evidence']).not_to be_nil - expect(evidence_resource['evidence'].size).to eql(5) + expect(evidence_resource['evidence_collection']).not_to be_nil + 
expect(evidence_resource['evidence_collection'].size).to eql(5) expect(evidence_resource['metadata']).not_to be_nil expect(evidence_resource['metadata']['collection_paging']).not_to be_nil paging = evidence_resource['metadata']['collection_paging'] diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 8ef47d6..eb06ca4 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -8,17 +8,18 @@ HAL = 'application/hal+json' HAL_REGEX = Regexp.escape(HAL) HTTP_OK = 200 +TEST_DEBUG = ENV['TEST_DEBUG'] def api_root ENV['API_ROOT_URL'] || (raise RuntimeError.new('API_ROOT_URL is not set')) end def root_resource(resource_name) - api_client = Hyperclient.new(api_root) do |c| + api_client = Hyperclient.new(api_root) do |c| c.connection do |conn| conn.adapter Faraday.default_adapter - #conn.response :logger conn.response :json, :content_type => 'application/hal+json' + conn.response :logger if TEST_DEBUG end end api_client.headers['Content-Type'] = 'application/json' @@ -37,8 +38,8 @@ def root_resource(resource_name) def api_conn Faraday.new(:url => api_root) do |builder| builder.adapter Faraday.default_adapter - #builder.response :logger builder.response :json, :content_type => 'application/hal+json' + builder.response :logger if TEST_DEBUG end end diff --git a/spec/test_data/example_evidence.json b/spec/test_data/example_evidence.json index fae82a3..3b622a0 100644 --- a/spec/test_data/example_evidence.json +++ b/spec/test_data/example_evidence.json @@ -6,7 +6,7 @@ "name": "Trends in molecular medicine", "id": "12928037", "date": "2003-08-09", - "authors": "de Nigris F|Lerman A|Ignarro LJ|Williams-Ignarro S|Sica V|Baker AH|Lerman LO|Geng YJ|Napoli C", + "authors": ["de Nigris F", "Lerman A"], "comment": "Primary literature from (Trends in Molecular Medicine)." 
}, "summary_text": "Aging, one of the major predictors for atherosclerotic lesion formation, increases\nthe sensitivity of endothelial cells to apoptosis induced by in vitro and in vivo\nstimuli [35–37].", @@ -14,98 +14,40 @@ { "name": "Disease", "value": "atherosclerosis", - "uri": "http://www.openbel.org/bel/namespace/disease-ontology/atherosclerosis", - "url": "http://www.openbel.org/bel/namespace/disease-ontology" - }, - { - "name": "Anatomy", - "value": "artery", - "uri": "http://www.openbel.org/bel/namespace/uberon/artery", - "url": "http://www.openbel.org/bel/namespace/uberon" - }, - { - "name": "TextLocation", - "value": "Review" - }, + "uri": "http://www.openbel.org/bel/namespace/disease-ontology/atherosclerosis" + } + ], + "metadata": [ { - "name": "Cell", - "value": "endothelial cell", - "uri": "http://www.openbel.org/bel/namespace/cell-ontology/endothelial cell", - "url": "http://www.openbel.org/bel/namespace/cell-ontology" + "name": "document_header", + "value": { + "Name": "BEL Framework Small Corpus Document", + "Description": "Approximately 2000 hand curated statements drawn from 57 PubMeds.", + "Version": "20131211", + "Copyright": "Copyright (c) 2011-2012, Selventa. All Rights Reserved.", + "Authors": "Selventa", + "Licenses": "Creative Commons Attribution-Non-Commercial-ShareAlike 3.0 Unported License", + "ContactInfo": "support@belframework.org" + } }, { - "name": "MeSHAnatomy", - "value": "Muscle, Smooth, Vascular", - "uri": "http://www.openbel.org/bel/namespace/mesh-anatomy/Muscle, Smooth, Vascular", - "url": "http://www.openbel.org/bel/namespace/mesh-anatomy" + "name": "dataset", + "value": "BEL Framework Example 1 Document" } ], - "metadata": { - "document_header": { - "Name": "BEL Framework Small Corpus Document", - "Description": "Approximately 2000 hand curated statements drawn from 57 PubMeds.", - "Version": "20131211", - "Copyright": "Copyright (c) 2011-2012, Selventa. 
All Rights Reserved.", - "Authors": "Selventa", - "Licenses": "Creative Commons Attribution-Non-Commercial-ShareAlike 3.0 Unported License", - "ContactInfo": "support@belframework.org" - }, - "namespace_definitions": { - "CHEBI": "http://www.openbel.org/bel/namespace/chebi", - "CHEBIID": "http://www.openbel.org/bel/namespace/chebi", - "EGID": "http://www.openbel.org/bel/namespace/entrez-gene", - "GOBP": "http://www.openbel.org/bel/namespace/go-biological-process", - "HGNC": "http://www.openbel.org/bel/namespace/hgnc-human-genes", - "MESHCS": "http://www.openbel.org/bel/namespace/mesh-cellular-structures", - "MESHD": "http://www.openbel.org/bel/namespace/mesh-diseases", - "MESHPP": "http://www.openbel.org/bel/namespace/mesh-processes", - "MGI": "http://www.openbel.org/bel/namespace/mgi-mouse-genes", - "RGD": "http://www.openbel.org/bel/namespace/rgd-rat-genes", - "SCHEM": "http://www.openbel.org/bel/namespace/selventa-legacy-chemicals", - "SCOMP": "http://www.openbel.org/bel/namespace/selventa-named-complexes", - "SDIS": "http://www.openbel.org/bel/namespace/selventa-legacy-diseases", - "SFAM": "http://www.openbel.org/bel/namespace/selventa-protein-families", - "SPAC": "http://www.openbel.org/bel/namespace/swissprot" - }, - "annotation_definitions": { - "Anatomy": { - "type": "url", - "domain": "http://www.openbel.org/bel/namespace/uberon" - }, - "Cell": { - "type": "url", - "domain": "http://www.openbel.org/bel/namespace/cell-ontology" - }, - "CellLine": { - "type": "url", - "domain": "http://www.openbel.org/bel/namespace/cell-line-ontology" - }, - "CellStructure": { - "type": "url", - "domain": "http://www.openbel.org/bel/namespace/mesh-cellular-structures" - }, - "Disease": { - "type": "url", - "domain": "http://www.openbel.org/bel/namespace/disease-ontology" - }, - "MeSHAnatomy": { - "type": "url", - "domain": "http://www.openbel.org/bel/namespace/mesh-anatomy" - }, - "Species": { - "type": "url", - "domain": "http://www.openbel.org/bel/namespace/ncbi-taxonomy" 
- }, - "TextLocation": { - "type": "list", - "domain": [ - "Abstract", - "Results", - "Legend", - "Review" - ] + "references": { + "namespaces": [ + { + "keyword": "GOBP", + "uri": "http://www.openbel.org/bel/namespace/go-biological-process" } - } + ], + "annotations": [ + { + "keyword": "Disease", + "uri": "http://www.openbel.org/bel/namespace/disease-ontology" + } + ] } } } diff --git a/spec/test_data/human_evidence.json b/spec/test_data/human_evidence.json index 0b9fb04..5f8a92b 100644 --- a/spec/test_data/human_evidence.json +++ b/spec/test_data/human_evidence.json @@ -6,7 +6,7 @@ "name": "Trends in molecular medicine", "id": "12928037", "date": "2003-08-09", - "authors": "de Nigris F|Lerman A|Ignarro LJ|Williams-Ignarro S|Sica V|Baker AH|Lerman LO|Geng YJ|Napoli C", + "authors": ["de Nigris F", "Lerman A"], "comment": "Primary literature from (Trends in Molecular Medicine)." }, "summary_text": "TIMP-3 can be anti-angiogenic by direct binding to vascular endothelial growth factor (VEGF) receptor 2 in a matrix-metalloproteinase- independent manner [94].", @@ -16,72 +16,41 @@ "value": "Homo sapiens" } ], - "metadata": { - "document_header": { - "Name": "BEL Framework Small Corpus Document", - "Description": "Approximately 2000 hand curated statements drawn from 57 PubMeds.", - "Version": "20131211", - "Copyright": "Copyright (c) 2011-2012, Selventa. 
All Rights Reserved.", - "Authors": "Selventa", - "Licenses": "Creative Commons Attribution-Non-Commercial-ShareAlike 3.0 Unported License", - "ContactInfo": "support@belframework.org" - }, - "namespace_definitions": { - "CHEBI": "http://www.openbel.org/bel/namespace/chebi", - "CHEBIID": "http://www.openbel.org/bel/namespace/chebi", - "EGID": "http://www.openbel.org/bel/namespace/entrez-gene", - "GOBP": "http://www.openbel.org/bel/namespace/go-biological-process", - "HGNC": "http://www.openbel.org/bel/namespace/hgnc-human-genes", - "MESHCS": "http://www.openbel.org/bel/namespace/mesh-cellular-structures", - "MESHD": "http://www.openbel.org/bel/namespace/mesh-diseases", - "MESHPP": "http://www.openbel.org/bel/namespace/mesh-processes", - "MGI": "http://www.openbel.org/bel/namespace/mgi-mouse-genes", - "RGD": "http://www.openbel.org/bel/namespace/rgd-rat-genes", - "SCHEM": "http://www.openbel.org/bel/namespace/selventa-legacy-chemicals", - "SCOMP": "http://www.openbel.org/bel/namespace/selventa-named-complexes", - "SDIS": "http://www.openbel.org/bel/namespace/selventa-legacy-diseases", - "SFAM": "http://www.openbel.org/bel/namespace/selventa-protein-families", - "SPAC": "http://www.openbel.org/bel/namespace/swissprot" + "metadata": [ + { + "name": "document_header", + "value": { + "Name": "BEL Framework Small Corpus Document", + "Description": "Approximately 2000 hand curated statements drawn from 57 PubMeds.", + "Version": "20131211", + "Copyright": "Copyright (c) 2011-2012, Selventa. 
All Rights Reserved.", + "Authors": "Selventa", + "Licenses": "Creative Commons Attribution-Non-Commercial-ShareAlike 3.0 Unported License", + "ContactInfo": "support@belframework.org" + } }, - "annotation_definitions": { - "Anatomy": { - "type": "url", - "domain": "http://www.openbel.org/bel/namespace/uberon" - }, - "Cell": { - "type": "url", - "domain": "http://www.openbel.org/bel/namespace/cell-ontology" - }, - "CellLine": { - "type": "url", - "domain": "http://www.openbel.org/bel/namespace/cell-line-ontology" - }, - "CellStructure": { - "type": "url", - "domain": "http://www.openbel.org/bel/namespace/mesh-cellular-structures" - }, - "Disease": { - "type": "url", - "domain": "http://www.openbel.org/bel/namespace/disease-ontology" - }, - "MeSHAnatomy": { - "type": "url", - "domain": "http://www.openbel.org/bel/namespace/mesh-anatomy" - }, - "Species": { - "type": "url", - "domain": "http://www.openbel.org/bel/namespace/ncbi-taxonomy" + { + "name": "dataset", + "value": "BEL Framework Example 1 Document" + } + ], + "references": { + "namespaces": [ + { + "keyword": "GOBP", + "uri": "http://www.openbel.org/bel/namespace/go-biological-process" }, - "TextLocation": { - "type": "list", - "domain": [ - "Abstract", - "Results", - "Legend", - "Review" - ] + { + "keyword": "HGNC", + "uri": "http://www.openbel.org/bel/namespace/hgnc-human-genes" } - } + ], + "annotations": [ + { + "keyword": "Ncbi Taxonomy", + "uri": "http://www.openbel.org/bel/namespace/ncbi-taxonomy" + } + ] } } } diff --git a/spec/test_data/mouse_evidence.json b/spec/test_data/mouse_evidence.json index 4c78abd..af7d438 100644 --- a/spec/test_data/mouse_evidence.json +++ b/spec/test_data/mouse_evidence.json @@ -6,7 +6,7 @@ "name": "Cell", "id": "16962653", "date": "2006-10-07", - "authors": "Jacinto E|Facchinetti V|Liu D|Soto N|Wei S|Jung SY|Huang Q|Qin J|Su B", + "authors": ["de Nigris F", "Lerman A"], "comment": "" }, "summary_text": "We next examined the Akt T-loop Thr308 phosphorylation in wild-type 
and SIN1−/− cells. We found that although Ser473 phosphorylation was completely abolished in the SIN1−/− cells, Thr308 phosphorylation of Akt was not blocked (Figure 3A).", @@ -16,72 +16,37 @@ "value": "Mus musculus" } ], - "metadata": { - "document_header": { - "Name": "BEL Framework Small Corpus Document", - "Description": "Approximately 2000 hand curated statements drawn from 57 PubMeds.", - "Version": "20131211", - "Copyright": "Copyright (c) 2011-2012, Selventa. All Rights Reserved.", - "Authors": "Selventa", - "Licenses": "Creative Commons Attribution-Non-Commercial-ShareAlike 3.0 Unported License", - "ContactInfo": "support@belframework.org" - }, - "namespace_definitions": { - "CHEBI": "http://www.openbel.org/bel/namespace/chebi", - "CHEBIID": "http://www.openbel.org/bel/namespace/chebi", - "EGID": "http://www.openbel.org/bel/namespace/entrez-gene", - "GOBP": "http://www.openbel.org/bel/namespace/go-biological-process", - "HGNC": "http://www.openbel.org/bel/namespace/hgnc-human-genes", - "MESHCS": "http://www.openbel.org/bel/namespace/mesh-cellular-structures", - "MESHD": "http://www.openbel.org/bel/namespace/mesh-diseases", - "MESHPP": "http://www.openbel.org/bel/namespace/mesh-processes", - "MGI": "http://www.openbel.org/bel/namespace/mgi-mouse-genes", - "RGD": "http://www.openbel.org/bel/namespace/rgd-rat-genes", - "SCHEM": "http://www.openbel.org/bel/namespace/selventa-legacy-chemicals", - "SCOMP": "http://www.openbel.org/bel/namespace/selventa-named-complexes", - "SDIS": "http://www.openbel.org/bel/namespace/selventa-legacy-diseases", - "SFAM": "http://www.openbel.org/bel/namespace/selventa-protein-families", - "SPAC": "http://www.openbel.org/bel/namespace/swissprot" - }, - "annotation_definitions": { - "Anatomy": { - "type": "url", - "domain": "http://www.openbel.org/bel/namespace/uberon" - }, - "Cell": { - "type": "url", - "domain": "http://www.openbel.org/bel/namespace/cell-ontology" - }, - "CellLine": { - "type": "url", - "domain": 
"http://www.openbel.org/bel/namespace/cell-line-ontology" - }, - "CellStructure": { - "type": "url", - "domain": "http://www.openbel.org/bel/namespace/mesh-cellular-structures" - }, - "Disease": { - "type": "url", - "domain": "http://www.openbel.org/bel/namespace/disease-ontology" - }, - "MeSHAnatomy": { - "type": "url", - "domain": "http://www.openbel.org/bel/namespace/mesh-anatomy" - }, - "Species": { - "type": "url", - "domain": "http://www.openbel.org/bel/namespace/ncbi-taxonomy" - }, - "TextLocation": { - "type": "list", - "domain": [ - "Abstract", - "Results", - "Legend", - "Review" - ] + "metadata": [ + { + "name": "document_header", + "value": { + "Name": "BEL Framework Small Corpus Document", + "Description": "Approximately 2000 hand curated statements drawn from 57 PubMeds.", + "Version": "20131211", + "Copyright": "Copyright (c) 2011-2012, Selventa. All Rights Reserved.", + "Authors": "Selventa", + "Licenses": "Creative Commons Attribution-Non-Commercial-ShareAlike 3.0 Unported License", + "ContactInfo": "support@belframework.org" } + }, + { + "name": "dataset", + "value": "BEL Framework Example 1 Document" } + ], + "references": { + "namespaces": [ + { + "keyword": "MGI", + "uri": "http://www.openbel.org/bel/namespace/mgi-mouse-genes" + } + ], + "annotations": [ + { + "keyword": "Ncbi Taxonomy", + "uri": "http://www.openbel.org/bel/namespace/ncbi-taxonomy" + } + ] } } } diff --git a/tools/migrations/0.6.0/README.md b/tools/migrations/0.6.0/README.md new file mode 100644 index 0000000..1629ac2 --- /dev/null +++ b/tools/migrations/0.6.0/README.md @@ -0,0 +1,44 @@ +## MongoDB Migrations for 0.6.0 + +The 0.6.0 version of OpenBEL API introduces a change to how evidence facets are stored in MongoDB. + +### Change Detail + +#### 0.5.1 + +Collections: + +- `evidence` + - Stores evidence.facets as strings. +- `evidence_facets` + - Stores evidence facet objects for all searches. 
+ +#### 0.6.0 + +Collections: + +- `evidence` + - Stores evidence.facets as JSON objects for use in Mongo aggregation operations. +- `evidence_facet_cache` + - Stores the facet collection name for each unique evidence search. +- `evidence_facet_cache_{UUID}` + - Stores evidence facet objects for a specific evidence search. + +### Migration Procedure + +The migrations are JRuby scripts that can be run directly as scripts (i.e. includes `#!/usr/bin/env jruby` shebang). You will need the OpenBEL API repository on GitHub as well as your OpenBEL API configuration file. + +It is recommended to stop OpenBEL API and MongoDB before migrating. + +1. Stop OpenBEL API. +2. Stop MongoDB daemon. +3. Clone OpenBEL API repository. + - `git clone https://github.com/OpenBEL/openbel-api.git` +4. Change directory to the 0.6.0 migrations directory. + - `cd openbel-api/tools/migrations/0.6.0` +5. Run *migrate_evidence_facets.rb* to update evidence.facets to JSON objects. + - `./migrate_evidence_facets.rb YOUR_CONFIG.yml` or `jruby migrate_evidence_facets.rb YOUR_CONFIG.yml` +6. Run *drop_unused_collection.rb* to remove the old *evidence_facets* collection. + - `./drop_unused_collection.rb YOUR_CONFIG.yml` or `jruby drop_unused_collection.rb YOUR_CONFIG.yml` +7. Start MongoDB daemon. +8. Start OpenBEL API. diff --git a/tools/migrations/0.6.0/clear_evidence_facets_cache.rb b/tools/migrations/0.6.0/clear_evidence_facets_cache.rb new file mode 100755 index 0000000..3d75850 --- /dev/null +++ b/tools/migrations/0.6.0/clear_evidence_facets_cache.rb @@ -0,0 +1,75 @@ +#!/usr/bin/env jruby + +# Clears out evidence facet caches that may have been built before evidence +# documents were migrated for 0.6.0. +# +# Mongo migration: +# - Drops all evidence_facet_cache_* collections. +# - Removes all documents from evidence_facet_cache that referenced the +# dropped collections.
+# + +require 'openbel/api/config' +require 'openbel/api/version' + +VERSION_REQUIREMENT = "0.6.0" +ACTIVE_VERSION = OpenBEL::Version::STRING + +ENV['OPENBEL_API_CONFIG_FILE'] ||= (ARGV.first || ENV['OPENBEL_API_CONFIG_FILE']) + +unless ENV['OPENBEL_API_CONFIG_FILE'] + $stderr.puts "usage: clear_evidence_facets_cache.rb [CONFIG FILE]\n" + $stderr.puts "Alternatively set the environment variable OPENBEL_API_CONFIG_FILE" + exit 1 +end + +def setup_mongo(cfg) + require 'mongo' + + host = cfg[:host] + port = cfg[:port] + db = Mongo::MongoClient.new(host, port, + :op_timeout => 300 + ).db(cfg[:database]) + + # Authenticate user if provided. + username = cfg[:username] + password = cfg[:password] + if username && password + auth_db = cfg[:authentication_database] || db + db.authenticate(username, password, nil, auth_db) + end + + db +end + +def migrate(mongo) + if mongo.collection_names.include?('evidence_facet_cache') + mongo['evidence_facet_cache'].remove({}) + puts %Q{Removing documents from "evidence_facet_cache" (success).} + end + + mongo.collection_names.select { |name| + name =~ /^evidence_facet_cache_[0-9a-f\-]+$/ + }.each do |name| + mongo.drop_collection(name) + puts %Q{Dropped "#{name}" collection (success).} + end + + true +end + +case ACTIVE_VERSION +when VERSION_REQUIREMENT + + cfg = OpenBEL::Config.load! + migrate( + setup_mongo(cfg[:evidence_store][:mongo]) + ) + exit 0 +else + + $stderr.puts %Q{Migration is intended for version "#{VERSION_REQUIREMENT}".} + $stderr.puts %Q{Version "#{ACTIVE_VERSION}" is currently installed.} + exit 1 +end diff --git a/tools/migrations/0.6.0/drop_unused_collection.rb b/tools/migrations/0.6.0/drop_unused_collection.rb new file mode 100755 index 0000000..535a8ac --- /dev/null +++ b/tools/migrations/0.6.0/drop_unused_collection.rb @@ -0,0 +1,67 @@ +#!/usr/bin/env jruby + +# Mongo migration: +# - Drops the now unused "evidence_facets" collection. 
+# - Replaced by the "evidence_facet_cache" collection plus individual UUID cache collections. +# - Idempotent (i.e. Safe to run multiple times.) +# + +require 'openbel/api/config' +require 'openbel/api/version' + +VERSION_REQUIREMENT = "0.6.0" +ACTIVE_VERSION = OpenBEL::Version::STRING + +ENV['OPENBEL_API_CONFIG_FILE'] ||= (ARGV.first || ENV['OPENBEL_API_CONFIG_FILE']) + +unless ENV['OPENBEL_API_CONFIG_FILE'] + $stderr.puts "usage: drop_unused_collection.rb [CONFIG FILE]\n" + $stderr.puts "Alternatively set the environment variable OPENBEL_API_CONFIG_FILE" + exit 1 +end + +def setup_mongo(cfg) + require 'mongo' + + host = cfg[:host] + port = cfg[:port] + db = Mongo::MongoClient.new(host, port, + :op_timeout => 300 + ).db(cfg[:database]) + + # Authenticate user if provided. + username = cfg[:username] + password = cfg[:password] + if username && password + auth_db = cfg[:authentication_database] || db + db.authenticate(username, password, nil, auth_db) + end + + db +end + +def migrate(mongo) + if mongo.collection_names.include?('evidence_facets') + mongo.drop_collection('evidence_facets') + puts %Q{Dropped "evidence_facets" collection (success).} + else + puts %Q{The "evidence_facets" collection does not exist. Nothing to migrate (success).} + end + + true +end + +case ACTIVE_VERSION +when VERSION_REQUIREMENT + + cfg = OpenBEL::Config.load! 
+ migrate( + setup_mongo(cfg[:evidence_store][:mongo]) + ) + exit 0 +else + + $stderr.puts %Q{Migration is intended for version "#{VERSION_REQUIREMENT}".} + $stderr.puts %Q{Version "#{ACTIVE_VERSION}" is currently installed.} + exit 1 +end diff --git a/tools/migrations/0.6.0/migrate_evidence_facets.rb b/tools/migrations/0.6.0/migrate_evidence_facets.rb new file mode 100755 index 0000000..625c22d --- /dev/null +++ b/tools/migrations/0.6.0/migrate_evidence_facets.rb @@ -0,0 +1,105 @@ +#!/usr/bin/env jruby + +# Mongo migration: +# - Converts "evidence.facets" from JSON strings to objects in the document: +# - Each facet will be expanded from a JSON string to: +# { +# category: "...", +# name: "...", +# value: "..." +# } +# - Idempotent (i.e. Safe to run multiple times.) +# + +require 'openbel/api/config' +require 'openbel/api/version' + +VERSION_REQUIREMENT = "0.6.0" +ACTIVE_VERSION = OpenBEL::Version::STRING + +ENV['OPENBEL_API_CONFIG_FILE'] ||= (ARGV.first || ENV['OPENBEL_API_CONFIG_FILE']) + +unless ENV['OPENBEL_API_CONFIG_FILE'] + $stderr.puts "usage: migrate_evidence_facets.rb [CONFIG FILE]\n" + $stderr.puts "Alternatively set the environment variable OPENBEL_API_CONFIG_FILE" + exit 1 +end + +def setup_mongo(cfg) + require 'mongo' + + host = cfg[:host] + port = cfg[:port] + db = Mongo::MongoClient.new(host, port, + :op_timeout => 300 + ).db(cfg[:database]) + + # Authenticate user if provided. + username = cfg[:username] + password = cfg[:password] + if username && password + auth_db = cfg[:authentication_database] || db + db.authenticate(username, password, nil, auth_db) + end + + db +end + +def migrate(mongo) + require 'multi_json' + + count = 0 + skipped = 0 + evidence_collection = mongo[:evidence] + evidence_collection.find do |cursor| + cursor.each do |doc| + facets = doc['facets'] + unless facets.empty? + skip = true + facets.map! 
do |facet| + if facet.is_a?(String) + skip = false + MultiJson.load(facet) + else + facet + end + end + + if skip + skipped += 1 + else + evidence_collection.update( + {:_id => doc['_id']}, + { + :$set => { + :facets => facets + } + } + ) + count += 1 + end + puts "...#{count} evidence migrated" if count > 0 && (count % 100).zero? + end + end + end + + puts "Total of #{count} evidence migrated. Skipped #{skipped} evidence (success)." + true +end + +case ACTIVE_VERSION +when VERSION_REQUIREMENT + + cfg = OpenBEL::Config.load! + migrate( + setup_mongo(cfg[:evidence_store][:mongo]) + ) + + $stdout.puts %Q{Successfully migrated "facets" field of documents in "evidence" collection from strings to full objects.} + exit 0 +else + + $stderr.puts %Q{Migration is intended for version "#{VERSION_REQUIREMENT}".} + $stderr.puts %Q{Version "#{ACTIVE_VERSION}" is currently installed.} + exit 1 +end