From b816ada0cc09a374abdddf195c7986668862a26b Mon Sep 17 00:00:00 2001 From: Jarek Radosz Date: Tue, 20 Apr 2021 10:14:14 +0200 Subject: [PATCH] DEV: General refactoring and cleanup (#474) Included: * DEV: Remove some defunct sites * DEV: Refactor GoogleDocsOnebox * DEV: Refactor PastebinOnebox * DEV: Refactor GfycatOnebox * DEV: Refactor PdfOnebox * DEV: Refactor PubmedOnebox * DEV: Refactor AmazonOnebox * DEV: Minor InstagramOnebox clean up * DEV: Small tweaks to TrelloOnebox Other stuff: * DEV: Don't use `link` and `url` interchangeably * DEV: Roll `Layout#link` into `Layout#uri` * DEV: Properly memoize `Layout#uri` * DEV: Re-use `Layout#uri` * DEV: Rename `get_secure_image` to `secure_image_url` * DEV: Normalize `matches_regexp` calls * DEV: Use path-specific encoding rules * DEV: `Layout#checksum` is no longer used * DEV: Remove double `protected` * DEV: Refactor `Matcher` * DEV: Use the existing `Engine#uri` method * DEV: The first `Engine#initialize` arg is `url` * DEV: Use `match?` * DEV: Simplify allowlistedgeneric template * DEV: Initialize `@options` in json spec * DEV: Simplify conditionals * DEV: Merge attr_readers * DEV: Whitespace, `after()` before tests --- lib/onebox/engine.rb | 16 +- .../engine/allowlisted_generic_onebox.rb | 9 - lib/onebox/engine/amazon_onebox.rb | 39 ++-- lib/onebox/engine/flickr_onebox.rb | 4 +- lib/onebox/engine/gfycat_onebox.rb | 52 ++--- lib/onebox/engine/github_commit_onebox.rb | 2 +- lib/onebox/engine/github_folder_onebox.rb | 2 +- lib/onebox/engine/google_docs_onebox.rb | 56 +++--- lib/onebox/engine/google_maps_onebox.rb | 16 +- lib/onebox/engine/google_photos_onebox.rb | 12 +- lib/onebox/engine/imgur_onebox.rb | 4 +- lib/onebox/engine/instagram_onebox.rb | 5 +- lib/onebox/engine/pastebin_onebox.rb | 26 ++- lib/onebox/engine/pdf_onebox.rb | 22 +-- lib/onebox/engine/pubmed_onebox.rb | 28 +-- lib/onebox/engine/stack_exchange_onebox.rb | 2 +- lib/onebox/engine/standard_embed.rb | 3 - lib/onebox/engine/trello_onebox.rb | 9 +- lib/onebox/engine/youku_onebox.rb | 6 - lib/onebox/layout.rb | 16 +- lib/onebox/matcher.rb | 18 +- lib/onebox/mixins/git_blob_onebox.rb | 8 +- lib/onebox/open_graph.rb | 8 +- lib/onebox/preview.rb | 4 +- spec/fixtures/googledocs.response | 182 ++++++++++++++++++ .../onebox/engine/google_docs_onebox_spec.rb | 34 ++-- spec/lib/onebox/engine/json_spec.rb | 1 + spec/lib/onebox/layout_spec.rb | 8 +- spec/lib/onebox/matcher_spec.rb | 3 +- spec/lib/onebox/oembed_spec.rb | 1 - spec/lib/onebox/open_graph_spec.rb | 2 - spec/lib/onebox/preview_spec.rb | 4 - templates/allowlistedgeneric.mustache | 15 +- 33 files changed, 369 insertions(+), 248 deletions(-) create mode 100644 spec/fixtures/googledocs.response diff --git a/lib/onebox/engine.rb b/lib/onebox/engine.rb index 4986b32e..9f641808 100644 --- a/lib/onebox/engine.rb +++ b/lib/onebox/engine.rb @@ -28,23 +28,19 @@ def self.origins_to_regexes(origins) end end - attr_reader :url, :uri - attr_reader :timeout + attr_reader :url, :uri, :options, :timeout attr :errors DEFAULT = {} - def options - @options - end def options=(opt) - return @options if opt.nil? #make sure options provided - opt = opt.to_h if opt.instance_of?(OpenStruct) + return @options if opt.nil? # make sure options provided + opt = opt.to_h if opt.instance_of?(OpenStruct) @options.merge!(opt) @options end - def initialize(link, timeout = nil) + def initialize(url, timeout = nil) @errors = {} @options = DEFAULT class_name = self.class.name.split("::").last.to_s @@ -52,8 +48,8 @@ def initialize(link, timeout = nil) # Set the engine options extracted from global options. self.options = Onebox.options[class_name] || {} - @url = link - @uri = URI(link) + @url = url + @uri = URI(url) if always_https? @uri.scheme = 'https' @url = @uri.to_s diff --git a/lib/onebox/engine/allowlisted_generic_onebox.rb b/lib/onebox/engine/allowlisted_generic_onebox.rb index 0397296b..f04e064a 100644 --- a/lib/onebox/engine/allowlisted_generic_onebox.rb +++ b/lib/onebox/engine/allowlisted_generic_onebox.rb @@ -27,7 +27,6 @@ def self.default_allowed_domains 500px.com 8tracks.com abc.net.au - about.com answers.com arstechnica.com ask.com @@ -36,11 +35,9 @@ def self.default_allowed_domains bbs.boingboing.net bestbuy.ca bestbuy.com - blip.tv bloomberg.com businessinsider.com change.org - clikthrough.com cnet.com cnn.com codepen.io @@ -90,7 +87,6 @@ def self.default_allowed_domains meetup.com mixcloud.com mlb.com - myshopify.com myspace.com nba.com npr.org @@ -98,16 +94,13 @@ def self.default_allowed_domains photobucket.com pinterest.com reference.com - revision3.com rottentomatoes.com samsung.com - screenr.com scribd.com slideshare.net sourceforge.net speakerdeck.com spotify.com - squidoo.com streamable.com techcrunch.com ted.com @@ -124,7 +117,6 @@ def self.default_allowed_domains twitpic.com usatoday.com viddler.com - videojug.com vine.co walmart.com washingtonpost.com @@ -275,7 +267,6 @@ def data def rewrite_https(html) return unless html - uri = URI(@url) if AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.rewrites) html = html.gsub("http://", "https://") end diff --git a/lib/onebox/engine/amazon_onebox.rb b/lib/onebox/engine/amazon_onebox.rb index eb5463c4..4ea26aa9 100644 --- a/lib/onebox/engine/amazon_onebox.rb +++ b/lib/onebox/engine/amazon_onebox.rb @@ -19,10 +19,8 @@ def url # If possible, fetch the cached HTML body immediately so we can # try to grab the canonical URL from that document, # rather than guess at the best URL structure to use - if body_cacher&.respond_to?('cache_response_body?') - if body_cacher.cache_response_body?(uri.to_s) && body_cacher.cached_response_body_exists?(uri.to_s) - @raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher) - end + if !@raw && has_cached_body + @raw = Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher) end if @raw @@ -31,7 +29,8 @@ def url end if match && match[:id] - return "https://www.amazon.#{tld}/dp/#{Onebox::Helpers.uri_encode(match[:id])}" + id = Addressable::URI.encode_component(match[:id], Addressable::URI::CharacterClasses::PATH) + return "https://www.amazon.#{tld}/dp/#{id}" end @url @@ -49,6 +48,12 @@ def http_params private + def has_cached_body + body_cacher&.respond_to?('cache_response_body?') && + body_cacher.cache_response_body?(uri.to_s) && + body_cacher.cached_response_body_exists?(uri.to_s) + end + def match @match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?[A-Z0-9]+)(?:\/|\?|$)/mi) end @@ -57,9 +62,9 @@ def image if (main_image = raw.css("#main-image")) && main_image.any? attributes = main_image.first.attributes - return attributes["data-a-hires"].to_s if attributes["data-a-hires"] - - if attributes["data-a-dynamic-image"] + if attributes["data-a-hires"] + return attributes["data-a-hires"].to_s + elsif attributes["data-a-dynamic-image"] return ::JSON.parse(attributes["data-a-dynamic-image"].value).keys.first end end @@ -67,9 +72,11 @@ def image if (landing_image = raw.css("#landingImage")) && landing_image.any? attributes = landing_image.first.attributes - return attributes["data-old-hires"].to_s if attributes["data-old-hires"] - - landing_image.first["src"].to_s + if attributes["data-old-hires"] + return attributes["data-old-hires"].to_s + else + return landing_image.first["src"].to_s + end end if (ebook_image = raw.css("#ebooksImgBlkFront")) && ebook_image.any? @@ -91,16 +98,16 @@ def price end def multiple_authors(authors_xpath) - author_list = raw.xpath(authors_xpath) - authors = [] - author_list.each { |a| authors << a.inner_text.strip } - authors.join(", ") + raw + .xpath(authors_xpath) + .map { |a| a.inner_text.strip } + .join(", ") end def data og = ::Onebox::OpenGraph.new(raw) - if raw.at_css('#dp.book_mobile') #printed books + if raw.at_css('#dp.book_mobile') # printed books title = raw.at("h1#title")&.inner_text authors = raw.at_css('#byline_secondary_view_div') ? multiple_authors("//div[@id='byline_secondary_view_div']//span[@class='a-text-bold']") : raw.at("#byline")&.inner_text rating = raw.at("#averageCustomerReviews_feature_div .a-icon")&.inner_text || raw.at("#cmrsArcLink .a-icon")&.inner_text diff --git a/lib/onebox/engine/flickr_onebox.rb b/lib/onebox/engine/flickr_onebox.rb index 8819d6b4..3ed26684 100644 --- a/lib/onebox/engine/flickr_onebox.rb +++ b/lib/onebox/engine/flickr_onebox.rb @@ -32,7 +32,7 @@ def album_html(og) #{album_title} - + HTML @@ -43,7 +43,7 @@ def image_html(og) <<-HTML - Imgur + Imgur HTML end diff --git a/lib/onebox/engine/gfycat_onebox.rb b/lib/onebox/engine/gfycat_onebox.rb index 10973c31..27fd4bf7 100644 --- a/lib/onebox/engine/gfycat_onebox.rb +++ b/lib/onebox/engine/gfycat_onebox.rb @@ -9,8 +9,8 @@ class GfycatOnebox matches_regexp(/^https?:\/\/gfycat\.com\//) always_https + # This engine should have priority over AllowlistedGenericOnebox. def self.priority - # This engine should have priority over AllowlistedGenericOnebox. 1 end @@ -21,6 +21,7 @@ def to_html Gfycat.com +

#{data[:title]} by @@ -36,11 +37,12 @@ def to_html +

#{data[:keywords]}

-

+
HTML @@ -61,52 +63,50 @@ def match @match ||= @url.match(/^https?:\/\/gfycat\.com\/(gifs\/detail\/)?(?.+)/) end - def nokogiri_page - @nokogiri_page ||= begin - response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil - Nokogiri::HTML(response) - end - end + def og_data + return @og_data if defined?(@og_data) - def get_og_data - og_data = {} + response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil + page = Nokogiri::HTML(response) + script = page.at_css('script[type="application/ld+json"]') - if json_string = nokogiri_page.at_css('script[type="application/ld+json"]')&.text - og_data = Onebox::Helpers.symbolize_keys(::MultiJson.load(json_string)) + if json_string = script&.text + @og_data = Onebox::Helpers.symbolize_keys(::MultiJson.load(json_string)) + else + @og_data = {} end - - og_data end def data - og_data = get_og_data + return @data if defined?(@data) - response = { + @data = { name: match[:name], title: og_data[:headline] || 'No Title', author: og_data[:author], - url: @url + url: @url, } - keywords = og_data[:keywords]&.split(',') - if keywords - response[:keywords] = keywords.map { |t| "##{t}" }.join(' ') + if keywords = og_data[:keywords]&.split(',') + @data[:keywords] = keywords + .map { |keyword| "##{keyword}" } + .join(' ') end if og_data[:video] content_url = ::Onebox::Helpers.normalize_url_for_output(og_data[:video][:contentUrl]) video_url = Pathname.new(content_url) - response[:webmUrl] = video_url.sub_ext(".webm").to_s - response[:mp4Url] = video_url.sub_ext(".mp4").to_s + @data[:webmUrl] = video_url.sub_ext(".webm").to_s + @data[:mp4Url] = video_url.sub_ext(".mp4").to_s thumbnail_url = ::Onebox::Helpers.normalize_url_for_output(og_data[:video][:thumbnailUrl]) - response[:posterUrl] = thumbnail_url + @data[:posterUrl] = thumbnail_url - response[:width] = og_data[:video][:width] - response[:height] = og_data[:video][:height] + @data[:width] = og_data[:video][:width] + @data[:height] = og_data[:video][:height] end - response + @data end end end diff --git a/lib/onebox/engine/github_commit_onebox.rb b/lib/onebox/engine/github_commit_onebox.rb index 3bd06b28..d584eb16 100644 --- a/lib/onebox/engine/github_commit_onebox.rb +++ b/lib/onebox/engine/github_commit_onebox.rb @@ -10,7 +10,7 @@ class GithubCommitOnebox include JSON include Onebox::Mixins::GithubBody - matches_regexp Regexp.new("^https?://(?:www\.)?(?:(?:\w)+\.)?(github)\.com(?:/)?(?:.)*/commit/") + matches_regexp(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?(github)\.com(?:\/)?(?:.)*\/commit\//) always_https def url diff --git a/lib/onebox/engine/github_folder_onebox.rb b/lib/onebox/engine/github_folder_onebox.rb index dfc4800b..a0c565f8 100644 --- a/lib/onebox/engine/github_folder_onebox.rb +++ b/lib/onebox/engine/github_folder_onebox.rb @@ -7,7 +7,7 @@ class GithubFolderOnebox include StandardEmbed include LayoutSupport - matches_regexp Regexp.new(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?(github)\.com[\:\d]*(\/[^\/]+){2}/) + matches_regexp(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?(github)\.com[\:\d]*(\/[^\/]+){2}/) always_https def self.priority diff --git a/lib/onebox/engine/google_docs_onebox.rb b/lib/onebox/engine/google_docs_onebox.rb index 971eb463..be1ce63b 100644 --- a/lib/onebox/engine/google_docs_onebox.rb +++ b/lib/onebox/engine/google_docs_onebox.rb @@ -6,58 +6,50 @@ class GoogleDocsOnebox include Engine include LayoutSupport - def self.supported_endpoints - %w(spreadsheets document forms presentation) - end - - def self.short_types - @shorttypes ||= { - spreadsheets: :sheets, - document: :docs, - presentation: :slides, - forms: :forms, - } - end - - matches_regexp(/^(https?:)?\/\/(docs\.google\.com)\/(?(#{supported_endpoints.join('|')}))\/d\/((?[\w-]*)).+$/) + SUPPORTED_ENDPOINTS = %w(spreadsheets document forms presentation) + SHORT_TYPES = { + spreadsheets: :sheets, + document: :docs, + presentation: :slides, + forms: :forms, + } + + matches_regexp(/^(https?:)?\/\/(docs\.google\.com)\/(?(#{SUPPORTED_ENDPOINTS.join('|')}))\/d\/((?[\w-]*)).+$/) always_https - protected + private def data - og_data = get_og_data + short_type = SHORT_TYPES[match[:endpoint].to_sym] + { link: link, - title: og_data[:title] || "Google #{shorttype.to_s.capitalize}", - description: Onebox::Helpers.truncate(og_data[:description], 250) || "This #{shorttype.to_s.chop.capitalize} is private", - type: shorttype + title: og_data[:title] || "Google #{short_type.to_s.capitalize}", + description: Onebox::Helpers.truncate(og_data[:description], 250) || "This #{short_type.to_s.chop.capitalize} is private", + type: short_type } end - def doc_type - @doc_type ||= match[:endpoint].to_sym - end - - def shorttype - GoogleDocsOnebox.short_types[doc_type] - end - def match @match ||= @url.match(@@matcher) end - def get_og_data + def og_data + return @og_data if defined?(@og_data) + response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil html = Nokogiri::HTML(response) - og_data = {} + @og_data = {} + html.css('meta').each do |m| - if m.attribute('property') && m.attribute('property').to_s.match(/^og:/i) + if m.attribute('property')&.to_s&.match(/^og:/i) m_content = m.attribute('content').to_s.strip m_property = m.attribute('property').to_s.gsub('og:', '') - og_data[m_property.to_sym] = m_content + @og_data[m_property.to_sym] = m_content end end - og_data + + @og_data end end end diff --git a/lib/onebox/engine/google_maps_onebox.rb b/lib/onebox/engine/google_maps_onebox.rb index f6dcecdd..bbf8e21c 100644 --- a/lib/onebox/engine/google_maps_onebox.rb +++ b/lib/onebox/engine/google_maps_onebox.rb @@ -119,8 +119,6 @@ def resolve_url! @placeholder = "https://maps.googleapis.com/maps/api/streetview?size=690x400&location=#{lon},#{lat}&pano=#{panoid}&fov=#{zoom}&heading=#{heading}&pitch=#{pitch}&sensor=false" when :canonical - uri = URI(@url) - query = URI::decode_www_form(uri.query).to_h if !query.has_key?("ll") raise ArgumentError, "canonical url lacks location argument" unless query.has_key?("sll") @@ -163,14 +161,20 @@ def rewrite_custom_url(url, target) end def follow_redirect! - uri = URI(@url) begin - http = Net::HTTP.start(uri.host, uri.port, - use_ssl: uri.scheme == 'https', open_timeout: timeout, read_timeout: timeout) - response = http.head(uri.path) + http = Net::HTTP.start( + uri.host, + uri.port, + use_ssl: uri.scheme == 'https', + open_timeout: timeout, + read_timeout: timeout + ) + response = http.head(uri.path) raise "unexpected response code #{response.code}" unless %w(200 301 302).include?(response.code) + @url = response.code == "200" ? uri.to_s : response["Location"] + @uri = URI(@url) ensure http.finish rescue nil end diff --git a/lib/onebox/engine/google_photos_onebox.rb b/lib/onebox/engine/google_photos_onebox.rb index cb1a3344..3a07b7df 100644 --- a/lib/onebox/engine/google_photos_onebox.rb +++ b/lib/onebox/engine/google_photos_onebox.rb @@ -11,9 +11,9 @@ class GooglePhotosOnebox def to_html og = get_opengraph - return video_html(og) if !og.video_secure_url.nil? - return album_html(og) if !og.type.nil? && og.type == "google_photos:photo_album" - return image_html(og) if !og.image.nil? + return video_html(og) if og.video_secure_url + return album_html(og) if og.type == "google_photos:photo_album" + return image_html(og) if og.image nil end @@ -32,7 +32,7 @@ def video_html(og)

#{og.title}

@@ -53,7 +53,7 @@ def album_html(og) #{Onebox::Helpers.truncate(album_title, 80)} - + HTML @@ -64,7 +64,7 @@ def image_html(og) <<-HTML - Google Photos + Google Photos HTML end diff --git a/lib/onebox/engine/imgur_onebox.rb b/lib/onebox/engine/imgur_onebox.rb index 922853db..26a90379 100644 --- a/lib/onebox/engine/imgur_onebox.rb +++ b/lib/onebox/engine/imgur_onebox.rb @@ -40,7 +40,7 @@ def album_html(og) #{album_title} - + HTML @@ -58,7 +58,7 @@ def image_html(og) <<-HTML - Imgur + Imgur HTML end diff --git a/lib/onebox/engine/instagram_onebox.rb b/lib/onebox/engine/instagram_onebox.rb index f150a268..21a8ae6c 100644 --- a/lib/onebox/engine/instagram_onebox.rb +++ b/lib/onebox/engine/instagram_onebox.rb @@ -18,9 +18,8 @@ def data oembed = get_oembed raise "No oEmbed data found. Ensure 'facebook_app_access_token' is valid" if oembed.data.empty? - permalink = clean_url.gsub("/#{oembed.author_name}/", "/") - - { link: permalink, + { + link: clean_url.gsub("/#{oembed.author_name}/", "/"), title: "@#{oembed.author_name}", image: oembed.thumbnail_url, description: Onebox::Helpers.truncate(oembed.title, 250), diff --git a/lib/onebox/engine/pastebin_onebox.rb b/lib/onebox/engine/pastebin_onebox.rb index 2e119dc9..d9b26467 100644 --- a/lib/onebox/engine/pastebin_onebox.rb +++ b/lib/onebox/engine/pastebin_onebox.rb @@ -30,29 +30,25 @@ def truncated? end def lines - return @lines if @lines + return @lines if defined?(@lines) response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", redirect_limit: 1) rescue "" @lines = response.split("\n") end def paste_key - if uri.path =~ /\/raw\// - match = uri.path.match(/\/raw\/([^\/]+)/) - return match[1] if match && match[1] - elsif uri.path =~ /\/download\// - match = uri.path.match(/\/download\/([^\/]+)/) - return match[1] if match && match[1] - elsif uri.path =~ /\/embed\// - match = uri.path.match(/\/embed\/([^\/]+)/) - return match[1] if match && match[1] + regex = case uri + when /\/raw\// + /\/raw\/([^\/]+)/ + when /\/download\// + /\/download\/([^\/]+)/ + when /\/embed\// + /\/embed\/([^\/]+)/ else - match = uri.path.match(/\/([^\/]+)/) - return match[1] if match && match[1] + /\/([^\/]+)/ end - nil - rescue - nil + match = uri.path.match(regex) + match[1] if match && match[1] end end end diff --git a/lib/onebox/engine/pdf_onebox.rb b/lib/onebox/engine/pdf_onebox.rb index 8e8c09aa..2a8d46f0 100644 --- a/lib/onebox/engine/pdf_onebox.rb +++ b/lib/onebox/engine/pdf_onebox.rb @@ -12,25 +12,17 @@ class PdfOnebox private def data - pdf_info = get_pdf_info - raise "Unable to read pdf file: #{@url}" if pdf_info.nil? + begin + size = Onebox::Helpers.fetch_content_length(@url) + rescue + raise "Unable to read pdf file: #{@url}" + end - result = { link: link, - title: pdf_info[:name], - filesize: pdf_info[:filesize] - } - result - end - - def get_pdf_info - uri = URI.parse(@url) - size = Onebox::Helpers.fetch_content_length(@url) { + link: link, + title: File.basename(uri.path), filesize: size ? Onebox::Helpers.pretty_filesize(size.to_i) : nil, - name: File.basename(uri.path) } - rescue - nil end end end diff --git a/lib/onebox/engine/pubmed_onebox.rb b/lib/onebox/engine/pubmed_onebox.rb index 1355f598..1cf8a0ac 100644 --- a/lib/onebox/engine/pubmed_onebox.rb +++ b/lib/onebox/engine/pubmed_onebox.rb @@ -10,13 +10,14 @@ class PubmedOnebox private - def get_xml + def xml + return @xml if defined?(@xml) doc = Nokogiri::XML(URI.open(URI.join(@url, "?report=xml&format=text"))) pre = doc.xpath("//pre") - Nokogiri::XML("" + pre.text + "") + @xml = Nokogiri::XML("" + pre.text + "") end - def authors_of_xml(xml) + def authors initials = xml.css("Initials").map { |x| x.content } last_names = xml.css("LastName").map { |x| x.content } author_list = (initials.zip(last_names)).map { |i, l| i + " " + l } @@ -27,22 +28,25 @@ def authors_of_xml(xml) author_list.join(", ") end - def date_of_xml(xml) - date_arr = (xml.css("PubDate").children).map { |x| x.content } - date_arr = date_arr.select { |s| !s.match(/^\s+$/) } - date_arr = (date_arr.map { |s| s.split }).flatten - date_arr.sort.reverse.join(" ") # Reverse sort so month before year. + def date + xml.css("PubDate") + .children + .map { |x| x.content } + .select { |s| !s.match(/^\s+$/) } + .map { |s| s.split } + .flatten + .sort + .reverse + .join(" ") # Reverse sort so month before year. end def data - xml = get_xml - { title: xml.css("ArticleTitle").text, - authors: authors_of_xml(xml), + authors: authors, journal: xml.css("Title").text, abstract: xml.css("AbstractText").text, - date: date_of_xml(xml), + date: date, link: @url, pmid: match[:pmid] } diff --git a/lib/onebox/engine/stack_exchange_onebox.rb b/lib/onebox/engine/stack_exchange_onebox.rb index 6a867546..c918ed93 100644 --- a/lib/onebox/engine/stack_exchange_onebox.rb +++ b/lib/onebox/engine/stack_exchange_onebox.rb @@ -25,7 +25,7 @@ def match end def url - domain = URI(@url).host + domain = uri.host question_id = match[:question_id] answer_id = match[:answer_id2] || match[:answer_id1] diff --git a/lib/onebox/engine/standard_embed.rb b/lib/onebox/engine/standard_embed.rb index 7d817256..f19f060c 100644 --- a/lib/onebox/engine/standard_embed.rb +++ b/lib/onebox/engine/standard_embed.rb @@ -7,7 +7,6 @@ module Onebox module Engine module StandardEmbed - def self.oembed_providers @@oembed_providers ||= {} end @@ -117,8 +116,6 @@ def get_json_response "{}" end - protected - def get_oembed_url oembed_url = nil diff --git a/lib/onebox/engine/trello_onebox.rb b/lib/onebox/engine/trello_onebox.rb index 9f38cd32..c74b39f0 100644 --- a/lib/onebox/engine/trello_onebox.rb +++ b/lib/onebox/engine/trello_onebox.rb @@ -11,12 +11,11 @@ class TrelloOnebox always_https def to_html - link = "https://trello.com/#{match[:type]}/#{match[:key]}.html" - + src = "https://trello.com/#{match[:type]}/#{match[:key]}.html" height = match[:type] == 'b' ? 400 : 200 <<-HTML - + HTML end @@ -25,12 +24,10 @@ def placeholder_html end private + def match return @match if defined?(@match) - @match = @url.match(%{trello\.com/(?[^/]+)/(?[^/]+)/?\W*}) - - @match end end end diff --git a/lib/onebox/engine/youku_onebox.rb b/lib/onebox/engine/youku_onebox.rb index f0417e8d..2b9234fd 100644 --- a/lib/onebox/engine/youku_onebox.rb +++ b/lib/onebox/engine/youku_onebox.rb @@ -30,12 +30,6 @@ def to_html > HTML end - - private - - def uri - @_uri ||= URI(@url) - end end end end diff --git a/lib/onebox/layout.rb b/lib/onebox/layout.rb index ccc69360..a697ba24 100644 --- a/lib/onebox/layout.rb +++ b/lib/onebox/layout.rb @@ -32,19 +32,7 @@ def to_html private def uri - @uri = URI(link) - end - - def checksum - @md5.hexdigest("#{VERSION}:#{link}") - end - - def link - ::Onebox::Helpers.normalize_url_for_output(record[:link]) - end - - def domain - record[:domain] || URI(link || '').host.to_s.sub(/^www\./, '') + @uri ||= URI(::Onebox::Helpers.normalize_url_for_output(record[:link])) end def details @@ -52,7 +40,7 @@ def details link: record[:link], title: record[:title], favicon: record[:favicon], - domain: domain, + domain: record[:domain] || uri.host.to_s.sub(/^www\./, ''), article_published_time: record[:article_published_time], article_published_time_title: record[:article_published_time_title], metadata_1_label: record[:metadata_1_label], diff --git a/lib/onebox/matcher.rb b/lib/onebox/matcher.rb index a17a248d..2e337904 100644 --- a/lib/onebox/matcher.rb +++ b/lib/onebox/matcher.rb @@ -2,8 +2,12 @@ module Onebox class Matcher - def initialize(link, options = {}) - @url = link + def initialize(url, options = {}) + begin + @uri = URI(url) + rescue URI::InvalidURIError + end + @options = options end @@ -14,12 +18,10 @@ def ordered_engines end def oneboxed - uri = URI(@url) - return unless uri.port.nil? || Onebox.options.allowed_ports.include?(uri.port) - return unless uri.scheme.nil? || Onebox.options.allowed_schemes.include?(uri.scheme) - ordered_engines.find { |engine| engine === uri && has_allowed_iframe_origins?(engine) } - rescue URI::InvalidURIError - nil + return if @uri.nil? + return if @uri.port && !Onebox.options.allowed_ports.include?(@uri.port) + return if @uri.scheme && !Onebox.options.allowed_schemes.include?(@uri.scheme) + ordered_engines.find { |engine| engine === @uri && has_allowed_iframe_origins?(engine) } end def has_allowed_iframe_origins?(engine) diff --git a/lib/onebox/mixins/git_blob_onebox.rb b/lib/onebox/mixins/git_blob_onebox.rb index b5251127..cac7b2a7 100644 --- a/lib/onebox/mixins/git_blob_onebox.rb +++ b/lib/onebox/mixins/git_blob_onebox.rb @@ -25,8 +25,8 @@ def self.included(klass) } module InstanceMethods - def initialize(link, timeout = nil) - super link, timeout + def initialize(url, timeout = nil) + super url, timeout # merge engine options from global Onebox.options interface # self.options = Onebox.options["GithubBlobOnebox"] # self.class.name.split("::").last.to_s # self.options = Onebox.options[self.class.name.split("::").last.to_s] #We can use this a more generic approach. extract the engine class name automatically @@ -163,11 +163,9 @@ def raw @file = m[:file] @lang = Onebox::FileTypeFinder.from_file_name(m[:file]) - if @lang == "stl" && link.match(/^https?:\/\/(www\.)?github\.com.*\/blob\//) - + if @lang == "stl" && link.match?(/^https?:\/\/(www\.)?github\.com.*\/blob\//) @model_file = @lang.dup @raw = "https://render.githubusercontent.com/view/solid?url=" + self.raw_template(m) - else contents = URI.open(self.raw_template(m), read_timeout: timeout).read diff --git a/lib/onebox/open_graph.rb b/lib/onebox/open_graph.rb index f4379144..f78e3d08 100644 --- a/lib/onebox/open_graph.rb +++ b/lib/onebox/open_graph.rb @@ -17,10 +17,10 @@ def title_attr !title.nil? ? "title='#{title}'" : "" end - def get_secure_image - secure_link = URI(get(:image)) - secure_link.scheme = 'https' - secure_link.to_s + def secure_image_url + secure_url = URI(get(:image)) + secure_url.scheme = 'https' + secure_url.to_s end def method_missing(attr, *args, &block) diff --git a/lib/onebox/preview.rb b/lib/onebox/preview.rb index 48d29df2..ada48e1b 100644 --- a/lib/onebox/preview.rb +++ b/lib/onebox/preview.rb @@ -6,8 +6,8 @@ class Preview client_exception = defined?(Net::HTTPClientException) ? Net::HTTPClientException : Net::HTTPServerException WEB_EXCEPTIONS ||= [client_exception, OpenURI::HTTPError, Timeout::Error, Net::HTTPError, Errno::ECONNREFUSED] - def initialize(link, options = Onebox.options) - @url = link + def initialize(url, options = Onebox.options) + @url = url @options = options.dup allowed_origins = @options[:allowed_iframe_origins] || Onebox::Engine.all_iframe_origins diff --git a/spec/fixtures/googledocs.response b/spec/fixtures/googledocs.response new file mode 100644 index 00000000..be56d2cf --- /dev/null +++ b/spec/fixtures/googledocs.response @@ -0,0 +1,182 @@ +Lorem Ipsum! - Dokumenty Google
Lorem Ipsum
 Udostępnij
Używana przez Ciebie wersja przeglądarki nie jest już obsługiwana. Uaktualnij przeglądarkę do obsługiwanej wersji.Zamknij

diff --git a/spec/lib/onebox/engine/google_docs_onebox_spec.rb b/spec/lib/onebox/engine/google_docs_onebox_spec.rb index f67a639c..9646cbf5 100644 --- a/spec/lib/onebox/engine/google_docs_onebox_spec.rb +++ b/spec/lib/onebox/engine/google_docs_onebox_spec.rb @@ -3,35 +3,25 @@ require "spec_helper" describe Onebox::Engine::GoogleDocsOnebox do - context "Spreadsheets" do - let(:matcher) { described_class.new("https://docs.google.com/spreadsheets/d/SHEET_KEY/pubhtml") } - - it "should be a spreadsheet" do - expect(matcher.send(:shorttype)).to eq (:sheets) - end + before(:all) do + @link = "https://docs.google.com/document/d/DOC_KEY/pub" + fake(@link, response("googledocs")) end - context "Documents" do - let(:matcher) { described_class.new("https://docs.google.com/document/d/DOC_KEY/pub") } + include_context "engines" + it_behaves_like "an engine" - it "should be a document" do - expect(matcher.send(:shorttype)).to eq (:docs) + describe "#to_html" do + it "has title" do + expect(html).to include("Lorem Ipsum!") end - end - context "Presentaions" do - let(:matcher) { described_class.new("https://docs.google.com/presentation/d/PRESENTATION_KEY/pub") } - - it "should be a presentation" do - expect(matcher.send(:shorttype)).to eq (:slides) + it "has description" do + expect(html).to include("Lorem Ipsum Lorem ipsum dolor sit amet, consectetur adipiscing elit") end - end - - context "Forms" do - let(:matcher) { described_class.new("https://docs.google.com/forms/d/FORMS_KEY/viewform") } - it "should be a form" do - expect(matcher.send(:shorttype)).to eq (:forms) + it "has icon" do + expect(html).to include("googledocs-onebox-logo g-docs-logo") end end end diff --git a/spec/lib/onebox/engine/json_spec.rb b/spec/lib/onebox/engine/json_spec.rb index 7388c558..07ae2e39 100644 --- a/spec/lib/onebox/engine/json_spec.rb +++ b/spec/lib/onebox/engine/json_spec.rb @@ -15,6 +15,7 @@ class OneboxEngineJSON def initialize(link) @url = link + @options = {} end end diff --git a/spec/lib/onebox/layout_spec.rb b/spec/lib/onebox/layout_spec.rb index cd6b4f7f..63487d78 100644 --- a/spec/lib/onebox/layout_spec.rb +++ b/spec/lib/onebox/layout_spec.rb @@ -15,6 +15,10 @@ Onebox.options.load_paths << "directory_b" end + after(:each) do + Onebox.options.load_paths.pop(2) + end + context "when template exists in directory_b" do before(:each) do allow_any_instance_of(described_class).to receive(:template?) { |_, path| path == "directory_b" } @@ -40,10 +44,6 @@ expect(template_path).to include("template") end end - - after(:each) do - Onebox.options.load_paths.pop(2) - end end describe "#to_html" do diff --git a/spec/lib/onebox/matcher_spec.rb b/spec/lib/onebox/matcher_spec.rb index b9b3e6f4..f252ae02 100644 --- a/spec/lib/onebox/matcher_spec.rb +++ b/spec/lib/onebox/matcher_spec.rb @@ -14,8 +14,8 @@ def self.iframe_origins describe Onebox::Matcher do let(:opts) { { allowed_iframe_regexes: [/.*/] } } - describe "oneboxed" do + describe "oneboxed" do describe "with a path" do let(:url) { "http://party.time.made.up-url.com/beep/boop" } let(:matcher) { Onebox::Matcher.new(url, opts) } @@ -101,6 +101,5 @@ def self.iframe_origins expect(matcher.oneboxed).not_to be_nil end end - end end diff --git a/spec/lib/onebox/oembed_spec.rb b/spec/lib/onebox/oembed_spec.rb index ae50dc0e..2b6d3bd2 100644 --- a/spec/lib/onebox/oembed_spec.rb +++ b/spec/lib/onebox/oembed_spec.rb @@ -4,7 +4,6 @@ require "onebox/oembed" describe Onebox::Oembed do - it "excludes html tags" do json = '{"text": ""}' oembed = described_class.new(json) diff --git a/spec/lib/onebox/open_graph_spec.rb b/spec/lib/onebox/open_graph_spec.rb index cddf21be..ede7afa7 100644 --- a/spec/lib/onebox/open_graph_spec.rb +++ b/spec/lib/onebox/open_graph_spec.rb @@ -4,7 +4,6 @@ require "onebox/open_graph" describe Onebox::OpenGraph do - it "excludes html tags in title" do doc = Nokogiri::HTML('Did’ you <b>miss me</b>? - Album on Imgur') og = described_class.new(doc) @@ -18,5 +17,4 @@ og = described_class.new(doc) expect(og.image).to eq("http://test.com/test'ing.mp3") end - end diff --git a/spec/lib/onebox/preview_spec.rb b/spec/lib/onebox/preview_spec.rb index 787fb56b..91226374 100644 --- a/spec/lib/onebox/preview_spec.rb +++ b/spec/lib/onebox/preview_spec.rb @@ -3,7 +3,6 @@ require "spec_helper" describe Onebox::Preview do - before do fake("https://www.amazon.com/product", response("amazon")) end @@ -70,7 +69,6 @@ result = preview.to_s expect(result).not_to match(/onerror/) end - end describe "iframe sanitizer" do @@ -100,7 +98,5 @@ result = preview.to_s expect(result).to include ' src="https://thirdparty.example.com"' end - end - end diff --git a/templates/allowlistedgeneric.mustache b/templates/allowlistedgeneric.mustache index 0d0fde9f..abc700be 100644 --- a/templates/allowlistedgeneric.mustache +++ b/templates/allowlistedgeneric.mustache @@ -3,15 +3,14 @@

{{title}}

{{#description}} -

{{description}}

+

{{description}}

{{/description}} {{#data_1}} -

{{label_1}}: {{data_1}} - {{#data_2}} - {{label_2}}: {{data_2}}

- {{/data_2}} - {{^data_2}} -

- {{/data_2}} +

+ {{label_1}}: {{data_1}} + {{#data_2}} + {{label_2}}: {{data_2}} + {{/data_2}} +

{{/data_1}}