From b816ada0cc09a374abdddf195c7986668862a26b Mon Sep 17 00:00:00 2001
From: Jarek Radosz
Date: Tue, 20 Apr 2021 10:14:14 +0200
Subject: [PATCH] DEV: General refactoring and cleanup (#474)
Included:
* DEV: Remove some defunct sites
* DEV: Refactor GoogleDocsOnebox
* DEV: Refactor PastebinOnebox
* DEV: Refactor GfycatOnebox
* DEV: Refactor PdfOnebox
* DEV: Refactor PubmedOnebox
* DEV: Refactor AmazonOnebox
* DEV: Minor InstagramOnebox clean up
* DEV: Small tweaks to TrelloOnebox
Other stuff:
* DEV: Don't use `link` and `url` interchangeably
* DEV: Roll `Layout#link` into `Layout#uri`
* DEV: Properly memoize `Layout#uri`
* DEV: Re-use `Layout#uri`
* DEV: Rename `get_secure_image` to `secure_image_url`
* DEV: Normalize `matches_regexp` calls
* DEV: Use path-specific encoding rules
* DEV: `Layout#checksum` is no longer used
* DEV: Remove double `protected`
* DEV: Refactor `Matcher`
* DEV: Use the existing `Engine#uri` method
* DEV: The first `Engine#initialize` arg is `url`
* DEV: Use `match?`
* DEV: Simplify allowlistedgeneric template
* DEV: Initialize `@options` in json spec
* DEV: Simplify conditionals
* DEV: Merge attr_readers
* DEV: Whitespace, `after()` before tests
---
lib/onebox/engine.rb | 16 +-
.../engine/allowlisted_generic_onebox.rb | 9 -
lib/onebox/engine/amazon_onebox.rb | 39 ++--
lib/onebox/engine/flickr_onebox.rb | 4 +-
lib/onebox/engine/gfycat_onebox.rb | 52 ++---
lib/onebox/engine/github_commit_onebox.rb | 2 +-
lib/onebox/engine/github_folder_onebox.rb | 2 +-
lib/onebox/engine/google_docs_onebox.rb | 56 +++---
lib/onebox/engine/google_maps_onebox.rb | 16 +-
lib/onebox/engine/google_photos_onebox.rb | 12 +-
lib/onebox/engine/imgur_onebox.rb | 4 +-
lib/onebox/engine/instagram_onebox.rb | 5 +-
lib/onebox/engine/pastebin_onebox.rb | 26 ++-
lib/onebox/engine/pdf_onebox.rb | 22 +--
lib/onebox/engine/pubmed_onebox.rb | 28 +--
lib/onebox/engine/stack_exchange_onebox.rb | 2 +-
lib/onebox/engine/standard_embed.rb | 3 -
lib/onebox/engine/trello_onebox.rb | 9 +-
lib/onebox/engine/youku_onebox.rb | 6 -
lib/onebox/layout.rb | 16 +-
lib/onebox/matcher.rb | 18 +-
lib/onebox/mixins/git_blob_onebox.rb | 8 +-
lib/onebox/open_graph.rb | 8 +-
lib/onebox/preview.rb | 4 +-
spec/fixtures/googledocs.response | 182 ++++++++++++++++++
.../onebox/engine/google_docs_onebox_spec.rb | 34 ++--
spec/lib/onebox/engine/json_spec.rb | 1 +
spec/lib/onebox/layout_spec.rb | 8 +-
spec/lib/onebox/matcher_spec.rb | 3 +-
spec/lib/onebox/oembed_spec.rb | 1 -
spec/lib/onebox/open_graph_spec.rb | 2 -
spec/lib/onebox/preview_spec.rb | 4 -
templates/allowlistedgeneric.mustache | 15 +-
33 files changed, 369 insertions(+), 248 deletions(-)
create mode 100644 spec/fixtures/googledocs.response
diff --git a/lib/onebox/engine.rb b/lib/onebox/engine.rb
index 4986b32e..9f641808 100644
--- a/lib/onebox/engine.rb
+++ b/lib/onebox/engine.rb
@@ -28,23 +28,19 @@ def self.origins_to_regexes(origins)
end
end
- attr_reader :url, :uri
- attr_reader :timeout
+ attr_reader :url, :uri, :options, :timeout
attr :errors
DEFAULT = {}
- def options
- @options
- end
def options=(opt)
- return @options if opt.nil? #make sure options provided
- opt = opt.to_h if opt.instance_of?(OpenStruct)
+ return @options if opt.nil? # make sure options provided
+ opt = opt.to_h if opt.instance_of?(OpenStruct)
@options.merge!(opt)
@options
end
- def initialize(link, timeout = nil)
+ def initialize(url, timeout = nil)
@errors = {}
@options = DEFAULT
class_name = self.class.name.split("::").last.to_s
@@ -52,8 +48,8 @@ def initialize(link, timeout = nil)
# Set the engine options extracted from global options.
self.options = Onebox.options[class_name] || {}
- @url = link
- @uri = URI(link)
+ @url = url
+ @uri = URI(url)
if always_https?
@uri.scheme = 'https'
@url = @uri.to_s
diff --git a/lib/onebox/engine/allowlisted_generic_onebox.rb b/lib/onebox/engine/allowlisted_generic_onebox.rb
index 0397296b..f04e064a 100644
--- a/lib/onebox/engine/allowlisted_generic_onebox.rb
+++ b/lib/onebox/engine/allowlisted_generic_onebox.rb
@@ -27,7 +27,6 @@ def self.default_allowed_domains
500px.com
8tracks.com
abc.net.au
- about.com
answers.com
arstechnica.com
ask.com
@@ -36,11 +35,9 @@ def self.default_allowed_domains
bbs.boingboing.net
bestbuy.ca
bestbuy.com
- blip.tv
bloomberg.com
businessinsider.com
change.org
- clikthrough.com
cnet.com
cnn.com
codepen.io
@@ -90,7 +87,6 @@ def self.default_allowed_domains
meetup.com
mixcloud.com
mlb.com
- myshopify.com
myspace.com
nba.com
npr.org
@@ -98,16 +94,13 @@ def self.default_allowed_domains
photobucket.com
pinterest.com
reference.com
- revision3.com
rottentomatoes.com
samsung.com
- screenr.com
scribd.com
slideshare.net
sourceforge.net
speakerdeck.com
spotify.com
- squidoo.com
streamable.com
techcrunch.com
ted.com
@@ -124,7 +117,6 @@ def self.default_allowed_domains
twitpic.com
usatoday.com
viddler.com
- videojug.com
vine.co
walmart.com
washingtonpost.com
@@ -275,7 +267,6 @@ def data
def rewrite_https(html)
return unless html
- uri = URI(@url)
if AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.rewrites)
html = html.gsub("http://", "https://")
end
diff --git a/lib/onebox/engine/amazon_onebox.rb b/lib/onebox/engine/amazon_onebox.rb
index eb5463c4..4ea26aa9 100644
--- a/lib/onebox/engine/amazon_onebox.rb
+++ b/lib/onebox/engine/amazon_onebox.rb
@@ -19,10 +19,8 @@ def url
# If possible, fetch the cached HTML body immediately so we can
# try to grab the canonical URL from that document,
# rather than guess at the best URL structure to use
- if body_cacher&.respond_to?('cache_response_body?')
- if body_cacher.cache_response_body?(uri.to_s) && body_cacher.cached_response_body_exists?(uri.to_s)
- @raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher)
- end
+ if !@raw && has_cached_body
+ @raw = Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher)
end
if @raw
@@ -31,7 +29,8 @@ def url
end
if match && match[:id]
- return "https://www.amazon.#{tld}/dp/#{Onebox::Helpers.uri_encode(match[:id])}"
+ id = Addressable::URI.encode_component(match[:id], Addressable::URI::CharacterClasses::PATH)
+ return "https://www.amazon.#{tld}/dp/#{id}"
end
@url
@@ -49,6 +48,12 @@ def http_params
private
+ def has_cached_body
+ body_cacher&.respond_to?('cache_response_body?') &&
+ body_cacher.cache_response_body?(uri.to_s) &&
+ body_cacher.cached_response_body_exists?(uri.to_s)
+ end
+
def match
@match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?[A-Z0-9]+)(?:\/|\?|$)/mi)
end
@@ -57,9 +62,9 @@ def image
if (main_image = raw.css("#main-image")) && main_image.any?
attributes = main_image.first.attributes
- return attributes["data-a-hires"].to_s if attributes["data-a-hires"]
-
- if attributes["data-a-dynamic-image"]
+ if attributes["data-a-hires"]
+ return attributes["data-a-hires"].to_s
+ elsif attributes["data-a-dynamic-image"]
return ::JSON.parse(attributes["data-a-dynamic-image"].value).keys.first
end
end
@@ -67,9 +72,11 @@ def image
if (landing_image = raw.css("#landingImage")) && landing_image.any?
attributes = landing_image.first.attributes
- return attributes["data-old-hires"].to_s if attributes["data-old-hires"]
-
- landing_image.first["src"].to_s
+ if attributes["data-old-hires"]
+ return attributes["data-old-hires"].to_s
+ else
+ return landing_image.first["src"].to_s
+ end
end
if (ebook_image = raw.css("#ebooksImgBlkFront")) && ebook_image.any?
@@ -91,16 +98,16 @@ def price
end
def multiple_authors(authors_xpath)
- author_list = raw.xpath(authors_xpath)
- authors = []
- author_list.each { |a| authors << a.inner_text.strip }
- authors.join(", ")
+ raw
+ .xpath(authors_xpath)
+ .map { |a| a.inner_text.strip }
+ .join(", ")
end
def data
og = ::Onebox::OpenGraph.new(raw)
- if raw.at_css('#dp.book_mobile') #printed books
+ if raw.at_css('#dp.book_mobile') # printed books
title = raw.at("h1#title")&.inner_text
authors = raw.at_css('#byline_secondary_view_div') ? multiple_authors("//div[@id='byline_secondary_view_div']//span[@class='a-text-bold']") : raw.at("#byline")&.inner_text
rating = raw.at("#averageCustomerReviews_feature_div .a-icon")&.inner_text || raw.at("#cmrsArcLink .a-icon")&.inner_text
diff --git a/lib/onebox/engine/flickr_onebox.rb b/lib/onebox/engine/flickr_onebox.rb
index 8819d6b4..3ed26684 100644
--- a/lib/onebox/engine/flickr_onebox.rb
+++ b/lib/onebox/engine/flickr_onebox.rb
@@ -32,7 +32,7 @@ def album_html(og)
#{album_title}
-
+
HTML
@@ -43,7 +43,7 @@ def image_html(og)
<<-HTML
-
+
HTML
end
diff --git a/lib/onebox/engine/gfycat_onebox.rb b/lib/onebox/engine/gfycat_onebox.rb
index 10973c31..27fd4bf7 100644
--- a/lib/onebox/engine/gfycat_onebox.rb
+++ b/lib/onebox/engine/gfycat_onebox.rb
@@ -9,8 +9,8 @@ class GfycatOnebox
matches_regexp(/^https?:\/\/gfycat\.com\//)
always_https
+ # This engine should have priority over AllowlistedGenericOnebox.
def self.priority
- # This engine should have priority over AllowlistedGenericOnebox.
1
end
@@ -21,6 +21,7 @@ def to_html
Gfycat.com
+
#{data[:title]} by
@@ -36,11 +37,12 @@ def to_html
+
#{data[:keywords]}
-
+
HTML
@@ -61,52 +63,50 @@ def match
@match ||= @url.match(/^https?:\/\/gfycat\.com\/(gifs\/detail\/)?(?.+)/)
end
- def nokogiri_page
- @nokogiri_page ||= begin
- response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
- Nokogiri::HTML(response)
- end
- end
+ def og_data
+ return @og_data if defined?(@og_data)
- def get_og_data
- og_data = {}
+ response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
+ page = Nokogiri::HTML(response)
+ script = page.at_css('script[type="application/ld+json"]')
- if json_string = nokogiri_page.at_css('script[type="application/ld+json"]')&.text
- og_data = Onebox::Helpers.symbolize_keys(::MultiJson.load(json_string))
+ if json_string = script&.text
+ @og_data = Onebox::Helpers.symbolize_keys(::MultiJson.load(json_string))
+ else
+ @og_data = {}
end
-
- og_data
end
def data
- og_data = get_og_data
+ return @data if defined?(@data)
- response = {
+ @data = {
name: match[:name],
title: og_data[:headline] || 'No Title',
author: og_data[:author],
- url: @url
+ url: @url,
}
- keywords = og_data[:keywords]&.split(',')
- if keywords
- response[:keywords] = keywords.map { |t| "##{t} " }.join(' ')
+ if keywords = og_data[:keywords]&.split(',')
+ @data[:keywords] = keywords
+ .map { |keyword| "##{keyword} " }
+ .join(' ')
end
if og_data[:video]
content_url = ::Onebox::Helpers.normalize_url_for_output(og_data[:video][:contentUrl])
video_url = Pathname.new(content_url)
- response[:webmUrl] = video_url.sub_ext(".webm").to_s
- response[:mp4Url] = video_url.sub_ext(".mp4").to_s
+ @data[:webmUrl] = video_url.sub_ext(".webm").to_s
+ @data[:mp4Url] = video_url.sub_ext(".mp4").to_s
thumbnail_url = ::Onebox::Helpers.normalize_url_for_output(og_data[:video][:thumbnailUrl])
- response[:posterUrl] = thumbnail_url
+ @data[:posterUrl] = thumbnail_url
- response[:width] = og_data[:video][:width]
- response[:height] = og_data[:video][:height]
+ @data[:width] = og_data[:video][:width]
+ @data[:height] = og_data[:video][:height]
end
- response
+ @data
end
end
end
diff --git a/lib/onebox/engine/github_commit_onebox.rb b/lib/onebox/engine/github_commit_onebox.rb
index 3bd06b28..d584eb16 100644
--- a/lib/onebox/engine/github_commit_onebox.rb
+++ b/lib/onebox/engine/github_commit_onebox.rb
@@ -10,7 +10,7 @@ class GithubCommitOnebox
include JSON
include Onebox::Mixins::GithubBody
- matches_regexp Regexp.new("^https?://(?:www\.)?(?:(?:\w)+\.)?(github)\.com(?:/)?(?:.)*/commit/")
+ matches_regexp(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?(github)\.com(?:\/)?(?:.)*\/commit\//)
always_https
def url
diff --git a/lib/onebox/engine/github_folder_onebox.rb b/lib/onebox/engine/github_folder_onebox.rb
index dfc4800b..a0c565f8 100644
--- a/lib/onebox/engine/github_folder_onebox.rb
+++ b/lib/onebox/engine/github_folder_onebox.rb
@@ -7,7 +7,7 @@ class GithubFolderOnebox
include StandardEmbed
include LayoutSupport
- matches_regexp Regexp.new(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?(github)\.com[\:\d]*(\/[^\/]+){2}/)
+ matches_regexp(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?(github)\.com[\:\d]*(\/[^\/]+){2}/)
always_https
def self.priority
diff --git a/lib/onebox/engine/google_docs_onebox.rb b/lib/onebox/engine/google_docs_onebox.rb
index 971eb463..be1ce63b 100644
--- a/lib/onebox/engine/google_docs_onebox.rb
+++ b/lib/onebox/engine/google_docs_onebox.rb
@@ -6,58 +6,50 @@ class GoogleDocsOnebox
include Engine
include LayoutSupport
- def self.supported_endpoints
- %w(spreadsheets document forms presentation)
- end
-
- def self.short_types
- @shorttypes ||= {
- spreadsheets: :sheets,
- document: :docs,
- presentation: :slides,
- forms: :forms,
- }
- end
-
- matches_regexp(/^(https?:)?\/\/(docs\.google\.com)\/(?(#{supported_endpoints.join('|')}))\/d\/((?[\w-]*)).+$/)
+ SUPPORTED_ENDPOINTS = %w(spreadsheets document forms presentation)
+ SHORT_TYPES = {
+ spreadsheets: :sheets,
+ document: :docs,
+ presentation: :slides,
+ forms: :forms,
+ }
+
+ matches_regexp(/^(https?:)?\/\/(docs\.google\.com)\/(?(#{SUPPORTED_ENDPOINTS.join('|')}))\/d\/((?[\w-]*)).+$/)
always_https
- protected
+ private
def data
- og_data = get_og_data
+ short_type = SHORT_TYPES[match[:endpoint].to_sym]
+
{
link: link,
- title: og_data[:title] || "Google #{shorttype.to_s.capitalize}",
- description: Onebox::Helpers.truncate(og_data[:description], 250) || "This #{shorttype.to_s.chop.capitalize} is private",
- type: shorttype
+ title: og_data[:title] || "Google #{short_type.to_s.capitalize}",
+ description: Onebox::Helpers.truncate(og_data[:description], 250) || "This #{short_type.to_s.chop.capitalize} is private",
+ type: short_type
}
end
- def doc_type
- @doc_type ||= match[:endpoint].to_sym
- end
-
- def shorttype
- GoogleDocsOnebox.short_types[doc_type]
- end
-
def match
@match ||= @url.match(@@matcher)
end
- def get_og_data
+ def og_data
+ return @og_data if defined?(@og_data)
+
response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
html = Nokogiri::HTML(response)
- og_data = {}
+ @og_data = {}
+
html.css('meta').each do |m|
- if m.attribute('property') && m.attribute('property').to_s.match(/^og:/i)
+ if m.attribute('property')&.to_s&.match(/^og:/i)
m_content = m.attribute('content').to_s.strip
m_property = m.attribute('property').to_s.gsub('og:', '')
- og_data[m_property.to_sym] = m_content
+ @og_data[m_property.to_sym] = m_content
end
end
- og_data
+
+ @og_data
end
end
end
diff --git a/lib/onebox/engine/google_maps_onebox.rb b/lib/onebox/engine/google_maps_onebox.rb
index f6dcecdd..bbf8e21c 100644
--- a/lib/onebox/engine/google_maps_onebox.rb
+++ b/lib/onebox/engine/google_maps_onebox.rb
@@ -119,8 +119,6 @@ def resolve_url!
@placeholder = "https://maps.googleapis.com/maps/api/streetview?size=690x400&location=#{lon},#{lat}&pano=#{panoid}&fov=#{zoom}&heading=#{heading}&pitch=#{pitch}&sensor=false"
when :canonical
- uri = URI(@url)
-
query = URI::decode_www_form(uri.query).to_h
if !query.has_key?("ll")
raise ArgumentError, "canonical url lacks location argument" unless query.has_key?("sll")
@@ -163,14 +161,20 @@ def rewrite_custom_url(url, target)
end
def follow_redirect!
- uri = URI(@url)
begin
- http = Net::HTTP.start(uri.host, uri.port,
- use_ssl: uri.scheme == 'https', open_timeout: timeout, read_timeout: timeout)
- response = http.head(uri.path)
+ http = Net::HTTP.start(
+ uri.host,
+ uri.port,
+ use_ssl: uri.scheme == 'https',
+ open_timeout: timeout,
+ read_timeout: timeout
+ )
+ response = http.head(uri.path)
raise "unexpected response code #{response.code}" unless %w(200 301 302).include?(response.code)
+
@url = response.code == "200" ? uri.to_s : response["Location"]
+ @uri = URI(@url)
ensure
http.finish rescue nil
end
diff --git a/lib/onebox/engine/google_photos_onebox.rb b/lib/onebox/engine/google_photos_onebox.rb
index cb1a3344..3a07b7df 100644
--- a/lib/onebox/engine/google_photos_onebox.rb
+++ b/lib/onebox/engine/google_photos_onebox.rb
@@ -11,9 +11,9 @@ class GooglePhotosOnebox
def to_html
og = get_opengraph
- return video_html(og) if !og.video_secure_url.nil?
- return album_html(og) if !og.type.nil? && og.type == "google_photos:photo_album"
- return image_html(og) if !og.image.nil?
+ return video_html(og) if og.video_secure_url
+ return album_html(og) if og.type == "google_photos:photo_album"
+ return image_html(og) if og.image
nil
end
@@ -32,7 +32,7 @@ def video_html(og)
@@ -53,7 +53,7 @@ def album_html(og)
#{Onebox::Helpers.truncate(album_title, 80)}
-
+
HTML
@@ -64,7 +64,7 @@ def image_html(og)
<<-HTML
-
+
HTML
end
diff --git a/lib/onebox/engine/imgur_onebox.rb b/lib/onebox/engine/imgur_onebox.rb
index 922853db..26a90379 100644
--- a/lib/onebox/engine/imgur_onebox.rb
+++ b/lib/onebox/engine/imgur_onebox.rb
@@ -40,7 +40,7 @@ def album_html(og)
#{album_title}
-
+
HTML
@@ -58,7 +58,7 @@ def image_html(og)
<<-HTML
-
+
HTML
end
diff --git a/lib/onebox/engine/instagram_onebox.rb b/lib/onebox/engine/instagram_onebox.rb
index f150a268..21a8ae6c 100644
--- a/lib/onebox/engine/instagram_onebox.rb
+++ b/lib/onebox/engine/instagram_onebox.rb
@@ -18,9 +18,8 @@ def data
oembed = get_oembed
raise "No oEmbed data found. Ensure 'facebook_app_access_token' is valid" if oembed.data.empty?
- permalink = clean_url.gsub("/#{oembed.author_name}/", "/")
-
- { link: permalink,
+ {
+ link: clean_url.gsub("/#{oembed.author_name}/", "/"),
title: "@#{oembed.author_name}",
image: oembed.thumbnail_url,
description: Onebox::Helpers.truncate(oembed.title, 250),
diff --git a/lib/onebox/engine/pastebin_onebox.rb b/lib/onebox/engine/pastebin_onebox.rb
index 2e119dc9..d9b26467 100644
--- a/lib/onebox/engine/pastebin_onebox.rb
+++ b/lib/onebox/engine/pastebin_onebox.rb
@@ -30,29 +30,25 @@ def truncated?
end
def lines
- return @lines if @lines
+ return @lines if defined?(@lines)
response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", redirect_limit: 1) rescue ""
@lines = response.split("\n")
end
def paste_key
- if uri.path =~ /\/raw\//
- match = uri.path.match(/\/raw\/([^\/]+)/)
- return match[1] if match && match[1]
- elsif uri.path =~ /\/download\//
- match = uri.path.match(/\/download\/([^\/]+)/)
- return match[1] if match && match[1]
- elsif uri.path =~ /\/embed\//
- match = uri.path.match(/\/embed\/([^\/]+)/)
- return match[1] if match && match[1]
+ regex = case uri
+ when /\/raw\//
+ /\/raw\/([^\/]+)/
+ when /\/download\//
+ /\/download\/([^\/]+)/
+ when /\/embed\//
+ /\/embed\/([^\/]+)/
else
- match = uri.path.match(/\/([^\/]+)/)
- return match[1] if match && match[1]
+ /\/([^\/]+)/
end
- nil
- rescue
- nil
+ match = uri.path.match(regex)
+ match[1] if match && match[1]
end
end
end
diff --git a/lib/onebox/engine/pdf_onebox.rb b/lib/onebox/engine/pdf_onebox.rb
index 8e8c09aa..2a8d46f0 100644
--- a/lib/onebox/engine/pdf_onebox.rb
+++ b/lib/onebox/engine/pdf_onebox.rb
@@ -12,25 +12,17 @@ class PdfOnebox
private
def data
- pdf_info = get_pdf_info
- raise "Unable to read pdf file: #{@url}" if pdf_info.nil?
+ begin
+ size = Onebox::Helpers.fetch_content_length(@url)
+ rescue
+ raise "Unable to read pdf file: #{@url}"
+ end
- result = { link: link,
- title: pdf_info[:name],
- filesize: pdf_info[:filesize]
- }
- result
- end
-
- def get_pdf_info
- uri = URI.parse(@url)
- size = Onebox::Helpers.fetch_content_length(@url)
{
+ link: link,
+ title: File.basename(uri.path),
filesize: size ? Onebox::Helpers.pretty_filesize(size.to_i) : nil,
- name: File.basename(uri.path)
}
- rescue
- nil
end
end
end
diff --git a/lib/onebox/engine/pubmed_onebox.rb b/lib/onebox/engine/pubmed_onebox.rb
index 1355f598..1cf8a0ac 100644
--- a/lib/onebox/engine/pubmed_onebox.rb
+++ b/lib/onebox/engine/pubmed_onebox.rb
@@ -10,13 +10,14 @@ class PubmedOnebox
private
- def get_xml
+ def xml
+ return @xml if defined?(@xml)
doc = Nokogiri::XML(URI.open(URI.join(@url, "?report=xml&format=text")))
pre = doc.xpath("//pre")
- Nokogiri::XML("" + pre.text + " ")
+ @xml = Nokogiri::XML("" + pre.text + " ")
end
- def authors_of_xml(xml)
+ def authors
initials = xml.css("Initials").map { |x| x.content }
last_names = xml.css("LastName").map { |x| x.content }
author_list = (initials.zip(last_names)).map { |i, l| i + " " + l }
@@ -27,22 +28,25 @@ def authors_of_xml(xml)
author_list.join(", ")
end
- def date_of_xml(xml)
- date_arr = (xml.css("PubDate").children).map { |x| x.content }
- date_arr = date_arr.select { |s| !s.match(/^\s+$/) }
- date_arr = (date_arr.map { |s| s.split }).flatten
- date_arr.sort.reverse.join(" ") # Reverse sort so month before year.
+ def date
+ xml.css("PubDate")
+ .children
+ .map { |x| x.content }
+ .select { |s| !s.match(/^\s+$/) }
+ .map { |s| s.split }
+ .flatten
+ .sort
+ .reverse
+ .join(" ") # Reverse sort so month before year.
end
def data
- xml = get_xml
-
{
title: xml.css("ArticleTitle").text,
- authors: authors_of_xml(xml),
+ authors: authors,
journal: xml.css("Title").text,
abstract: xml.css("AbstractText").text,
- date: date_of_xml(xml),
+ date: date,
link: @url,
pmid: match[:pmid]
}
diff --git a/lib/onebox/engine/stack_exchange_onebox.rb b/lib/onebox/engine/stack_exchange_onebox.rb
index 6a867546..c918ed93 100644
--- a/lib/onebox/engine/stack_exchange_onebox.rb
+++ b/lib/onebox/engine/stack_exchange_onebox.rb
@@ -25,7 +25,7 @@ def match
end
def url
- domain = URI(@url).host
+ domain = uri.host
question_id = match[:question_id]
answer_id = match[:answer_id2] || match[:answer_id1]
diff --git a/lib/onebox/engine/standard_embed.rb b/lib/onebox/engine/standard_embed.rb
index 7d817256..f19f060c 100644
--- a/lib/onebox/engine/standard_embed.rb
+++ b/lib/onebox/engine/standard_embed.rb
@@ -7,7 +7,6 @@
module Onebox
module Engine
module StandardEmbed
-
def self.oembed_providers
@@oembed_providers ||= {}
end
@@ -117,8 +116,6 @@ def get_json_response
"{}"
end
- protected
-
def get_oembed_url
oembed_url = nil
diff --git a/lib/onebox/engine/trello_onebox.rb b/lib/onebox/engine/trello_onebox.rb
index 9f38cd32..c74b39f0 100644
--- a/lib/onebox/engine/trello_onebox.rb
+++ b/lib/onebox/engine/trello_onebox.rb
@@ -11,12 +11,11 @@ class TrelloOnebox
always_https
def to_html
- link = "https://trello.com/#{match[:type]}/#{match[:key]}.html"
-
+ src = "https://trello.com/#{match[:type]}/#{match[:key]}.html"
height = match[:type] == 'b' ? 400 : 200
<<-HTML
-
+
HTML
end
@@ -25,12 +24,10 @@ def placeholder_html
end
private
+
def match
return @match if defined?(@match)
-
@match = @url.match(%{trello\.com/(?[^/]+)/(?[^/]+)/?\W*})
-
- @match
end
end
end
diff --git a/lib/onebox/engine/youku_onebox.rb b/lib/onebox/engine/youku_onebox.rb
index f0417e8d..2b9234fd 100644
--- a/lib/onebox/engine/youku_onebox.rb
+++ b/lib/onebox/engine/youku_onebox.rb
@@ -30,12 +30,6 @@ def to_html
>
HTML
end
-
- private
-
- def uri
- @_uri ||= URI(@url)
- end
end
end
end
diff --git a/lib/onebox/layout.rb b/lib/onebox/layout.rb
index ccc69360..a697ba24 100644
--- a/lib/onebox/layout.rb
+++ b/lib/onebox/layout.rb
@@ -32,19 +32,7 @@ def to_html
private
def uri
- @uri = URI(link)
- end
-
- def checksum
- @md5.hexdigest("#{VERSION}:#{link}")
- end
-
- def link
- ::Onebox::Helpers.normalize_url_for_output(record[:link])
- end
-
- def domain
- record[:domain] || URI(link || '').host.to_s.sub(/^www\./, '')
+ @uri ||= URI(::Onebox::Helpers.normalize_url_for_output(record[:link]))
end
def details
@@ -52,7 +40,7 @@ def details
link: record[:link],
title: record[:title],
favicon: record[:favicon],
- domain: domain,
+ domain: record[:domain] || uri.host.to_s.sub(/^www\./, ''),
article_published_time: record[:article_published_time],
article_published_time_title: record[:article_published_time_title],
metadata_1_label: record[:metadata_1_label],
diff --git a/lib/onebox/matcher.rb b/lib/onebox/matcher.rb
index a17a248d..2e337904 100644
--- a/lib/onebox/matcher.rb
+++ b/lib/onebox/matcher.rb
@@ -2,8 +2,12 @@
module Onebox
class Matcher
- def initialize(link, options = {})
- @url = link
+ def initialize(url, options = {})
+ begin
+ @uri = URI(url)
+ rescue URI::InvalidURIError
+ end
+
@options = options
end
@@ -14,12 +18,10 @@ def ordered_engines
end
def oneboxed
- uri = URI(@url)
- return unless uri.port.nil? || Onebox.options.allowed_ports.include?(uri.port)
- return unless uri.scheme.nil? || Onebox.options.allowed_schemes.include?(uri.scheme)
- ordered_engines.find { |engine| engine === uri && has_allowed_iframe_origins?(engine) }
- rescue URI::InvalidURIError
- nil
+ return if @uri.nil?
+ return if @uri.port && !Onebox.options.allowed_ports.include?(@uri.port)
+ return if @uri.scheme && !Onebox.options.allowed_schemes.include?(@uri.scheme)
+ ordered_engines.find { |engine| engine === @uri && has_allowed_iframe_origins?(engine) }
end
def has_allowed_iframe_origins?(engine)
diff --git a/lib/onebox/mixins/git_blob_onebox.rb b/lib/onebox/mixins/git_blob_onebox.rb
index b5251127..cac7b2a7 100644
--- a/lib/onebox/mixins/git_blob_onebox.rb
+++ b/lib/onebox/mixins/git_blob_onebox.rb
@@ -25,8 +25,8 @@ def self.included(klass)
}
module InstanceMethods
- def initialize(link, timeout = nil)
- super link, timeout
+ def initialize(url, timeout = nil)
+ super url, timeout
# merge engine options from global Onebox.options interface
# self.options = Onebox.options["GithubBlobOnebox"] # self.class.name.split("::").last.to_s
# self.options = Onebox.options[self.class.name.split("::").last.to_s] #We can use this a more generic approach. extract the engine class name automatically
@@ -163,11 +163,9 @@ def raw
@file = m[:file]
@lang = Onebox::FileTypeFinder.from_file_name(m[:file])
- if @lang == "stl" && link.match(/^https?:\/\/(www\.)?github\.com.*\/blob\//)
-
+ if @lang == "stl" && link.match?(/^https?:\/\/(www\.)?github\.com.*\/blob\//)
@model_file = @lang.dup
@raw = "https://render.githubusercontent.com/view/solid?url=" + self.raw_template(m)
-
else
contents = URI.open(self.raw_template(m), read_timeout: timeout).read
diff --git a/lib/onebox/open_graph.rb b/lib/onebox/open_graph.rb
index f4379144..f78e3d08 100644
--- a/lib/onebox/open_graph.rb
+++ b/lib/onebox/open_graph.rb
@@ -17,10 +17,10 @@ def title_attr
!title.nil? ? "title='#{title}'" : ""
end
- def get_secure_image
- secure_link = URI(get(:image))
- secure_link.scheme = 'https'
- secure_link.to_s
+ def secure_image_url
+ secure_url = URI(get(:image))
+ secure_url.scheme = 'https'
+ secure_url.to_s
end
def method_missing(attr, *args, &block)
diff --git a/lib/onebox/preview.rb b/lib/onebox/preview.rb
index 48d29df2..ada48e1b 100644
--- a/lib/onebox/preview.rb
+++ b/lib/onebox/preview.rb
@@ -6,8 +6,8 @@ class Preview
client_exception = defined?(Net::HTTPClientException) ? Net::HTTPClientException : Net::HTTPServerException
WEB_EXCEPTIONS ||= [client_exception, OpenURI::HTTPError, Timeout::Error, Net::HTTPError, Errno::ECONNREFUSED]
- def initialize(link, options = Onebox.options)
- @url = link
+ def initialize(url, options = Onebox.options)
+ @url = url
@options = options.dup
allowed_origins = @options[:allowed_iframe_origins] || Onebox::Engine.all_iframe_origins
diff --git a/spec/fixtures/googledocs.response b/spec/fixtures/googledocs.response
new file mode 100644
index 00000000..be56d2cf
--- /dev/null
+++ b/spec/fixtures/googledocs.response
@@ -0,0 +1,182 @@
+Lorem Ipsum! - Dokumenty Google Język JavaScript nie jest włączony w przeglądarce, dlatego nie można otworzyć tego pliku. Włącz JavaScript i wczytaj ponownie.
Niektórych funkcji programu Word nie można wyświetlić w Dokumentach Google i zostaną one usunięte, jeśli wprowadzisz zmiany
Wyświetl szczegóły
diff --git a/spec/lib/onebox/engine/google_docs_onebox_spec.rb b/spec/lib/onebox/engine/google_docs_onebox_spec.rb
index f67a639c..9646cbf5 100644
--- a/spec/lib/onebox/engine/google_docs_onebox_spec.rb
+++ b/spec/lib/onebox/engine/google_docs_onebox_spec.rb
@@ -3,35 +3,25 @@
require "spec_helper"
describe Onebox::Engine::GoogleDocsOnebox do
- context "Spreadsheets" do
- let(:matcher) { described_class.new("https://docs.google.com/spreadsheets/d/SHEET_KEY/pubhtml") }
-
- it "should be a spreadsheet" do
- expect(matcher.send(:shorttype)).to eq (:sheets)
- end
+ before(:all) do
+ @link = "https://docs.google.com/document/d/DOC_KEY/pub"
+ fake(@link, response("googledocs"))
end
- context "Documents" do
- let(:matcher) { described_class.new("https://docs.google.com/document/d/DOC_KEY/pub") }
+ include_context "engines"
+ it_behaves_like "an engine"
- it "should be a document" do
- expect(matcher.send(:shorttype)).to eq (:docs)
+ describe "#to_html" do
+ it "has title" do
+ expect(html).to include("Lorem Ipsum!")
end
- end
- context "Presentaions" do
- let(:matcher) { described_class.new("https://docs.google.com/presentation/d/PRESENTATION_KEY/pub") }
-
- it "should be a presentation" do
- expect(matcher.send(:shorttype)).to eq (:slides)
+ it "has description" do
+ expect(html).to include("Lorem Ipsum Lorem ipsum dolor sit amet, consectetur adipiscing elit")
end
- end
-
- context "Forms" do
- let(:matcher) { described_class.new("https://docs.google.com/forms/d/FORMS_KEY/viewform") }
- it "should be a form" do
- expect(matcher.send(:shorttype)).to eq (:forms)
+ it "has icon" do
+ expect(html).to include("googledocs-onebox-logo g-docs-logo")
end
end
end
diff --git a/spec/lib/onebox/engine/json_spec.rb b/spec/lib/onebox/engine/json_spec.rb
index 7388c558..07ae2e39 100644
--- a/spec/lib/onebox/engine/json_spec.rb
+++ b/spec/lib/onebox/engine/json_spec.rb
@@ -15,6 +15,7 @@ class OneboxEngineJSON
def initialize(link)
@url = link
+ @options = {}
end
end
diff --git a/spec/lib/onebox/layout_spec.rb b/spec/lib/onebox/layout_spec.rb
index cd6b4f7f..63487d78 100644
--- a/spec/lib/onebox/layout_spec.rb
+++ b/spec/lib/onebox/layout_spec.rb
@@ -15,6 +15,10 @@
Onebox.options.load_paths << "directory_b"
end
+ after(:each) do
+ Onebox.options.load_paths.pop(2)
+ end
+
context "when template exists in directory_b" do
before(:each) do
allow_any_instance_of(described_class).to receive(:template?) { |_, path| path == "directory_b" }
@@ -40,10 +44,6 @@
expect(template_path).to include("template")
end
end
-
- after(:each) do
- Onebox.options.load_paths.pop(2)
- end
end
describe "#to_html" do
diff --git a/spec/lib/onebox/matcher_spec.rb b/spec/lib/onebox/matcher_spec.rb
index b9b3e6f4..f252ae02 100644
--- a/spec/lib/onebox/matcher_spec.rb
+++ b/spec/lib/onebox/matcher_spec.rb
@@ -14,8 +14,8 @@ def self.iframe_origins
describe Onebox::Matcher do
let(:opts) { { allowed_iframe_regexes: [/.*/] } }
- describe "oneboxed" do
+ describe "oneboxed" do
describe "with a path" do
let(:url) { "http://party.time.made.up-url.com/beep/boop" }
let(:matcher) { Onebox::Matcher.new(url, opts) }
@@ -101,6 +101,5 @@ def self.iframe_origins
expect(matcher.oneboxed).not_to be_nil
end
end
-
end
end
diff --git a/spec/lib/onebox/oembed_spec.rb b/spec/lib/onebox/oembed_spec.rb
index ae50dc0e..2b6d3bd2 100644
--- a/spec/lib/onebox/oembed_spec.rb
+++ b/spec/lib/onebox/oembed_spec.rb
@@ -4,7 +4,6 @@
require "onebox/oembed"
describe Onebox::Oembed do
-
it "excludes html tags" do
json = '{"text": ""}'
oembed = described_class.new(json)
diff --git a/spec/lib/onebox/open_graph_spec.rb b/spec/lib/onebox/open_graph_spec.rb
index cddf21be..ede7afa7 100644
--- a/spec/lib/onebox/open_graph_spec.rb
+++ b/spec/lib/onebox/open_graph_spec.rb
@@ -4,7 +4,6 @@
require "onebox/open_graph"
describe Onebox::OpenGraph do
-
it "excludes html tags in title" do
doc = Nokogiri::HTML('Did’ you <b>miss me</b>? - Album on Imgur ')
og = described_class.new(doc)
@@ -18,5 +17,4 @@
og = described_class.new(doc)
expect(og.image).to eq("http://test.com/test'ing.mp3")
end
-
end
diff --git a/spec/lib/onebox/preview_spec.rb b/spec/lib/onebox/preview_spec.rb
index 787fb56b..91226374 100644
--- a/spec/lib/onebox/preview_spec.rb
+++ b/spec/lib/onebox/preview_spec.rb
@@ -3,7 +3,6 @@
require "spec_helper"
describe Onebox::Preview do
-
before do
fake("https://www.amazon.com/product", response("amazon"))
end
@@ -70,7 +69,6 @@
result = preview.to_s
expect(result).not_to match(/onerror/)
end
-
end
describe "iframe sanitizer" do
@@ -100,7 +98,5 @@
result = preview.to_s
expect(result).to include ' src="https://thirdparty.example.com"'
end
-
end
-
end
diff --git a/templates/allowlistedgeneric.mustache b/templates/allowlistedgeneric.mustache
index 0d0fde9f..abc700be 100644
--- a/templates/allowlistedgeneric.mustache
+++ b/templates/allowlistedgeneric.mustache
@@ -3,15 +3,14 @@
{{#description}}
-{{description}}
+ {{description}}
{{/description}}
{{#data_1}}
- {{label_1}}: {{data_1}}
- {{#data_2}}
- {{label_2}}: {{data_2}}
- {{/data_2}}
- {{^data_2}}
-
- {{/data_2}}
+
+ {{label_1}}: {{data_1}}
+ {{#data_2}}
+ {{label_2}}: {{data_2}}
+ {{/data_2}}
+
{{/data_1}}