Skip to content

Commit

Permalink
Add Google searching
Browse files Browse the repository at this point in the history
  • Loading branch information
cguess committed Sep 29, 2024
1 parent 57f876d commit cf0fb55
Show file tree
Hide file tree
Showing 15 changed files with 284 additions and 49 deletions.
41 changes: 23 additions & 18 deletions app/assets/stylesheets/search_results.scss
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,38 @@
@use "typography";

.search-results {
@extend .masonry;
@extend .masonry;

flex-flow: column-reverse wrap-reverse;
flex-flow: column-reverse wrap-reverse;

margin-bottom: clamp(1rem, 5vw, 5rem);
&:last-child {
margin-bottom: 0;
}
margin-bottom: clamp(1rem, 5vw, 5rem);
&:last-child {
margin-bottom: 0;
}

&.media {
flex-direction: column;
margin-right: 1rem;
}
}

.search-results__title {
@extend .h4;
@extend .h4;
}

.search-result {
@extend .box;
text-decoration: none;
width: 100%;
transition-property: transform;
transition-duration: 100ms;

&:hover {
transform: scale(1.03);
}
@extend .box;
text-decoration: none;
width: 100%;
transition-property: transform;
transition-duration: 100ms;

&:hover {
transform: scale(1.03);
}
}

.search-input-media {
max-width: shared.$screen-xs-ceiling;
margin: 0 auto;
max-width: shared.$screen-xs-ceiling;
margin: 0 auto;
}
12 changes: 11 additions & 1 deletion app/controllers/media_vault/search_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@ class SearchParams < T::Struct
const :msid, T.nilable(String) # Media Search ID
const :private, T.nilable(T::Boolean)
end

# Typed request parameters for a search that carries an uploaded media file.
class MediaSearchParams < T::Struct
# The uploaded file; the only field that is not nilable, so it must be present.
const :media, ActionDispatch::Http::UploadedFile
# Optional string. NOTE(review): presumably a free-text query sent alongside the file — confirm against the caller.
const :q, T.nilable(String)
# Optional boolean. NOTE(review): presumably limits the search to the user's private items — confirm against the caller.
const :private, T.nilable(T::Boolean)
end

# Search for

sig { void }
def index
typed_params = TypedParams[SearchParams].new.extract!(params)
Expand Down Expand Up @@ -67,12 +70,19 @@ def search_by_media(private: false)
end
end

# Serves the preview image that belongs to a stored Google fact-check result.
#
# Reads `params[:google_image_id]` and looks the record up with
# `GoogleSearchResult.find`, so an unknown id raises the finder's not-found error.
def google_image_link
  google_image = GoogleSearchResult.find(params[:google_image_id])

  # Despite its name, `public_image_url` returns the downloaded image body, not a URL.
  # The response is embedded through an <img> tag, so send it inline rather than with
  # `send_data`'s default "attachment" disposition (which makes browsers download it).
  send_data google_image.public_image_url, disposition: "inline"
end

private

sig { params(id: String, private: T::Boolean).void }
def search_by_media_search_id(id, private: false)
@media_search = ImageSearch.find(id)
@results = @media_search.run
@results, @google_results = @media_search.run
@post_results = @results.filter_map { |result|
result.has_key?(:image) ? result[:image] : (
result.has_key?(:video) ? result[:video] : nil
Expand Down
28 changes: 28 additions & 0 deletions app/models/google_search_result.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# A fact-check claim returned by the Google Fact Check Tools API, stored so that
# its preview image can be served later by record id.
class GoogleSearchResult < ApplicationRecord
  # Downloads the image stored at `image_url` (following redirects) and returns its bytes.
  #
  # NOTE(review): despite the name, this returns the image *body*, not a URL.
  #
  # Successful downloads are cached for 24 hours under the same key as before.
  # Failed downloads yield nil inside the block and `skip_nil` keeps them out of the
  # cache, so a temporary outage is not served as a broken image for a whole day.
  #
  # @return [String] the image bytes, or "" when there is no image URL or the download failed
  def public_image_url
    return "" if image_url.blank?

    body = Rails.cache.fetch("#{cache_key_with_version}/#{id}", expires_in: 24.hours, skip_nil: true) do
      response = Typhoeus::Request.new(image_url, followlocation: true).run
      response.body if response.success?
    end

    body.to_s
  end

  # Builds an unsaved record from one `claim` object of the API response.
  #
  # Only the first entry of `claimReview` is used. A missing or empty `claimReview`,
  # or a review without a `publisher`, leaves the corresponding attributes nil
  # instead of raising NoMethodError.
  #
  # @param api_response [Hash] the claim hash with string keys
  # @return [GoogleSearchResult] a new, unsaved record
  def self.from_api_response(api_response)
    review = Array(api_response["claimReview"]).first || {}
    publisher = review["publisher"] || {}

    new(
      text: api_response["text"],
      claimant: api_response["claimant"],
      claim_date: parse_api_date(api_response["claimDate"]),
      url: review["url"],
      review_date: parse_api_date(review["reviewDate"]),
      rating: review["textualRating"],
      title: review["title"],
      language_code: review["languageCode"],
      publisher_name: publisher["name"],
      publisher_site: publisher["site"]
    )
  end

  # Parses a date string from the API; nil and empty strings give nil.
  def self.parse_api_date(value)
    DateTime.parse(value) if value.present?
  end
  private_class_method :parse_api_date
end
80 changes: 77 additions & 3 deletions app/models/image_search.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,38 @@
# may speed this up since we can throw a huge instance at a very short-lived process?

class ImageSearch < ApplicationRecord
# class GoogleSearchResult
# # TODO: Move this to a active record model instead
# attr_reader :text, :claimant, :claim_date, :url, :review_date, :rating, :language_code, :publisher_name, :publisher_site, :title
# attr_accessor :image_url

# def initialize(google_object)
# @text = google_object["text"]
# @claimant = google_object["claimant"]
# @claim_date = DateTime.parse(google_object["claimDate"]) unless google_object["claimDate"].nil?
# @url = google_object["claimReview"].first["url"]
# @review_date = DateTime.parse(google_object["claimReview"].first["reviewDate"]) unless google_object["claimReview"].first["reviewDate"].nil?
# @rating = google_object["claimReview"].first["textualRating"]
# @title = google_object["claimReview"].first["title"]
# @language_code = google_object["claimReview"].first["languageCode"]
# @publisher_name = google_object["claimReview"].first["publisher"]["name"]
# @publisher_site = google_object["claimReview"].first["publisher"]["site"]
# @image_url = nil
# end

# def image_file
# Rails.cache.fetch("#{@url}/#{@image_url}", expires_in: 30.seconds) do
# hydra = Typhoeus::Hydra.hydra
# request = Typhoeus::Request.new(@image_url, followlocation: true) }
# hydra.queue(request)
# hydra.run

# request.response.body
# end
# end
# end


include ImageUploader::Attachment(:image) # adds an `image` virtual attribute
include VideoUploader::Attachment(:video)

Expand All @@ -20,6 +52,9 @@ class ImageSearch < ApplicationRecord
self.video_derivatives! unless self.video.nil?
end

# NOTE(review): this callback has an empty body, so it currently does nothing on save.
# Either implement it or remove it.
before_save do
end

# Implemented for Dhashable to find so that only certain number of frames of video are hashed
# when searching
sig { returns(Integer) }
Expand Down Expand Up @@ -64,7 +99,8 @@ def run
{ image: archive_item, distance: archive_items_raw[index]["levenshtein"] }
end

images
archive_final_items = images
google_items = search_google_by_media(self.image_url)
else
# For videos we have to loop
archive_items_raw = self.dhashes.flat_map do |dhash|
Expand All @@ -81,9 +117,11 @@ def run
# Videos are now sorted by distance, but we want to only keep the shortest distance
# Probably can get this into the sql above
videos.uniq! { |video_hash| video_hash[:video] }

videos
archive_final_items = videos
google_items = search_google_by_media(self.video_derivatives[:preview].url)
end

[archive_final_items, google_items]
end


Expand All @@ -110,4 +148,40 @@ def raw_sql(dhash)

sql
end

private

# Looks up fact checks for the image at `image_url` and returns them as saved
# GoogleSearchResult records.
#
# Steps:
#   1. Run the request built by `build_google_search_request`.
#   2. Build one GoogleSearchResult per entry in the response's "results" list.
#   3. Fetch every result's review page in parallel and read its `og:image` meta tag.
#   4. Save each record with that image URL (nil when the page has no such tag).
#
# Returns an empty array when the request fails or the response has no "results" key.
sig { params(image_url: String).returns(T::Array[GoogleSearchResult]) }
def search_google_by_media(image_url)
  response = build_google_search_request(image_url, image: true).run
  unless response.success?
    Rails.logger.warn("Google fact-check image search failed with HTTP #{response.code}")
    return []
  end

  # The "results" key can be absent, so default to an empty list.
  claims = JSON.parse(response.response_body).fetch("results", [])
  results = claims.map { |google_object| GoogleSearchResult.from_api_response(google_object["claim"]) }

  hydra = Typhoeus::Hydra.hydra
  page_requests = results.map { |result| Typhoeus::Request.new(result.url, followlocation: true) }
  page_requests.each { |page_request| hydra.queue(page_request) }
  hydra.run

  results.zip(page_requests).each do |result, page_request|
    page = Nokogiri::HTML.parse(page_request.response.body.to_s)
    og_image = page.at_xpath('//meta[@property="og:image"]')

    # Read the attribute by name so the order of attributes in the tag does not matter.
    # `update` also persists the record, which gives it the id the image route needs.
    result.update(image_url: og_image && og_image["content"])
  end

  results
end

# Builds, without sending, the GET request for the Fact Check Tools
# `claims:imageSearch` endpoint.
#
# @param query [String] sent as the `imageUri` query parameter
# @param image [Boolean] accepted but not read anywhere in this method
# @return [Typhoeus::Request] the unsent request, authenticated with FACTCHECK_TOOLS_API_KEY
#
# NOTE(review): the Accept header asks for "text/html" although the caller parses the
# body as JSON — confirm this is intended.
sig { params(query: String, image: T::Boolean).returns(Typhoeus::Request) }
def build_google_search_request(query, image: false)
  Typhoeus::Request.new(
    "https://factchecktools.googleapis.com/v1alpha1/claims:imageSearch",
    method: :get,
    params: { imageUri: query, key: Figaro.env.FACTCHECK_TOOLS_API_KEY },
    headers: { Accept: "text/html" }
  )
end
end
23 changes: 23 additions & 0 deletions app/views/media_vault/search/_google_search_result.html.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<%# Renders one Google fact-check result. Local: search_result (a GoogleSearchResult record). %>
<div
  class="archive-item"
  data-publishing-platform="<%# publishing_platform_shortname %>"
  data-controller="media-vault--archive"
  data-media-vault--archive-caption-collapse-mode-value="<%# caption_is_collapsable ? 'collapsed' : 'static' %>"
>
  <div class="archive-item__metadatum__label archive-item__inner flex gap-4 flex-row">
    <img class="w-24 h-24 rounded-full" src="<%= media_vault_google_image_link_url(search_result.id) %>" alt="<%= search_result.title %>">
    <div class="flex flex-col gap-2">
      <div class="flex flex-row gap-4">
        <div>
          <div class="font-semibold">Claim By <%= search_result.claimant.presence || "A Post" %>:</div>
          <div><%= search_result.text %></div>
        </div>
      </div>

      <div class="font-semibold"><%= search_result.publisher_name %> rating: <span><%= search_result.rating %></span></div>
      <div><%= link_to search_result.title, search_result.url %></div>
      <%# Both dates may be nil, so only render the line when one of them is present. %>
      <% date_to_show = search_result.review_date || search_result.claim_date %>
      <% if date_to_show %>
        <div style="color: #777;" class="text-sm"><span class="font-semibold">Reviewed:</span> <%= date_to_show.strftime("%-d %b %Y") %></div>
      <% end %>
    </div>
  </div>
</div>
77 changes: 52 additions & 25 deletions app/views/media_vault/search/index.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -15,32 +15,59 @@
<% end %>
</div>

<% if (@results || []).any? %>
<% if @author_results.try(:any?) %>
<h2 class="search-results__title">
<%= pluralize(@author_results.length, "matching author") %> found<% if @myvault %> in your MyVault&nbsp;<span style="font-size: 1.5rem; vertical-align: text-top; color: gray;" class="super">Beta</span><% end %>
</h2>
<div class="search-results search-results--authors">
<% @author_results.each do |author| %>
<%= link_to media_vault_author_path(author, platform: author.platform), class: "search-result" do %>
<%= render partial: "media_vault/authors/author_info/#{underscore_author_model(author)}", locals: { author: author } %>
<% end %>
<% end %>
<% if defined?(@media_search) %>
<div class="grid grid-cols-2 divide-neutral-200 divide-x-2">
<div>
<% if @post_results.any? %>
<h2 class="search-results__title">
<%= pluralize(@post_results.length, "matching MediaVault post") %> found<% if @myvault %> in your MyVault&nbsp;<span style="font-size: 1.5rem; vertical-align: text-top; color: gray;" class="super">Beta</span><% end %>:
</h2>
<div class="search-results search-results--posts archive-items archive-items--boxed-items archive-items--masonry media">
<% @post_results.each do |post| %>
<%= render partial: "media_vault/media/archive_item", locals: { preview: true, **post.normalized_attrs_for_views } %>
<% end %>
</div>
<% end %>
</div>
<div class="pl-4">
  <%# Count the fact-check results themselves; the heading previously used @post_results. %>
  <h2 class="search-results__title">
    <%= pluralize(@google_results.length, "matching Fact Check Explorer result") %> found:
  </h2>
  <div class="search-results search-results--posts archive-items archive-items--boxed-items archive-items--masonry media">
    <% @google_results.each do |google_result| %>
      <%= render partial: "media_vault/search/google_search_result", locals: { search_result: google_result } %>
    <% end %>
  </div>
</div>
<% end %>
</div>
<% else %>
<% if (@results || []).any? %>
<% if @author_results.try(:any?) %>
<h2 class="search-results__title">
<%= pluralize(@author_results.length, "matching author") %> found<% if @myvault %> in your MyVault&nbsp;<span style="font-size: 1.5rem; vertical-align: text-top; color: gray;" class="super">Beta</span><% end %>
</h2>
<div class="search-results search-results--authors">
<% @author_results.each do |author| %>
<%= link_to media_vault_author_path(author, platform: author.platform), class: "search-result" do %>
<%= render partial: "media_vault/authors/author_info/#{underscore_author_model(author)}", locals: { author: author } %>
<% end %>
<% end %>
</div>
<% end %>

<% if @post_results.any? %>
<h2 class="search-results__title">
<%= pluralize(@post_results.length, "matching post") %> found<% if @myvault %> in your MyVault&nbsp;<span style="font-size: 1.5rem; vertical-align: text-top; color: gray;" class="super">Beta</span><% end %>:
</h2>
<div class="search-results search-results--posts archive-items archive-items--boxed-items archive-items--masonry">
<% @post_results.each do |post| %>
<%= render partial: "media_vault/media/archive_item", locals: { preview: true, **post.normalized_attrs_for_views } %>
<% end %>
</div>
<% end %>
<% if @post_results.any? %>
<h2 class="search-results__title">
<%= pluralize(@post_results.length, "matching post") %> found<% if @myvault %> in your MyVault&nbsp;<span style="font-size: 1.5rem; vertical-align: text-top; color: gray;" class="super">Beta</span><% end %>:
</h2>
<div class="search-results search-results--posts archive-items archive-items--boxed-items archive-items--masonry">
<% @post_results.each do |post| %>
<%= render partial: "media_vault/media/archive_item", locals: { preview: true, **post.normalized_attrs_for_views } %>
<% end %>
</div>
<% end %>

<% else %>
<h1 class="text--center">No results found<%= " in your MyVault" if @myvault %>.</h1>
<% end %>
<% else %>
<h1 class="text--center">No results found<%= " in your MyVault" if @myvault %>.</h1>
<% end %>
<% end %>
</div>
1 change: 1 addition & 0 deletions config/initializers/figaro.rb
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,4 @@

Figaro.require_keys("OLLAMA_URL")
Figaro.require_keys("OLLAMA_PASSWORD")
Figaro.require_keys("FACTCHECK_TOOLS_API_KEY")
1 change: 1 addition & 0 deletions config/initializers/shrine.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,4 @@ def make_shrine_storage
Shrine.plugin :keep_files if ENV["STAGING"]
Shrine.plugin :remote_url, max_size: 20 * 1024 * 1024 # 20MB
Shrine.plugin :download_endpoint, prefix: "media/vault", host: "https://#{Figaro.env.PUBLIC_LINK_HOST}"
Shrine.plugin :remote_url
1 change: 1 addition & 0 deletions config/routes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@

get "search", to: "search#index", as: "search"
post "search_by_media", to: "search#search_by_media", as: "search_by_media"
get "search_image/:google_image_id", to: "search#google_image_link", as: "google_image_link"

get "authors/:platform/:id", to: "authors#show", as: "author"
resources :media, only: [:show, :destroy]
Expand Down
19 changes: 19 additions & 0 deletions db/migrate/20240928173524_create_google_search_results.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Creates the `google_search_results` table (UUID primary key) that backs the
# GoogleSearchResult model.
class CreateGoogleSearchResults < ActiveRecord::Migration[7.0]
def change
create_table :google_search_results, id: :uuid do |t|
# Taken from the claim itself.
t.text :text
t.string :claimant
t.datetime :claim_date
# Taken from the first `claimReview` entry of the claim.
t.string :url
t.datetime :review_date
t.string :rating
t.string :title
t.string :language_code
t.string :publisher_name
t.string :publisher_site
# Not part of the API response; filled in from the review page's og:image meta tag.
t.string :image_url

t.timestamps
end
end
end
Loading

0 comments on commit cf0fb55

Please sign in to comment.