Skip to content

Commit

Permalink
Add new dalle export script
Browse files Browse the repository at this point in the history
  • Loading branch information
cguess committed Jul 18, 2024
1 parent 5ce8dfb commit 795e6d8
Showing 1 changed file with 71 additions and 0 deletions.
71 changes: 71 additions & 0 deletions lib/tasks/media_review.rake
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,75 @@ namespace :media_review do
puts "No AWS key for #{ai.id}"
end
end

desc "output for DallE stuff"
task dalle_output: :environment do |t, args|
  s3 = Aws::S3::Client.new

  # Create a timestamped bucket so repeated exports never collide.
  time = Time.now
  bucket_name = "dalle-export-#{time.strftime("%F")}-#{time.strftime("%H-%M")}"

  bucket = Aws::S3::Bucket.new(bucket_name)

  begin
    bucket.create
  rescue Aws::Errors::ServiceError => e
    puts "Couldn't create bucket. Here's why: #{e.message}"
  end

  # Sanity check to make sure the bucket's there
  resp = s3.list_buckets
  raise "Bucket wasn't created??? #{bucket_name}" unless resp.buckets.map(&:name).include? bucket_name

  # Get all media review that's images, go through them, output the
  # high-res image(s) to the bucket, and record one CSV row per item.
  image_archive_items = ArchiveItem.publically_viewable.reject { |ai| ai.images.empty? }

  # FIX: the progress total must match the collection we actually iterate,
  # not MediaReview.count — otherwise the bar never reflects real progress.
  progress_bar = ProgressBar.create(title: "Media Review Items", total: image_archive_items.count)

  # Build the CSV in memory while uploading each item's images.
  csv_string = CSV.generate do |csv|
    csv << ["id", "media_review.media_url", "media_review.date_published",
            "media_authenticity_category_humanized", 'claim_review.author["name"]',
            "media_review.url"]

    # FIX: `each`, not `map` — the loop runs purely for side effects.
    image_archive_items.each do |archive_item|
      media_review = archive_item.media_review
      if media_review.nil?
        # FIX: still advance the bar on skip, or it never reaches its total.
        progress_bar.increment
        next
      end

      author_name = media_review.claim_reviews.empty? ? "" : media_review.claim_reviews.first.author["name"]

      csv << [archive_item.id, media_review.media_url, media_review.date_published,
              media_review.media_authenticity_category_humanized, author_name,
              media_review.url]

      # Download and then reupload (yes there's probably better ways to do this. Like a direct S3 copy... but meh)
      # FIX: key on "<item id>/<image index>" — keying on archive_item.id alone
      # made every image of a multi-image item overwrite the previous upload,
      # and S3 object keys must be strings, not Integers.
      archive_item.images.each_with_index do |image, index|
        image.image.download do |tempfile|
          object = Aws::S3::Object.new(bucket_name, "#{archive_item.id}/#{index}")
          object.upload_file(tempfile.path)
        end
      end

      progress_bar.increment
    end
  end

  # Save the CSV alongside the images.
  object_key = "results.csv"
  object = Aws::S3::Object.new(bucket_name, object_key)
  object.put(body: csv_string)

  # Get a presigned link to the CSV for ease of access.
  begin
    url = bucket.object(object_key).presigned_url(:get)
    puts "Created presigned URL: #{url}"
  rescue Aws::Errors::ServiceError => e
    puts "Couldn't create presigned URL for #{bucket.name}:#{object_key}. Here's why: #{e.message}"
  end
end
end

0 comments on commit 795e6d8

Please sign in to comment.