-
Notifications
You must be signed in to change notification settings - Fork 0
/
thumbnailer.rb
executable file
·256 lines (216 loc) · 8.17 KB
/
thumbnailer.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
#! /usr/bin/env ruby
##########################################################
# thumbnailer.rb
#
# automatically extract thumbnails from mp4s
#
##########################################################
require 'ftools'
require 'shellwords'

require 'trollop'
require 'fastercsv'
require 'timecode'
##########################################################
# PARAMETERS & ERROR CHECKING
##########################################################
# Root of the movie tree that source mp4s are looked up under.
MOVIE_ROOT = '/movies'
# Frame rate handed to Timecode. The exact value doesn't really matter; it is
# only used for the seconds -> frames format conversion.
FPS = 24
##########################################################
# COMMAND LINE PARSING
##########################################################
# Declare the command-line interface and parse the arguments into opts.
# The banner text is printed verbatim, so its lines are kept at column 0.
opts = Trollop::options do
  version "thumbnailer.rb v1.0 (c) 2011 Fandor.com / Our Film Festival, Inc."
  banner <<-EOS
thumbnailer.rb extracts thumbnails from source mp4s based on film ID & timecode
Usage:
thumbnailer --csv-file file.csv
thumbnailer --film-id ID --timecode h:mm:ss [--titlecard]
thumbnailer expects file.csv to have a header row labeling these columns (in any order):
Film ID = film id
Title card timecode = timecode for title card thumbnail
Image 1 timecode = timecode for image thumbnail 1
Image 2 timecode = timecode for image thumbnail 2
Image 3 timecode = timecode for image thumbnail 3
(Timecodes should be formatted as h:mm:ss or hh:mm:ss)
EOS
  # Batch mode: one row per film.
  opt :csv_file, "CSV file containing film ids and timecodes to extract", :type => String
  # Single-extraction mode: used only when no csv file is given.
  opt :film_id, "Film ID to extract (if csv not specified)", :type => Integer, :default => 0
  opt :timecode, "Timecode to extract (if csv not specified)", :type => String
  opt :titlecard, "Is a single timecode extraction a title card?", :default => false
  # Behaviour shared by both modes.
  opt :image_as_default, "Copy image jpg to 000xid.jpg for default images (csv only)", :default => true
  opt :n_frames, "Number of frames to extract at each timecode", :default => 1
  opt :offset, "Number of seconds before each timecode to begin extracting", :default => 0.0
  opt :output_path, "Root folder for thumbnail output", :default => "new_thumbnails"
  opt :dry_run, "Dry run (don't create thumbnails)", :default => false
  opt :fix_existing, "Fix existing thumbnails (where extracted from 0*.jpg)", :default => false
end
# Publish the parsed options as globals; the helper methods below read them.
$csv_file, $n_frames, $offset, $output_path, $dry_run,
  $film_id, $timecode, $titlecard, $image_as_default, $fix_existing =
  opts.values_at(:csv_file, :n_frames, :offset, :output_path, :dry_run,
                 :film_id, :timecode, :titlecard, :image_as_default, :fix_existing)

# Validate options: either an existing csv file, or a film id plus a timecode.
if $csv_file
  Trollop::die "Input file #{$csv_file} does not exist!" if !File.exist?($csv_file)
elsif $film_id <= 0
  Trollop::die "Must specify a csv file, or a film id and timecode."
elsif !$timecode
  Trollop::die "Must specify a timecode to extract."
end
# Find the source movie to extract thumbnails from for a given film id.
#
# Looks in MOVIE_ROOT/<id / 100>/<id>/ for mp4 files whose names begin with
# 1-9 (mp4s beginning with 0 are deliberately excluded) and picks the largest
# one, presumably the highest quality file.
#
# id - Integer film id.
#
# Returns the path of the largest matching non-empty mp4, or nil when there is
# none. nil (rather than an empty string, which is truthy) lets callers test
# the result directly.
def largest_film_file(id)
  pattern = File.join(MOVIE_ROOT, (id / 100).to_s, id.to_s, "[1-9]*.mp4")
  best_path = nil
  best_size = 0
  Dir.glob(pattern) do |path|
    size = File.size(path)
    # strict comparison: on a tie the first file found wins
    if size > best_size
      best_path = path
      best_size = size
    end
  end
  best_path
end
# Turn a possibly malformed "h:mm:ss" string into a Timecode object.
#
# time_string - the timecode text; anything that is not a String is rejected.
#
# Returns the Timecode built at FPS frames per second, or nil when the input
# is not a String or Timecode.at raises for it.
def parse_timecode(time_string)
  if time_string.is_a?(String)
    hours, minutes, seconds = time_string.split(/:/).map { |part| part.to_i }
    begin
      Timecode.at(hours, minutes, seconds, 0, FPS)
    rescue
      # invalid timecode
      nil
    end
  end
end
# Create directory d, or only print the mkdir that would happen when
# $dry_run is set.
def create_dir(d)
  return puts("mkdir #{d}") if $dry_run
  Dir.mkdir(d)
end
# Return the per-film output folder ($output_path/<id>), creating the root
# output folder and then the film folder first if either does not exist yet.
def output_folder(id)
  film_dir = File.join($output_path, id.to_s)
  [$output_path, film_dir].each do |dir|
    create_dir(dir) unless File.exist?(dir)
  end
  film_dir
end
# Build the thumbnail path inside the film's output folder (which is created
# as a side effect of output_folder).
#
# id         - film id.
# timecode   - timecode String; its colons become underscores in the name.
# title_card - when truthy the name carries a "_titlecard_" marker.
#
# When $n_frames is not 1 the name ends in a "_%02d" frame-number pattern.
def output_filename(id, timecode, title_card)
  folder = output_folder(id)
  stamp = timecode.gsub(/:/, "_")
  extension = $n_frames == 1 ? ".jpg" : "_%02d.jpg"
  basename = title_card ? "#{id}_titlecard_#{stamp}" : "#{id}_#{stamp}"
  File.join(folder, basename + extension)
end
# Return the folder that holds a film's files: MOVIE_ROOT/<id / 100>/<id>.
#
# Uses MOVIE_ROOT rather than a second hard-coded "/movies", so this always
# agrees with the tree that largest_film_file searches.
#
# id - film id; anything responding to to_i and to_s.
def movies_folder(id)
  File.join(MOVIE_ROOT, (id.to_i / 100).to_s, id.to_s)
end
# Build the path a thumbnail would have inside the film's folder in the
# movies tree, using the same naming scheme as the output tree.
#
# id         - film id.
# timecode   - timecode String; its colons become underscores in the name.
# title_card - when truthy the name carries a "_titlecard_" marker.
def movies_folder_filename(id, timecode, title_card)
  stamp = timecode.gsub(/:/, "_")
  prefix = title_card ? "#{id}_titlecard_" : "#{id}_"
  extension = $n_frames == 1 ? ".jpg" : "_%02d.jpg"
  File.join(movies_folder(id), [prefix, stamp, extension].join)
end
# Return the path of the film's default image: the id zero-padded to six
# digits, ".jpg", inside the film's output folder.
def default_filename(id)
  File.join(output_folder(id), format("%06d.jpg", id.to_i))
end
# Extract a thumbnail (or a run of $n_frames frames) from a movie with ffmpeg.
#
# Nothing is extracted when the timecode cannot be parsed, when the thumbnail
# already exists in the movies tree (unless $fix_existing is set and the film
# folder contains a 0*.mp4), or when it already exists in the output tree.
# To force a re-extract, delete the existing file.
#
# source_file - path of the mp4 to read.
# timecode    - timecode string, parsed with parse_timecode.
# offset      - seconds to add to the timecode before extracting.
# id          - film id, used to build folder and file names.
# title_card  - when true the thumbnail is named as a title card.
# as_default  - when true a successful extraction is also copied to
#               default_filename(id).
#
# Raises RuntimeError ("Source file error!") when the timecode is valid but
# source_file is nil or does not exist.
def extract_thumbnail(source_file, timecode, offset, id, title_card=false, as_default=false)
  # convert string into timecode; return if not valid
  t = parse_timecode(timecode)
  return unless t
  raise "Source file error!" unless source_file && File.exist?(source_file)

  # add offset seconds (converted to frames) and turn the result into
  # something ffmpeg will understand
  t = (t + offset * FPS).with_frames_as_fraction

  # if the file already exists in the movies tree, skip re-extracting
  file = movies_folder_filename(id, t, title_card)
  if File.exist?(file)
    # if the fix_existing flag is set AND a 0*.mp4 exists, allow the re-extract to proceed
    if $fix_existing && (Dir.glob(File.join(movies_folder(id), "0*.mp4")).length > 0)
      puts "Re-extracting thumbnails for film id #{id}"
    else
      return
    end
  end

  # now check that it doesn't already exist in the extraction path...
  # NOTE(review): when $n_frames != 1 the name contains a literal "%02d"
  # pattern, so this check and the success check below look for the pattern
  # itself rather than a numbered frame -- confirm whether that is intended.
  file = output_filename(id, t, title_card)
  return if File.exist?(file)

  # Paths are shell-escaped so names containing spaces or quotes survive the
  # trip through the shell.
  command = "ffmpeg -i #{Shellwords.escape(source_file)} -y -ss #{t} " \
            "-vframes #{$n_frames} #{Shellwords.escape(file)}"

  if $dry_run
    puts(command)
  else
    puts "Extracting thumbnail from #{source_file}..."
    # "> /dev/null 2>&1" instead of the bash-only "&>": a POSIX /bin/sh reads
    # "cmd &> file" as "run cmd in the background", which would make the
    # existence check below run before ffmpeg has written anything.
    system("#{command} > /dev/null 2>&1")
    if File.exist?(file)
      File.copy(file, default_filename(id)) if as_default
    else # file does not exist!
      puts "Failed to extract #{file} (bad timecode)"
    end
  end
end
# Walk a CSV file (header row required) and extract the thumbnails each row
# asks for.
#
# A row is skipped when its "done" column is "x" (any case), when its four
# timecode columns hold fewer than five characters in total, when its film id
# is not a positive integer, or when no source movie can be found for it.
# Otherwise the title card and the three images are extracted in turn, each
# shifted back by $offset seconds; image 1 is also used as the default image
# when $image_as_default is set.
def extract_thumbnails_from_csv(csv_file)
  timecode_columns = [:title_card_timecode, :image_1_timecode,
                      :image_2_timecode, :image_3_timecode]

  FasterCSV.foreach(csv_file, :headers => true, :header_converters => :symbol) do |row|
    # already processed?
    done_mark = row[:done]
    next if done_mark && done_mark.downcase == "x"

    # no timecodes identified?
    timecode_chars = timecode_columns.inject(0) { |sum, column| sum + row[column].to_s.length }
    next if timecode_chars < 5

    id = row[:film_id].to_i
    if id <= 0
      puts "id #{row[:film_id]} is not valid!"
      next
    end

    source_file = largest_film_file(id)
    if !source_file || !File.exist?(source_file)
      status = row[:published] == 'TRUE' ? 'published' : 'unpublished'
      puts "could not find movie file #{source_file} for #{status} film id #{id}"
      next
    end

    # [column, title card?, use as default image (000xxx.jpg)?]
    [
      [:title_card_timecode, true, false],
      [:image_1_timecode, false, $image_as_default],
      [:image_2_timecode, false, false],
      [:image_3_timecode, false, false]
    ].each do |column, is_title_card, use_as_default|
      extract_thumbnail(source_file, row[column], -$offset, id, is_title_card, use_as_default)
    end
  end
end
# Extract one thumbnail for a single film id and timecode (non-CSV mode).
#
# id        - Integer film id; must be greater than 0.
# timecode  - timecode string to extract at.
# titlecard - true when the thumbnail is a title card.
#
# Prints a message and returns without extracting when the id is not valid or
# no source movie exists for it. The source check mirrors the one used for
# CSV rows (present AND on disk), so an empty path is reported as a missing
# movie instead of raising later inside extract_thumbnail.
def extract_single_timecode(id, timecode, titlecard)
  unless id > 0
    puts "id #{id} is not valid!"
    return
  end

  source_file = largest_film_file(id)
  unless source_file && File.exist?(source_file)
    puts "could not find movie file #{source_file} for film id #{id}"
    return
  end

  # the offset is negated so extraction starts $offset seconds before the timecode
  extract_thumbnail(source_file, timecode, -$offset, id, titlecard)
end
################################################################################
## MAIN BODY
################################################################################
# Batch mode when a csv file was given; otherwise extract the single
# film id / timecode requested on the command line.
extract_thumbnails_from_csv($csv_file) if $csv_file
extract_single_timecode($film_id, $timecode, $titlecard) unless $csv_file