Skip to content

Commit

Permalink
Invert subtitle image before the OCR
Browse files Browse the repository at this point in the history
It makes it work way better on my side, especially
with subtitles containing multiple lines.
  • Loading branch information
MatMaul committed Feb 22, 2018
1 parent 089ef65 commit 4e45046
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 1 deletion.
2 changes: 1 addition & 1 deletion doc/completion.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ _vobsub2srt() {

case $cur in
-*)
COMPREPLY=( $( compgen -W '--dump-images --verbose --ifo --lang --langlist --tesseract-lang --tesseract-data --blacklist --y-threshold --min-width --min-height --max-threads' -- "$cur" ) )
COMPREPLY=( $( compgen -W '--dump-images --verbose --ifo --lang --langlist --tesseract-lang --tesseract-data --blacklist --y-threshold --min-width --min-height --max-threads --disable-invert' -- "$cur" ) )
;;
*)
_filedir '(idx|IDX|sub|SUB)'
Expand Down
2 changes: 2 additions & 0 deletions doc/vobsub2srt.1
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ Minimum height in pixels to consider a subpicture for OCR (Default: 1).
.TP
\fB\-\-max\-threads\fR \fInb\fR
Maximum number of threads to use to do the OCR, use 0 to autodetect the number of cores (Default: 0).
.TP
\fB\-\-disable\-invert\fR
Do not invert the image before the OCR. By default the image is inverted because the OCR works better with black text on a white background.
.SH EXAMPLES
.nf
$ \fBvobsub2srt \-\-lang en foobar\fR
Expand Down
15 changes: 15 additions & 0 deletions src/vobsub2srt.c++
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,16 @@ void do_ocr(TessBaseAPI *tess_base_api, atomic<bool> *done, vector<sub_text_t> *
done->store(true);
}

/*
 * Invert an image buffer in place.
 *
 * Every byte in the width x height region is replaced by 255 minus its
 * previous value. Consecutive rows start `stride` bytes apart; any bytes
 * between `width` and `stride` on a row are left untouched.
 *
 * width   number of bytes to invert on each row
 * height  number of rows to process
 * stride  distance in bytes between the starts of consecutive rows
 * image   buffer to modify (presumably one byte per pixel -- confirm
 *         against the caller)
 */
void invert_image(unsigned width, unsigned height,
                  unsigned stride, unsigned char *image) {
  const int max_value = 255;

  for (unsigned row = 0; row < height; ++row) {
    /* Offset of the first byte of this row inside the buffer. */
    const unsigned row_start = row * stride;

    for (unsigned col = 0; col < width; ++col) {
      unsigned char *pixel = &image[row_start + col];
      *pixel = (unsigned char)(max_value - *pixel);
    }
  }
}

struct ocr_thread_t {
ocr_thread_t(TessBaseAPI *tess_base_api)
: tess_base_api(tess_base_api)
Expand All @@ -153,6 +163,7 @@ struct ocr_thread_t {

int main(int argc, char **argv) {
bool dump_images = false;
bool disable_invert = false;
bool verb = false;
bool list_languages = false;
std::string ifo_file;
Expand Down Expand Up @@ -186,6 +197,7 @@ int main(int argc, char **argv) {
add_option("min-width", min_width, "Minimum width in pixels to consider a subpicture for OCR (Default: 9)").
add_option("min-height", min_height, "Minimum height in pixels to consider a subpicture for OCR (Default: 1)").
add_option("max-threads", max_threads, "Maximum number of threads to use to do the OCR, use 0 to autodetect the number of cores (Default: 0)").
add_option("disable-invert", disable_invert, "By default the image will be inverted before the OCR because it works better with black on white background, disable it").
add_unnamed(subname, "subname", "name of the subtitle files WITHOUT .idx/.sub ending! (REQUIRED)");
if(not opts.parse_cmd(argc, argv) or subname.empty()) {
return 1;
Expand Down Expand Up @@ -344,6 +356,9 @@ int main(int argc, char **argv) {
unsigned char *image_cpy = (unsigned char *)malloc(image_size);
memcpy(image_cpy, image, image_size);

if (!disable_invert)
invert_image(width, height, stride, image_cpy);

if (max_threads == 1)
do_ocr(ocr_thread->tess_base_api, &ocr_thread->done, &conv_subs, &mut, sub_counter, width, height, stride, image_cpy, start_pts, end_pts, verb);
else {
Expand Down

0 comments on commit 4e45046

Please sign in to comment.