From 4e45046f10be5f4d2892f12f835d3ca8036682d4 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Thu, 22 Feb 2018 16:54:37 +0100 Subject: [PATCH] Invert subtitle image before the OCR It makes it work way better on my side, especially with subtitles containing multiple lines. --- doc/completion.sh | 2 +- doc/vobsub2srt.1 | 3 +++ src/vobsub2srt.c++ | 15 +++++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/doc/completion.sh b/doc/completion.sh index 4d61861..193b60f 100755 --- a/doc/completion.sh +++ b/doc/completion.sh @@ -44,7 +44,7 @@ _vobsub2srt() { case $cur in -*) - COMPREPLY=( $( compgen -W '--dump-images --verbose --ifo --lang --langlist --tesseract-lang --tesseract-data --blacklist --y-threshold --min-width --min-height --max-threads' -- "$cur" ) ) + COMPREPLY=( $( compgen -W '--dump-images --verbose --ifo --lang --langlist --tesseract-lang --tesseract-data --blacklist --y-threshold --min-width --min-height --max-threads --disable-invert' -- "$cur" ) ) ;; *) _filedir '(idx|IDX|sub|SUB)' diff --git a/doc/vobsub2srt.1 b/doc/vobsub2srt.1 index 6f30c40..d65e75d 100644 --- a/doc/vobsub2srt.1 +++ b/doc/vobsub2srt.1 @@ -49,6 +49,9 @@ Minimum height in pixels to consider a subpicture for OCR (Default: 1). .TP \fB\-\-max\-threads\fR \fInb\fR Maximum number of threads to use to do the OCR, use 0 to autodetect the number of cores (Default: 0). +.TP +\fB\-\-disable\-invert\fR +By default the image will be inverted before the OCR because it works better with black on white background, disable it. 
.SH EXAMPLES .nf $ \fBvobsub2srt \-\-lang en foobar\fR diff --git a/src/vobsub2srt.c++ b/src/vobsub2srt.c++ index 4b88a98..0ca40e2 100644 --- a/src/vobsub2srt.c++ +++ b/src/vobsub2srt.c++ @@ -142,6 +142,16 @@ void do_ocr(TessBaseAPI *tess_base_api, atomic *done, vector * done->store(true); } +void invert_image(unsigned width, unsigned height, + unsigned stride, unsigned char *image) { + for (unsigned y=0; y < height; y++) { + for (unsigned x=0; x < width; x++) { + unsigned index = y*stride + x; + image[index] = 255 - image[index]; + } + } +} + struct ocr_thread_t { ocr_thread_t(TessBaseAPI *tess_base_api) : tess_base_api(tess_base_api) @@ -153,6 +163,7 @@ struct ocr_thread_t { int main(int argc, char **argv) { bool dump_images = false; + bool disable_invert = false; bool verb = false; bool list_languages = false; std::string ifo_file; @@ -186,6 +197,7 @@ int main(int argc, char **argv) { add_option("min-width", min_width, "Minimum width in pixels to consider a subpicture for OCR (Default: 9)"). add_option("min-height", min_height, "Minimum height in pixels to consider a subpicture for OCR (Default: 1)"). add_option("max-threads", max_threads, "Maximum number of threads to use to do the OCR, use 0 to autodetect the number of cores (Default: 0)"). + add_option("disable-invert", disable_invert, "By default the image will be inverted before the OCR because it works better with black on white background, disable it"). add_unnamed(subname, "subname", "name of the subtitle files WITHOUT .idx/.sub ending! 
(REQUIRED)"); if(not opts.parse_cmd(argc, argv) or subname.empty()) { return 1; @@ -344,6 +356,9 @@ int main(int argc, char **argv) { unsigned char *image_cpy = (unsigned char *)malloc(image_size); memcpy(image_cpy, image, image_size); + if (!disable_invert) + invert_image(width, height, stride, image_cpy); + if (max_threads == 1) do_ocr(ocr_thread->tess_base_api, &ocr_thread->done, &conv_subs, &mut, sub_counter, width, height, stride, image_cpy, start_pts, end_pts, verb); else {