From fdff5218c53f3897a2d1a9d6566d81d61aa9750a Mon Sep 17 00:00:00 2001
From: Mathieu Velten
Date: Thu, 22 Feb 2018 16:54:37 +0100
Subject: [PATCH] Invert subtitle image before the OCR

It makes it work way better on my side, especially with subtitles containing multiple lines.
---
 src/vobsub2srt.c++ | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/src/vobsub2srt.c++ b/src/vobsub2srt.c++
index c3b1a53..9162b89 100644
--- a/src/vobsub2srt.c++
+++ b/src/vobsub2srt.c++
@@ -121,6 +121,20 @@ TessBaseAPI* init_tesseract(std::string tesseract_data_path, char const *tess_la
   return tess_base_api;
 }
 
+// Inverts an image buffer in place: every byte v becomes 255 - v.
+// Rows start `stride` bytes apart; only the first `width` bytes of each of
+// the `height` rows are modified, so bytes past `width` in a row are kept.
+void invert_image(unsigned width, unsigned height,
+                  unsigned stride, unsigned char *image) {
+  for (unsigned y=0; y < height; y++) {
+    // Row offset is computed in size_t so y*stride cannot wrap in unsigned.
+    unsigned char *row = image + static_cast<size_t>(y) * stride;
+    for (unsigned x=0; x < width; x++) {
+      row[x] = 255 - row[x];
+    }
+  }
+}
+
 void do_ocr(ocr_thread_t *ocr_thread, vector *conv_subs, mutex *mut,
             unsigned counter, unsigned width, unsigned height,
             unsigned stride, unsigned char *image_cpy, unsigned start_pts, unsigned end_pts, bool verb) {
@@ -153,6 +167,7 @@ void do_ocr(ocr_thread_t *ocr_thread, vector *conv_subs, mutex *mut,
 
 int main(int argc, char **argv) {
   bool dump_images = false;
+  bool disable_invert = false;
   bool verb = false;
   bool list_languages = false;
   std::string ifo_file;
@@ -186,6 +201,7 @@ int main(int argc, char **argv) {
     add_option("min-width", min_width, "Minimum width in pixels to consider a subpicture for OCR (Default: 9)").
     add_option("min-height", min_height, "Minimum height in pixels to consider a subpicture for OCR (Default: 1)").
     add_option("max-threads", max_threads, "Maximum number of threads to use to do the OCR, use 0 to autodetect the number of cores (Default: 0)").
+    add_option("disable-invert", disable_invert, "By default the image will be inverted before the OCR because it works better with black on white background").
     add_unnamed(subname, "subname", "name of the subtitle files WITHOUT .idx/.sub ending! (REQUIRED)");
   if(not opts.parse_cmd(argc, argv) or subname.empty()) {
     return 1;
@@ -342,6 +358,9 @@ int main(int argc, char **argv) {
     unsigned char *image_cpy = (unsigned char *)malloc(image_size);
     memcpy(image_cpy, image, image_size);
 
+    if (!disable_invert)
+      invert_image(width, height, stride, image_cpy);
+
     ocr_thread->done = false;
     ocr_thread->t = new thread(do_ocr, ocr_thread, &conv_subs, &mut, sub_counter, width, height, stride, image_cpy, start_pts, end_pts, verb);
 