From 3a10420af9ee4cc46d8fade67f2ce2ee2b0378cc Mon Sep 17 00:00:00 2001
From: Geoffrey
Date: Sun, 21 Jun 2020 00:27:35 +1200
Subject: [PATCH] Add support for Tesseract 4: invert images and compile as C++11

This contains the following PRs:
- #72 by bit: Fix compile with gcc7 and tesseract 4
- #75 by oltodosel: inverting images for tesseract 4
---
 CMakeLists.txt     |  2 +-
 src/vobsub2srt.c++ | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d1954cb..cb0a17f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -54,7 +54,7 @@ include(CheckCXXSourceCompiles)
 include(CheckCXXSourceRuns)
 
 set(CMAKE_C_FLAGS "-std=gnu99")
-set(CMAKE_CXX_FLAGS "-ansi -pedantic -Wall -Wextra -Wno-long-long")
+set(CMAKE_CXX_FLAGS "-ansi -pedantic -Wall -Wextra -Wno-long-long -std=gnu++11")
 set(CMAKE_CXX_FLAGS_RELEASE "-O3 -mtune=native -march=native -DNDEBUG -fomit-frame-pointer -ffast-math") # TODO -Ofast GCC 4.6
 set(CMAKE_C_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE})
 
diff --git a/src/vobsub2srt.c++ b/src/vobsub2srt.c++
index 2f9e1a0..b9c131c 100644
--- a/src/vobsub2srt.c++
+++ b/src/vobsub2srt.c++
@@ -265,6 +265,30 @@ int main(int argc, char **argv) {
              << start_pts << ")\n";
       }
 
+      // Tesseract 3.05 and older handle inverted images (light text on a dark
+      // background) without problems, but 4.x wants dark text on a light
+      // background, so the image is inverted here before it is used further.
+      // https://github.com/tesseract-ocr/tesseract/wiki/ImproveQuality#inverting-images
+      const bool inverting_images = true;
+
+      // Owns the inverted copy. Declared outside the `if` so the buffer outlives
+      // it, and as a std::vector so it is freed automatically instead of leaking
+      // a `new[]` allocation that nothing ever deletes.
+      // NOTE(review): assumes <vector> is already included by this file and that
+      // `image` is not used after the enclosing scope ends -- confirm.
+      std::vector<unsigned char> image_rev;
+      if(inverting_images) {
+        // Invert the whole buffer (image_size bytes), not just width * height:
+        // `stride` and `image_size` are passed along with `image` below, so rows
+        // may be padded beyond `width`.
+        // NOTE(review): presumes image_size is the byte size of `image` -- confirm.
+        image_rev.resize(image_size);
+        for(std::size_t i = 0; i < image_rev.size(); ++i) {
+          image_rev[i] = static_cast<unsigned char>(255 - image[i]);
+        }
+        image = image_rev.data();
+      }
+
       if(dump_images) {
         dump_pgm(subname, sub_counter, width, height, stride, image, image_size);
       }