diff --git a/README.md b/README.md index 8381e9ca10..b115f96681 100644 --- a/README.md +++ b/README.md @@ -173,6 +173,15 @@ Output ![29](samples/29.jpg "2") +## Create images with Hindi text + +It is simple! Just do `trdg -l hi -c 1000 -w 5`! + +Output + +![30](samples/30.jpg) + + ## Add new fonts The script picks a font at random from the *fonts* directory. @@ -184,6 +193,7 @@ The script picks a font at random from the *fonts* directory. | fonts/ko | Korean | | fonts/ja | Japanese | | fonts/th | Thai | +| fonts/hi | Hindi | Simply add/remove fonts until you get the desired output. diff --git a/samples/30.jpg b/samples/30.jpg new file mode 100644 index 0000000000..b2e2de44a7 Binary files /dev/null and b/samples/30.jpg differ diff --git a/trdg/data_generator.py b/trdg/data_generator.py index f96d554845..06647fdf91 100644 --- a/trdg/data_generator.py +++ b/trdg/data_generator.py @@ -268,23 +268,27 @@ def generate( tess_box_name = "{}.box".format(name) # Save the image - if out_dir is not None: - final_image.save(os.path.join(out_dir, image_name)) - if output_mask == 1: - final_mask.save(os.path.join(out_dir, mask_name)) - if output_bboxes == 1: - bboxes = mask_to_bboxes(final_mask) - with open(os.path.join(out_dir, box_name), "w") as f: - for bbox in bboxes: - f.write(" ".join([str(v) for v in bbox]) + "\n") - if output_bboxes == 2: - bboxes = mask_to_bboxes(final_mask, tess=True) - with open(os.path.join(out_dir, tess_box_name), "w") as f: - for bbox, char in zip(bboxes, text): - f.write( - " ".join([char] + [str(v) for v in bbox] + ["0"]) + "\n" - ) - else: - if output_mask == 1: - return final_image, final_mask - return final_image + try: + if out_dir is not None: + final_image.save(os.path.join(out_dir, image_name)) + if output_mask == 1: + final_mask.save(os.path.join(out_dir, mask_name)) + if output_bboxes == 1: + bboxes = mask_to_bboxes(final_mask) + with open(os.path.join(out_dir, box_name), "w") as f: + for bbox in bboxes: + f.write(" ".join([str(v) for v in 
bbox]) + "\n") + if output_bboxes == 2: + bboxes = mask_to_bboxes(final_mask, tess=True) + with open(os.path.join(out_dir, tess_box_name), "w") as f: + for bbox, char in zip(bboxes, text): + f.write( + " ".join([char] + [str(v) for v in bbox] + ["0"]) + "\n" + ) + else: + if output_mask == 1: + return final_image, final_mask + return final_image + + except Exception as e: + print("Error while saving the image: {}".format(e)) diff --git a/trdg/fonts/hi/Lohit-Devanagari.ttf b/trdg/fonts/hi/Lohit-Devanagari.ttf deleted file mode 100644 index ce04a33582..0000000000 Binary files a/trdg/fonts/hi/Lohit-Devanagari.ttf and /dev/null differ diff --git a/trdg/fonts/hi/gargi.ttf b/trdg/fonts/hi/gargi.ttf new file mode 100644 index 0000000000..8ac690af64 Binary files /dev/null and b/trdg/fonts/hi/gargi.ttf differ diff --git a/trdg/utils.py b/trdg/utils.py index 4084fa7150..5b326167b9 100644 --- a/trdg/utils.py +++ b/trdg/utils.py @@ -111,26 +111,14 @@ def draw_bounding_boxes( def make_filename_valid(value: str, allow_unicode: bool = False) -> str: - """ - Code adapted from: https://docs.djangoproject.com/en/4.0/_modules/django/utils/text/#slugify - Convert to ASCII if 'allow_unicode' is False. Convert spaces or repeated - dashes to single dashes. Remove characters that aren't alphanumerics, - underscores, or hyphens. Convert to lowercase. Also strip leading and - trailing whitespace, dashes, and underscores. 
- """ - value = str(value) - if allow_unicode: - value = unicodedata.normalize("NFKC", value) - else: - value = ( - unicodedata.normalize("NFKD", value) - .encode("ascii", "ignore") - .decode("ascii") - ) - value = re.sub(r"[^\w\s-]", "", value) - - # Image names will be shortened to avoid exceeding the max filename length + # Strip trailing whitespace + value = re.sub(r'\s+$', '', value) + + # Remove invalid characters (they are deleted, not replaced with underscores) + value = re.sub(r'[:?<>|*$`]', '', value) + + # Truncate the resulting string to a maximum length of 200 characters return value[:200]