Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bugfixes for Hindi text generation #333

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,15 @@ Output
![29](samples/29.jpg "2")


## Create images with Hindi text

It is simple! Just do `trdg -l hi -c 1000 -w 5`!

Output

![30](samples/30.jpg)


## Add new fonts

The script picks a font at random from the *fonts* directory.
Expand All @@ -184,6 +193,7 @@ The script picks a font at random from the *fonts* directory.
| fonts/ko | Korean |
| fonts/ja | Japanese |
| fonts/th | Thai |
| fonts/hi | Hindi |

Simply add/remove fonts until you get the desired output.

Expand Down
Binary file added samples/30.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
44 changes: 24 additions & 20 deletions trdg/data_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,23 +268,27 @@ def generate(
tess_box_name = "{}.box".format(name)

# Save the image
if out_dir is not None:
final_image.save(os.path.join(out_dir, image_name))
if output_mask == 1:
final_mask.save(os.path.join(out_dir, mask_name))
if output_bboxes == 1:
bboxes = mask_to_bboxes(final_mask)
with open(os.path.join(out_dir, box_name), "w") as f:
for bbox in bboxes:
f.write(" ".join([str(v) for v in bbox]) + "\n")
if output_bboxes == 2:
bboxes = mask_to_bboxes(final_mask, tess=True)
with open(os.path.join(out_dir, tess_box_name), "w") as f:
for bbox, char in zip(bboxes, text):
f.write(
" ".join([char] + [str(v) for v in bbox] + ["0"]) + "\n"
)
else:
if output_mask == 1:
return final_image, final_mask
return final_image
try:
if out_dir is not None:
final_image.save(os.path.join(out_dir, image_name))
if output_mask == 1:
final_mask.save(os.path.join(out_dir, mask_name))
if output_bboxes == 1:
bboxes = mask_to_bboxes(final_mask)
with open(os.path.join(out_dir, box_name), "w") as f:
for bbox in bboxes:
f.write(" ".join([str(v) for v in bbox]) + "\n")
if output_bboxes == 2:
bboxes = mask_to_bboxes(final_mask, tess=True)
with open(os.path.join(out_dir, tess_box_name), "w") as f:
for bbox, char in zip(bboxes, text):
f.write(
" ".join([char] + [str(v) for v in bbox] + ["0"]) + "\n"
)
else:
if output_mask == 1:
return final_image, final_mask
return final_image

except Exception as e:
print("Error while saving the image: {}".format(e))
Binary file removed trdg/fonts/hi/Lohit-Devanagari.ttf
Binary file not shown.
Binary file added trdg/fonts/hi/gargi.ttf
Binary file not shown.
26 changes: 7 additions & 19 deletions trdg/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,26 +111,14 @@ def draw_bounding_boxes(


def make_filename_valid(value: str, allow_unicode: bool = False) -> str:
    """
    Turn arbitrary text into a string that is safe to use as a file name.

    Code adapted from: https://docs.djangoproject.com/en/4.0/_modules/django/utils/text/#slugify

    The text is normalized (NFKC when 'allow_unicode' is True, otherwise
    NFKD followed by dropping every non-ASCII character). Characters that
    are not alphanumerics, underscores, hyphens, whitespace or Unicode
    combining marks are removed, so path separators and shell/file-system
    metacharacters such as ``/ \\ : ? < > | * $ ` "`` never reach the file
    system. The result is cut to at most 200 characters and trailing
    whitespace is stripped. Case and inner whitespace are left untouched.

    Args:
        value: Text to sanitize; it is passed through ``str()`` first.
        allow_unicode: Keep non-ASCII characters when True.

    Returns:
        The sanitized name. It may be empty, e.g. when 'allow_unicode' is
        False and the text contains no ASCII characters.
    """
    value = str(value)
    if allow_unicode:
        value = unicodedata.normalize("NFKC", value)
    else:
        value = (
            unicodedata.normalize("NFKD", value)
            .encode("ascii", "ignore")
            .decode("ascii")
        )

    def _is_allowed(char: str) -> bool:
        # Same set as the regex class [\w\s-] ...
        if char.isalnum() or char in "_-" or char.isspace():
            return True
        # ... plus combining marks (categories Mn/Mc/Me). Python's \w does
        # not match them, so a plain [^\w\s-] filter deletes the vowel signs
        # and viramas of Indic scripts and corrupts the label.
        return unicodedata.category(char).startswith("M")

    value = "".join(char for char in value if _is_allowed(char))

    # Image names will be shortened to avoid exceeding the max filename
    # length. Strip after truncating so the cut cannot leave trailing
    # whitespace at the end of the name.
    return value[:200].rstrip()


Expand Down