
Commit

fix, chore: change to tqdm.write for better readability, update math instruct sys-prompts
vTuanpham committed Nov 13, 2023
1 parent 511ca84 commit 7f4910f
Showing 2 changed files with 11 additions and 11 deletions.
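For context on the headline change: `tqdm.write` clears the active progress bar, prints the message, then redraws the bar, whereas a plain `print` interleaves with the bar and garbles the terminal output. A minimal sketch of the difference (the loop body and sleep are illustrative):

```python
import time
from tqdm import tqdm

for i in tqdm(range(5), desc="Translating"):
    # print(f"finished step {i}")    # would tear the progress bar apart
    tqdm.write(f"finished step {i}")  # prints cleanly above the bar
    time.sleep(0.2)
```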
2 changes: 1 addition & 1 deletion examples/TIGER-Lab-MathInstruct/TigerLabMathInstruct.py
@@ -84,7 +84,7 @@ def convert(self):
data_dict['answer_lengths'] = None
data_converted.append(data_dict)

- self.converted_data = data_converted[20000:120000]
+ self.converted_data = data_converted[20000:22000]

pass
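The slice change above shrinks the converted subset from 100,000 examples to 2,000; this is plain Python slicing (the list below is a stand-in for `data_converted`):

```python
data_converted = [f"example {i}" for i in range(200000)]  # stand-in data
assert len(data_converted[20000:120000]) == 100000  # old slice
assert len(data_converted[20000:22000]) == 2000     # new slice
```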

20 changes: 10 additions & 10 deletions translator/data_parser.py
Expand Up @@ -89,7 +89,7 @@ def post_translate_validate(self) -> None:
if contain_code:
example_filters += 1
if len(self.converted_data) - 1 == idx:
print(f"Number of example with code: {example_filters}")
tqdm.write(f"Number of example with code: {example_filters}")
break
elif key == self.target_fields[-1]:
validated_translate_data.append(example)
@@ -153,10 +153,10 @@ def translate_converted(self, en_data: List[str] = None,
num_large_chunks = len(converted_data) / self.large_chunks_threshold
large_chunks = [converted_data[x:x + self.large_chunks_threshold] for x in
range(0, len(converted_data), self.large_chunks_threshold)]
print(f"\n Data is way too large, spliting data into {num_large_chunks} large chunk for sequential translation\n")
tqdm.write(f"\n Data is way too large, spliting data into {num_large_chunks} large chunk for sequential translation\n")

for idx, large_chunk in enumerate(tqdm(large_chunks, desc=f"Translating large chunk ", colour="red")):
print(f" Processing large chunk No: {idx}")
tqdm.write(f" Processing large chunk No: {idx}")
self.translate_converted(large_chunk=large_chunk)
return None
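The `large_chunks` list comprehension above is the usual stride-based chunking idiom; a self-contained sketch (the threshold value is illustrative):

```python
from typing import List

def split_into_chunks(data: List[str], threshold: int) -> List[List[str]]:
    # One chunk per `threshold` items; the last chunk may be shorter.
    return [data[x:x + threshold] for x in range(0, len(data), threshold)]

chunks = split_into_chunks([f"example {i}" for i in range(25)], threshold=10)
assert [len(c) for c in chunks] == [10, 10, 5]
```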

@@ -165,8 +165,8 @@ def translate_converted(self, en_data: List[str] = None,
num_threads = len(converted_data) / self.max_example_per_thread
chunks = [converted_data[x:x + self.max_example_per_thread] for x in
range(0, len(converted_data), self.max_example_per_thread)]
print(f"\n Data too large, splitting data into {num_threads} chunk, each chunk is {len(chunks[0])}"
f" Processing with multithread...\n")
tqdm.write(f"\n Data too large, splitting data into {num_threads} chunk, each chunk is {len(chunks[0])}"
f" Processing with multithread...\n")
with ThreadPoolExecutor(max_workers=num_threads) as executor:
futures = []
finished_task = 0
@@ -181,10 +181,10 @@ def callback_done(future):
with lock:
translated_data += future.result()
finished_task += 1
print("\nTask finished, adding translated data to result\n")
tqdm.write("\nTask finished, adding translated data to result...")
else:
print(f"\nTask failed with the following error: {future.exception()}"
f"\nrestarting thread when others finished\n")
tqdm.write(f"\nTask failed with the following error: {future.exception()}."
f"\nRestarting thread when others finished\n")
pass

for idx, chunk in enumerate(chunks):
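The surrounding hunk follows the standard `ThreadPoolExecutor` done-callback pattern: each worker's result is appended to shared state under a lock when its future completes. A minimal sketch of that pattern, with `translate_chunk` as a hypothetical stand-in for the real per-chunk translation call:

```python
import threading
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

translated_data = []
lock = threading.Lock()

def translate_chunk(chunk):  # hypothetical stand-in for the real worker
    return [text.upper() for text in chunk]

def callback_done(future):
    if future.exception() is None:
        with lock:  # callbacks run on worker threads, so guard shared state
            translated_data.extend(future.result())
        tqdm.write("Task finished, adding translated data to result...")
    else:
        tqdm.write(f"Task failed with the following error: {future.exception()}")

with ThreadPoolExecutor(max_workers=2) as executor:
    for chunk in [["hello", "world"], ["foo", "bar"]]:
        executor.submit(translate_chunk, chunk).add_done_callback(callback_done)
```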
@@ -207,7 +207,7 @@ def callback_done(future):
for future_dict in futures:
# If an exception occurs in one of the threads, restart that thread with its specific chunk
if future_dict['future'].exception():
- print(
+ tqdm.write(
f"\n Thread {future_dict['idx']} failed, restarting thread with chunk {future_dict['idx']}\n")
backup_future_chunk = executor.submit(self.translate_converted, chunks[future_dict['idx']],
f"Backup chunk {future_dict['idx']}", Translator())
@@ -240,7 +240,7 @@ def callback_done(future):
if not desc:
raise f" Connection timeout, please provide better connection"
else:
print(f"\n Connection timeout from thread {desc}\n")
tqdm.write(f"\n Connection timeout from thread {desc}\n")
raise f" Connection timeout raise from thread {desc}"

@abstractmethod
