
Commit

fix, chore: change to tqdm.write for better readability, update math instruct sys-prompts
vTuanpham committed Nov 13, 2023
1 parent 511ca84 commit 7f4910f
Showing 2 changed files with 11 additions and 11 deletions.
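For context on the headline change: `tqdm.write` clears the active progress bar, prints the message, then redraws the bar, whereas a plain `print` interleaves with the bar and garbles the terminal output. A minimal sketch of the difference (the loop body and sleep are illustrative):

```python
import time
from tqdm import tqdm

for i in tqdm(range(5), desc="Translating"):
    # print(f"finished step {i}")    # would tear the progress bar apart
    tqdm.write(f"finished step {i}")  # prints cleanly above the bar
    time.sleep(0.2)
```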
2 changes: 1 addition & 1 deletion examples/TIGER-Lab-MathInstruct/TigerLabMathInstruct.py
@@ -84,7 +84,7 @@ def convert(self):
data_dict['answer_lengths'] = None
data_converted.append(data_dict)

- self.converted_data = data_converted[20000:120000]
+ self.converted_data = data_converted[20000:22000]

pass
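The slice change above shrinks the converted subset from 100,000 examples to 2,000; this is plain Python slicing (the list below is a stand-in for `data_converted`):

```python
data_converted = [f"example {i}" for i in range(200000)]  # stand-in data
assert len(data_converted[20000:120000]) == 100000  # old slice
assert len(data_converted[20000:22000]) == 2000     # new slice
```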

20 changes: 10 additions & 10 deletions translator/data_parser.py
Expand Up @@ -89,7 +89,7 @@ def post_translate_validate(self) -> None:
if contain_code:
example_filters += 1
if len(self.converted_data) - 1 == idx:
print(f"Number of example with code: {example_filters}")
tqdm.write(f"Number of example with code: {example_filters}")
break
elif key == self.target_fields[-1]:
validated_translate_data.append(example)
@@ -153,10 +153,10 @@ def translate_converted(self, en_data: List[str] = None,
num_large_chunks = len(converted_data) / self.large_chunks_threshold
large_chunks = [converted_data[x:x + self.large_chunks_threshold] for x in
range(0, len(converted_data), self.large_chunks_threshold)]
print(f"\n Data is way too large, spliting data into {num_large_chunks} large chunk for sequential translation\n")
tqdm.write(f"\n Data is way too large, spliting data into {num_large_chunks} large chunk for sequential translation\n")

for idx, large_chunk in enumerate(tqdm(large_chunks, desc=f"Translating large chunk ", colour="red")):
print(f" Processing large chunk No: {idx}")
tqdm.write(f" Processing large chunk No: {idx}")
self.translate_converted(large_chunk=large_chunk)
return None
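The `large_chunks` list comprehension above is the usual stride-based chunking idiom; a self-contained sketch (the threshold value is illustrative):

```python
from typing import List

def split_into_chunks(data: List[str], threshold: int) -> List[List[str]]:
    # One chunk per `threshold` items; the last chunk may be shorter.
    return [data[x:x + threshold] for x in range(0, len(data), threshold)]

chunks = split_into_chunks([f"example {i}" for i in range(25)], threshold=10)
assert [len(c) for c in chunks] == [10, 10, 5]
```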

@@ -165,8 +165,8 @@ def translate_converted(self, en_data: List[str] = None,
num_threads = len(converted_data) / self.max_example_per_thread
chunks = [converted_data[x:x + self.max_example_per_thread] for x in
range(0, len(converted_data), self.max_example_per_thread)]
print(f"\n Data too large, splitting data into {num_threads} chunk, each chunk is {len(chunks[0])}"
f" Processing with multithread...\n")
tqdm.write(f"\n Data too large, splitting data into {num_threads} chunk, each chunk is {len(chunks[0])}"
f" Processing with multithread...\n")
with ThreadPoolExecutor(max_workers=num_threads) as executor:
futures = []
finished_task = 0
@@ -181,10 +181,10 @@ def callback_done(future):
with lock:
translated_data += future.result()
finished_task += 1
print("\nTask finished, adding translated data to result\n")
tqdm.write("\nTask finished, adding translated data to result...")
else:
print(f"\nTask failed with the following error: {future.exception()}"
f"\nrestarting thread when others finished\n")
tqdm.write(f"\nTask failed with the following error: {future.exception()}."
f"\nRestarting thread when others finished\n")
pass

for idx, chunk in enumerate(chunks):
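The surrounding hunk follows the standard `ThreadPoolExecutor` done-callback pattern: each worker's result is appended to shared state under a lock when its future completes. A minimal sketch of that pattern, with `translate_chunk` as a hypothetical stand-in for the real per-chunk translation call:

```python
import threading
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

translated_data = []
lock = threading.Lock()

def translate_chunk(chunk):  # hypothetical stand-in for the real worker
    return [text.upper() for text in chunk]

def callback_done(future):
    if future.exception() is None:
        with lock:  # callbacks run on worker threads, so guard shared state
            translated_data.extend(future.result())
        tqdm.write("Task finished, adding translated data to result...")
    else:
        tqdm.write(f"Task failed with the following error: {future.exception()}")

with ThreadPoolExecutor(max_workers=2) as executor:
    for chunk in [["hello", "world"], ["foo", "bar"]]:
        executor.submit(translate_chunk, chunk).add_done_callback(callback_done)
```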
@@ -207,7 +207,7 @@ def callback_done(future):
for future_dict in futures:
# If an exception occurs in one of the threads, restart that thread with its specific chunk
if future_dict['future'].exception():
- print(
+ tqdm.write(
f"\n Thread {future_dict['idx']} failed, restarting thread with chunk {future_dict['idx']}\n")
backup_future_chunk = executor.submit(self.translate_converted, chunks[future_dict['idx']],
f"Backup chunk {future_dict['idx']}", Translator())
@@ -240,7 +240,7 @@ def callback_done(future):
if not desc:
raise f" Connection timeout, please provide better connection"
else:
print(f"\n Connection timeout from thread {desc}\n")
tqdm.write(f"\n Connection timeout from thread {desc}\n")
raise f" Connection timeout raise from thread {desc}"

@abstractmethod
