diff --git a/providers/base_provider.py b/providers/base_provider.py index e6df1f9..f332069 100644 --- a/providers/base_provider.py +++ b/providers/base_provider.py @@ -12,23 +12,38 @@ def __init__(self): self.translator = None @abstractmethod - def _do_translate(self, input_data: Union[str, List[str]], - src: str, dest: str, - fail_translation_code:str = "P1OP1_F", - **kwargs) -> Union[str, List[str]]: + def _do_translate(self, input_data: Union[str, List[str]], src: str, dest: str, fail_translation_code:str = "P1OP1_F", **kwargs) -> Union[str, List[str]]: + """ + Perform translation of input data from source language to destination language. + + Args: + input_data (Union[str, List[str]]): The input data to be translated. It can be a single string or a list of strings. + src (str): The source language code. + dest (str): The destination language code. + fail_translation_code (str, optional): The code to be returned when translation fails. Defaults to "P1OP1_F". + **kwargs: Additional keyword arguments for translation. + + Returns: + Union[str, List[str]]: The translated output data. It can be a single string or a list of strings. 
+ """ raise NotImplemented(" The function _do_translate has not been implemented.") - @cached(max_size=5000, ttl=400, thread_safe=False) - def translate(self, input_data: Union[str, List[str]], - src: str, dest: str, - fail_translation_code: str="P1OP1_F") -> Union[str, List[str]]: + @cached(max_size=10000, thread_safe=False) + def translate(self, input_data: Union[str, List[str]], src: str, dest: str, fail_translation_code: str="P1OP1_F") -> Union[str, List[str]]: """ - Translate text input_data from a language to another language - :param input_data: The input_data (Can be string or list of strings) - :param src: The source lang of input_data - :param dest: The target lang you want input_data to be translated - :param fail_translation_code: The code that can be use for unavoidable translation error and can be remove post translation - :return: str or list of str + Translates the input data from the source language to the destination language using the assigned translator object. + Args: + input_data (Union[str, List[str]]): The input data to be translated. It can be either a string or a list of strings. + src (str): The source language code. + dest (str): The destination language code. + fail_translation_code (str, optional): The code to be returned in case of translation failure. Defaults to "P1OP1_F". + Returns: + Union[str, List[str]]: The translated output data. It will have the same type as the input data. + Raises: + TypeError: If the input_data is not of type str or List[str], or if the elements of input_data list are not of type str. + Notes: + - The translator object instance must be assigned to self.translator before calling this method. + - The translation is performed by calling the _do_translate() method. 
""" # Type check for input_data @@ -43,7 +58,8 @@ def translate(self, input_data: Union[str, List[str]], # Perform the translation translated_instance = self._do_translate(input_data, - src=src, dest=dest, + src=src, + dest=dest, fail_translation_code=fail_translation_code) assert type(input_data) == type(translated_instance),\ diff --git a/providers/groq_provider.py b/providers/groq_provider.py index 8271df4..433c54b 100644 --- a/providers/groq_provider.py +++ b/providers/groq_provider.py @@ -137,11 +137,9 @@ def _do_translate(self, input_data: Union[str, List[str]], } ], "model": "llama3-8b-8192", - "temperature": 0.45, - "top_p": 0.5, + "temperature": 0.5, + "top_p": 0.65, "max_tokens": 8000, - "frequency_penalty": 0.4, - "presence_penalty": 0.25, "stream": False, } @@ -154,7 +152,6 @@ def _do_translate(self, input_data: Union[str, List[str]], # Clear the cache if the cache is too large if len(CACHE_INIT_PROMPT) > 5: - CACHE_INIT_PROMPT.pop() _, CACHE_INIT_PROMPT = pop_half_dict(CACHE_INIT_PROMPT) if len(CACHE_FAIL_PROMPT) > 10000: _, CACHE_FAIL_PROMPT = pop_half_set(CACHE_FAIL_PROMPT) diff --git a/providers/utils/utils.py b/providers/utils/utils.py index 4d65322..fbec12d 100644 --- a/providers/utils/utils.py +++ b/providers/utils/utils.py @@ -84,7 +84,7 @@ def wrapper(*args, **kwargs): return decorator -def brust_throttle(calls_per_minute: int, verbose: bool=False, extra_delay: float=1.25): +def brust_throttle(calls_per_minute: int, verbose: bool=False, extra_delay: float=1.25) -> Callable: """ Throttles function calls to a specified rate, with an optional extra delay. diff --git a/translator/data_parser.py b/translator/data_parser.py index 58102fc..6cb4706 100644 --- a/translator/data_parser.py +++ b/translator/data_parser.py @@ -147,18 +147,44 @@ def __init__(self, file_path: str, @property def get_translator(self) -> Provider: + """ + Creates a translator instance by calling a deep copy of self.translator. + + Returns: + Provider: The object returned by calling the deep-copied self.translator with no arguments. 
+ """ return deepcopy(self.translator)() @staticmethod - def id_generator(size=6, chars=string.ascii_uppercase + string.digits) -> str: + def id_generator(size: int = 6, chars: str = string.ascii_uppercase + string.digits) -> str: + """ + Generate a random string of specified size using the given characters. + + Parameters: + - size (int): The length of the generated string. Default is 6. + - chars (str): The characters to be used for generating the string. Default is a combination of uppercase letters and digits. + + Returns: + - str: The randomly generated string. + """ return ''.join(random.choice(chars) for _ in range(size)) @staticmethod def split_list(input_list: List[str], max_sub_length: int) -> List[list]: + """ + Splits a list into sublists of a maximum specified length. + + Args: + input_list (List[str]): The input list to be split. + max_sub_length (int): The maximum length of each sublist. + + Returns: + List[list]: A list of sublists, where each sublist has a maximum length of max_sub_length. + """ return [input_list[x:x + max_sub_length] for x in range(0, len(input_list), max_sub_length)] @staticmethod - def flatten_list(nested_list: list) -> list: + def flatten_list(nested_list: List) -> List: ''' Turn a list from [[], [], []] -> [] '''