from typing import List, Optional


def strip_prefixes(question: str, prefixes: Optional[List[str]] = None) -> str:
    """
    Strip one leading prefix (e.g. "Have you ever") from a question string.

    Matching is case-insensitive. Candidate prefixes are tried longest first,
    so a more specific prefix ("Do you think") wins over a shorter one that it
    starts with ("Do you"). A prefix only matches on a word boundary: when the
    prefix ends in a letter or digit, the character following it in the
    question must not be a letter or digit ("Can you" does not match
    "Can younger ...").

    Args:
        question (str): The question string from which a prefix is removed.
        prefixes (Optional[List[str]]): Prefixes to remove from the question.
            If omitted or empty, a default set of common prefixes is used.
            Empty strings in the list are ignored.

    Returns:
        str: The remainder of the question with surrounding whitespace
            stripped if a prefix matched; otherwise the original question,
            unchanged.

    Example:
        question = "Have you ever traveled abroad?"
        result = strip_prefixes(question)
        # result -> "traveled abroad?"
    """
    default_prefixes = [
        "Have you ever",
        "Did you ever",
        "Do you",
        "Is it true that",
        "Would you say",
        "Can you",
        "Are you aware that",
        "Do you think",
    ]
    candidates = prefixes or default_prefixes

    # Longest first so overlapping prefixes resolve to the most specific one.
    # sorted() is stable, so equal-length prefixes keep the caller's order.
    for prefix in sorted(candidates, key=len, reverse=True):
        if not prefix:
            # An empty prefix would "match" every question; skip it.
            continue

        # Compare only the leading slice, so the index used to cut the
        # question always refers to the original (non-lowercased) string.
        head = question[: len(prefix)]
        if head.lower() != prefix.lower():
            continue

        remainder = question[len(prefix):]
        # Word-boundary guard: do not cut through the middle of a word.
        if prefix[-1].isalnum() and remainder[:1].isalnum():
            continue

        return remainder.strip()

    return question