generated from opentensor/bittensor-subnet-template
-
Notifications
You must be signed in to change notification settings - Fork 45
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'staging' into features/penalty-rewards
- Loading branch information
Showing
17 changed files
with
325 additions
and
132 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
from abc import ABC, abstractmethod | ||
import bittensor as bt | ||
import re | ||
|
||
|
||
class BaseCleaner(ABC):
    """Abstract interface shared by all text cleaners.

    Both ``__init__`` and ``apply`` are abstract, so a concrete cleaner must
    define each of them before it can be instantiated.
    """

    @abstractmethod
    def __init__(self, **kwargs):
        """Accept cleaner-specific keyword options."""

    @abstractmethod
    def apply(self, generation: str) -> str:
        """Return the cleaned version of ``generation``."""
|
||
|
||
class RemoveQuotes(BaseCleaner):
    """Cleaner that strips quote characters from both ends of a generation."""

    def __init__(self, **kwargs) -> None:
        """Accept and ignore keyword options; this cleaner has no settings."""
        pass

    def apply(self, generation: str) -> str:
        """Strip leading and trailing single and double quotes.

        Args:
            generation (str): text to clean.

        Returns:
            str: ``generation`` without any run of ``"`` or ``'`` characters
            at its start or end. Quotes inside the text are left untouched.
        """
        # The message previously read "Pruning unfinished sentence.", which
        # describes a different cleaner and made debug logs misleading.
        bt.logging.debug("Removing quotes.")
        return generation.strip("\"'")
|
||
|
||
class PruneEnding(BaseCleaner):
    """Cleaner that trims a trailing, unfinished sentence from a generation."""

    def __init__(self, **kwargs):
        """Accept and ignore keyword options; this cleaner has no settings."""
        pass

    def apply(self, generation: str) -> str:
        """Cut the text after its last sentence terminator.

        Args:
            generation (str): text to clean.

        Returns:
            str: ``generation`` unchanged when it already ends with ``.``,
            ``?`` or ``!``, or contains none of them; otherwise the text up
            to and including the last such character.
        """
        terminators = (".", "?", "!")

        # Already a finished sentence: nothing to prune.
        if generation.endswith(terminators):
            return generation

        # rfind yields -1 for an absent character, so the maximum is -1 only
        # when no terminator occurs anywhere in the text.
        last_terminator = max(generation.rfind(mark) for mark in terminators)
        if last_terminator == -1:
            return generation

        # Keep everything up to the terminator, and include it (+1).
        return generation[: last_terminator + 1]
|
||
|
||
class RemoveRoles(BaseCleaner):
    """Cleaner that deletes role markers such as ``"User: "`` from a generation."""

    def __init__(self, **kwargs):
        """Accept and ignore keyword options; this cleaner has no settings."""
        pass

    def capitalize_sentences(self, input_string):
        """Capitalize the first character after .!?

        The text is split on whitespace that follows ``.``, ``!`` or ``?``
        and the pieces are rejoined with single spaces. Only the first
        character of each piece is upper-cased; the remainder is preserved.

        Args:
            input_string (str): text whose sentences should be capitalized.

        Returns:
            str: the re-joined text with each sentence starting upper-case.
        """
        sentences = re.split(r"(?<=[.!?])\s+", input_string)
        # str.capitalize() would also lowercase the rest of the sentence,
        # mangling acronyms, proper nouns and the pronoun "I"; upper-case
        # only the leading character instead.
        return " ".join(
            sentence[:1].upper() + sentence[1:] for sentence in sentences
        )

    def apply(self, generation: str) -> str:
        """Remove every known role marker, then re-capitalize sentences.

        Args:
            generation (str): text to clean.

        Returns:
            str: ``generation`` with all occurrences of the role markers
            removed and each sentence starting with an upper-case character.
        """
        # Order matters: "Dear AI, " must be tried before "Dear AI ".
        roles = (
            "User: ",
            "System: ",
            "Assistant: ",
            "Assistant, ",
            "Dear AI, ",
            "Dear AI ",
            "#Question: ",
        )
        for role in roles:
            # replace() is a no-op when the marker is absent.
            generation = generation.replace(role, "")

        # LLMs are good at being formal. Do the same if we remove a prefix.
        return self.capitalize_sentences(input_string=generation)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
from typing import List, Dict | ||
|
||
import bittensor as bt | ||
|
||
from prompting.cleaners.all_cleaners import RemoveQuotes, RemoveRoles, PruneEnding | ||
|
||
# Registry mapping the "name" used in a cleaning-pipeline step to the cleaner
# class that implements it.
SUPPORTED_CLEANERS = {
    "remove_quotes": RemoveQuotes,
    "remove_roles": RemoveRoles,
    "prune_ending": PruneEnding,
}
|
||
|
||
class CleanerPipeline:
    """Ordered sequence of cleaners that is applied to a generated string."""

    def __init__(self, cleaning_pipeline: List[Dict]) -> None:
        """CleanerPipeline is a pipeline that can be applied to any string to
        clean it of unwanted characters, punctuation, etc.

        Args:
            cleaning_pipeline (List[Dict]): List of Dicts that define the cleaning pipeline.
                Dictionaries MUST have the keyword "name" to be valid.
                Example: [{"name": "remove_quotes", "kwargs": {}}, {"name": "prune_ending", "kwargs": {}}]
        """
        self.cleaning_pipeline = cleaning_pipeline

    def apply(self, generation: str) -> str:
        """Apply cleaning steps to generation listed in cleaning_pipeline.

        Cleaning is best-effort: if any step fails, the error is logged and
        the text as cleaned by the steps that already succeeded is returned.

        Args:
            generation (str): string generated from LLM or otherwise.

        Returns:
            str: Clean generated string.
        """
        # Bound before the loop so the error handler can always report the
        # failing step, even when iteration itself fails.
        step = None
        try:
            for step in self.cleaning_pipeline:
                if "name" not in step or step["name"] not in SUPPORTED_CLEANERS:
                    raise ValueError(
                        f"Cleaning pipeline step {step} must have a name, or must be in SUPPORTED_CLEANERS."
                    )

                cleaner_cls = SUPPORTED_CLEANERS[step["name"]]

                # instantiate the cleaner with the kwargs
                kwargs = step.get("kwargs", {})
                cleaner = cleaner_cls(**kwargs)

                # apply all the filters for the specific task.
                generation = cleaner.apply(generation=generation)

            return generation

        except Exception as E:
            # A failing step may have no "name" key or may not be a dict at
            # all; indexing it here would raise again and defeat the fallback.
            step_name = step.get("name", step) if isinstance(step, dict) else step
            bt.logging.error(f"Failed to apply cleaning pipeline {step_name}. {E},")
            return generation
Oops, something went wrong.