-
Notifications
You must be signed in to change notification settings - Fork 233
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
75 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
16 changes: 16 additions & 0 deletions
16
libs/megaparse/src/megaparse/formatter/structured_formatter/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
from langchain_core.language_models.chat_models import BaseChatModel | ||
from megaparse.formatter.base import BaseFormatter | ||
from pydantic import BaseModel | ||
|
||
|
||
class StructuredFormatter(BaseFormatter):
    """Base formatter that carries a pydantic output model next to the chat model.

    The chat model is handed to ``BaseFormatter``; the output model is kept on
    the instance as ``output_model``. ``format_string`` is left unimplemented
    in this class.
    """

    def __init__(self, model: BaseChatModel, output_model: type[BaseModel]):
        """Store the chat model (via the parent class) and the output model.

        Args:
            model: Chat model forwarded to ``BaseFormatter.__init__``.
            output_model: The pydantic model class kept as ``self.output_model``.
        """
        super().__init__(model)
        self.output_model = output_model

    async def format_string(self, text: str, file_path: str | None = None) -> str:
        """Format ``text``; not implemented in this class.

        Args:
            text: The text to format.
            file_path: Optional file path associated with the text.

        Raises:
            NotImplementedError: Always.
        """
        # FIXME: Return a structured output of type BaseModel ?
        raise NotImplementedError
42 changes: 42 additions & 0 deletions
42
libs/megaparse/src/megaparse/formatter/structured_formatter/custom_structured_formatter.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
from typing import Optional | ||
|
||
from langchain_core.language_models.chat_models import BaseChatModel | ||
from megaparse.formatter.structured_formatter import StructuredFormatter | ||
from pydantic import BaseModel | ||
|
||
|
||
class CustomStructuredFormatter(StructuredFormatter):
    """Formatter that asks the chat model for output shaped like ``output_model``."""

    async def format_string(
        self,
        text: str,
        file_path: str | None = None,
    ) -> str:
        """Structure the text using an AI language model.

        Args:
            text: The text to format. Must not be empty.
            file_path: The file path of the text. Not read by this method.

        Returns:
            The structured output serialized as a JSON string.

        Raises:
            ValueError: If no model is set, ``text`` is empty, or no output
                model is set.
            TypeError: If the model's output is not a pydantic ``BaseModel``.
        """
        if not self.model:
            raise ValueError("A Model is needed to use the CustomStructuredFormatter.")
        print("Formatting text using CustomStructuredFormatter...")
        # The previous guard was `len(text) < 0`, which can never be true, so
        # empty input was silently sent to the model. Reject it explicitly.
        if not text:
            raise ValueError(
                "A non empty text is needed to format text using CustomStructuredFormatter."
            )
        if not self.output_model:
            raise ValueError(
                "An output model is needed to structure text using CustomStructuredFormatter."
            )

        structured_model = self.model.with_structured_output(self.output_model)  # type: ignore

        # This is a coroutine: await the async entry point instead of calling
        # the blocking `invoke`, which would stall the event loop.
        formatted_text = await structured_model.ainvoke(
            f"Parse the text in a structured format: {text}"
        )
        # Explicit check rather than `assert`, which is stripped under `-O`.
        if not isinstance(formatted_text, BaseModel):
            raise TypeError("Model output is not a BaseModel.")

        return formatted_text.model_dump_json()
11 changes: 0 additions & 11 deletions
11
libs/megaparse/src/megaparse/formatter/structured_output/__init__.py
This file was deleted.
Oops, something went wrong.