
Commit

Merge pull request #35 from PerfectThymeTech/marvinbuss/add_language_hint

Add Language Hint
marvinbuss authored Jul 11, 2024
2 parents 12b3389 + d5197ed commit 23d6c19
Showing 3 changed files with 24 additions and 12 deletions.
code/durablefunction/models/newstagextraction.py (2 changes: 2 additions & 0 deletions)
@@ -87,6 +87,7 @@ class VideoIndexerTranscriptItem(BaseModel):
 
 
 class LoadVideoindexerContentResponse(BaseModel):
+    language: str
     transcript_text: str
     transcript: List[VideoIndexerTranscriptItem]
 
@@ -102,6 +103,7 @@ def from_json(data: str):
 class InvokeLlmRequest(BaseModel):
     content_text: str
     content_details: str
+    content_language: str
     instance_id: str
 
     @staticmethod
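
For illustration only, a minimal sketch of how the extended InvokeLlmRequest model could be constructed once the new language field is populated. The pydantic class below is re-declared from the fields visible in this diff, and all values are invented; they are not taken from the repository.

from pydantic import BaseModel

class InvokeLlmRequest(BaseModel):
    content_text: str
    content_details: str
    content_language: str
    instance_id: str

# Hypothetical values; only the field names come from the diff above.
request = InvokeLlmRequest(
    content_text="...transcript text...",
    content_details="This is a tv news show.",
    content_language="en-US",
    instance_id="0123456789abcdef",
)
print(request.model_dump_json())  # pydantic v2; use request.json() on v1
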
code/durablefunction/newstagextraction/llm.py (8 changes: 7 additions & 1 deletion)
@@ -48,6 +48,7 @@ def __create_llm_chain(
         prompt.input_variables = [
             "format_sample",
             "news_content",
+            "language",
             "news_show_details",
         ]
 
@@ -113,8 +114,13 @@ def invoke_llm_chain(
         self,
         news_content: str,
         news_show_details: str,
+        language: str,
     ) -> InvokeLlmResponse:
         result: InvokeLlmResponse = self.__llm_chain.invoke(
-            {"news_content": news_content, "news_show_details": news_show_details}
+            {
+                "news_content": news_content,
+                "news_show_details": news_show_details,
+                "language": language,
+            },
         )
         return result
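
The diff only registers "language" as an additional prompt input variable; the prompt text itself is not part of this commit. As a rough sketch of what consuming that variable might look like with a LangChain PromptTemplate (the template wording and the langchain_core import are assumptions, not the repository's actual prompt):

from langchain_core.prompts import PromptTemplate

# Invented template for illustration; only the input variable names
# ("format_sample", "news_content", "language", "news_show_details") match the diff.
prompt = PromptTemplate.from_template(
    "You are tagging a news show.\n"
    "Show details: {news_show_details}\n"
    "Write all tags in this language: {language}\n"
    "Expected output format: {format_sample}\n"
    "Transcript:\n{news_content}"
)

print(
    prompt.format(
        news_show_details="This is a tv news show.",
        language="en-US",
        format_sample='{"tags": []}',
        news_content="...",
    )
)
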
code/durablefunction/newstagextraction/orchestration.py (26 changes: 15 additions & 11 deletions)
@@ -92,6 +92,7 @@ def newstag_extraction_orchestrator(context: df.DurableOrchestrationContext):
     input_invoke_llm: InvokeLlmRequest = InvokeLlmRequest(
         content_text=result_load_videoindexer_content.transcript_text,
         content_details="This is a tv news show.",
+        content_language=result_load_videoindexer_content.language,
         instance_id=context.instance_id,
     )
     result_invoke_llm: InvokeLlmResponse = yield context.call_activity_with_retry(
@@ -147,23 +148,25 @@ async def load_videoindexer_content(
     data_json = json.loads(data)
     logging.info(f"Loaded json data from storage: {data_json}")
 
-    # TODO: Handle errors
-
-    # Generate Transcript fom JSON
-    transcript_text_list = []
-    transcript_list = []
     # Pop video from list
     try:
-        transcript = (
-            data_json.get("videos", [{"insights": {"transcript": []}}])
-            .pop(0)
-            .get("insights", {"transcript": []})
-            .get("transcript", [])
+        video = data_json.get("videos", [{"insights": {"transcript": []}}]).pop(0)
+        transcript = video.get("insights", {"transcript": []}).get("transcript", [])
+        language = video.get("insights", {"sourceLanguage": "Unknown"}).get(
+            "sourceLanguage", "Unknown"
         )
     except IndexError as e:
         logging.error(
             f"Index error when loading the video indexer data, so setting empty transcript: '{e}'"
         )
         transcript = []
+        language = "Unknown"
 
+    # TODO: Handle errors
+
+    # Generate Transcript fom JSON
+    transcript_text_list = []
+    transcript_list = []
 
     # Filter items in transcript
     index_start = 0
@@ -189,7 +192,7 @@
     logging.info(f"Loaded transcript text: {transcript_text}")
     logging.info(f"Loaded transcript items: {len(transcript_list)}")
     response: LoadVideoindexerContentResponse = LoadVideoindexerContentResponse(
-        transcript_text=transcript_text, transcript=transcript_list
+        language=language, transcript_text=transcript_text, transcript=transcript_list
     )
 
     # Upload result
@@ -220,6 +223,7 @@ async def invoke_llm(inputData: InvokeLlmRequest) -> InvokeLlmResponse:
     llm_result: Dict[Any] = llm_ineractor.invoke_llm_chain(
         news_content=inputData.content_text,
         news_show_details=inputData.content_details,
+        language=inputData.content_language,
    )
     logging.info(f"LLM response: {json.dumps(llm_result)}")
 
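
To see the fallback behaviour of the parsing above in isolation, here is a small self-contained sketch run against a hand-written payload shaped like Azure Video Indexer output. The sample values, and any structure beyond the "videos", "insights", "transcript" and "sourceLanguage" keys, are invented for illustration.

# Hand-written sample payload; only the key names come from the diff above.
data_json = {
    "videos": [
        {
            "insights": {
                "sourceLanguage": "de-DE",
                "transcript": [{"text": "Guten Abend."}],
            }
        }
    ]
}

# Same extraction pattern as in load_videoindexer_content above;
# an empty "videos" list triggers IndexError and the "Unknown" fallback.
try:
    video = data_json.get("videos", [{"insights": {"transcript": []}}]).pop(0)
    transcript = video.get("insights", {"transcript": []}).get("transcript", [])
    language = video.get("insights", {"sourceLanguage": "Unknown"}).get(
        "sourceLanguage", "Unknown"
    )
except IndexError:
    transcript = []
    language = "Unknown"

print(language)    # de-DE
print(transcript)  # [{'text': 'Guten Abend.'}]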
