Make output dirs customizable and enable parallel generation #120 #114 #121

Merged 1 commit on Oct 27, 2024
Makefile (2 changes: 1 addition & 1 deletion)
@@ -4,7 +4,7 @@ lint:
mypy podcastfy/*.py

test:
python3 -m pytest tests
poetry run pytest -n auto

doc-gen:
sphinx-apidoc -f -o ./docs/source ./podcastfy
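The new test target runs the suite in parallel with pytest-xdist (`-n auto` starts roughly one worker per available CPU), so tests that write output files must not collide on paths. Below is a minimal sketch of keeping per-worker outputs separate, assuming pytest-xdist is installed as a dev dependency; the fixture name and directory layout are illustrative and not part of this PR.

```python
# conftest.py sketch (illustrative, not part of this PR): give each
# pytest-xdist worker its own output directory so tests launched by
# `pytest -n auto` never write to the same files.
import os

import pytest


@pytest.fixture
def worker_output_dir(tmp_path):
    # pytest-xdist exports the worker id (e.g. "gw0") in this env var;
    # it is unset when tests run without -n, so fall back to "main".
    worker_id = os.environ.get("PYTEST_XDIST_WORKER", "main")
    out_dir = tmp_path / f"output_{worker_id}"
    out_dir.mkdir()
    return out_dir
```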
podcastfy/client.py (40 changes: 17 additions & 23 deletions)
@@ -43,20 +43,6 @@ def process_content(
):
"""
Process URLs, a transcript file, image paths, or raw text to generate a podcast or transcript.

Args:
urls (Optional[List[str]]): A list of URLs to process.
transcript_file (Optional[str]): Path to a transcript file.
tts_model (str): The TTS model to use ('openai', 'elevenlabs' or 'edge'). Defaults to 'edge'.
generate_audio (bool): Whether to generate audio or just a transcript. Defaults to True.
config (Config): Configuration object to use. If None, default config will be loaded.
conversation_config (Optional[Dict[str, Any]]): Custom conversation configuration.
image_paths (Optional[List[str]]): List of image file paths to process.
is_local (bool): Whether to use a local LLM. Defaults to False.
text (Optional[str]): Raw text input to be processed.

Returns:
Optional[str]: Path to the final podcast audio file, or None if only generating a transcript.
"""
try:
if config is None:
@@ -69,32 +55,36 @@ def process_content(
if conversation_config:
conv_config.configure(conversation_config)

# Get output directories from conversation config
tts_config = conv_config.get('text_to_speech', {})
output_directories = tts_config.get('output_directories', {})

if transcript_file:
logger.info(f"Using transcript file: {transcript_file}")
with open(transcript_file, "r") as file:
qa_content = file.read()
else:
content_generator = ContentGenerator(
api_key=config.GEMINI_API_KEY, conversation_config=conv_config.to_dict()
api_key=config.GEMINI_API_KEY,
conversation_config=conv_config.to_dict()
)

combined_content = ""

if urls:
logger.info(f"Processing {len(urls)} links")
content_extractor = ContentExtractor()
# Extract content from links
contents = [content_extractor.extract_content(link) for link in urls]
# Combine all extracted content
combined_content += "\n\n".join(contents)

if text:
combined_content += f"\n\n{text}"

# Generate Q&A content
# Generate Q&A content using output directory from conversation config
random_filename = f"transcript_{uuid.uuid4().hex}.txt"
transcript_filepath = os.path.join(
config.get("output_directories")["transcripts"], random_filename
output_directories.get("transcripts", "data/transcripts"),
random_filename
)
qa_content = content_generator.generate_qa_content(
combined_content,
@@ -105,15 +95,19 @@

if generate_audio:
api_key = None
# edge does not require an API key
if tts_model != "edge":
api_key = getattr(config, f"{tts_model.upper()}_API_KEY")

text_to_speech = TextToSpeech(model=tts_model, api_key=api_key, conversation_config=conv_config.to_dict())
# Convert text to speech using the specified model
text_to_speech = TextToSpeech(
model=tts_model,
api_key=api_key,
conversation_config=conv_config.to_dict()
)

random_filename = f"podcast_{uuid.uuid4().hex}.mp3"
audio_file = os.path.join(
config.get("output_directories")["audio"], random_filename
output_directories.get("audio", "data/audio"),
random_filename
)
text_to_speech.convert_to_speech(qa_content, audio_file)
logger.info(f"Podcast generated successfully using {tts_model} TTS model")
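For reference, the new lookup reads the output directories from the `text_to_speech` section of the conversation config and falls back to `data/transcripts` and `data/audio` when they are not set. A standalone sketch of that behavior follows; the config dict is hand-written here to stand in for `conv_config.to_dict()`.

```python
# Minimal sketch of the new output-directory resolution in client.py;
# the dict below stands in for conv_config.to_dict() and is illustrative.
import os
import uuid

conv_config = {
    "text_to_speech": {
        "output_directories": {
            "transcripts": "./data/transcripts",
            "audio": "./data/audio",
        }
    }
}

tts_config = conv_config.get("text_to_speech", {})
output_directories = tts_config.get("output_directories", {})

# Random filenames keep parallel runs from clobbering each other's files.
transcript_filepath = os.path.join(
    output_directories.get("transcripts", "data/transcripts"),
    f"transcript_{uuid.uuid4().hex}.txt",
)
audio_file = os.path.join(
    output_directories.get("audio", "data/audio"),
    f"podcast_{uuid.uuid4().hex}.mp3",
)
print(transcript_filepath)
print(audio_file)
```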
podcastfy/config.yaml (6 changes: 1 addition & 5 deletions)
@@ -1,7 +1,3 @@
output_directories:
transcripts: "./data/transcripts"
audio: "./data/audio"

content_generator:
gemini_model: "gemini-1.5-pro-latest"
max_output_tokens: 8192
@@ -46,4 +42,4 @@ website_extractor:
- 'aside'
- 'noscript'
user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
timeout: 10 # Request timeout in seconds
timeout: 10 # Request timeout in seconds
podcastfy/conversation_config.yaml (5 changes: 4 additions & 1 deletion)
@@ -22,6 +22,9 @@ user_instructions: ""

text_to_speech:
default_tts_model: "edge"
output_directories:
transcripts: "./data/transcripts"
audio: "./data/audio"
elevenlabs:
default_voices:
question: "Chris"
@@ -38,4 +41,4 @@
answer: "en-US-EricNeural"
audio_format: "mp3"
temp_audio_dir: "data/audio/tmp/"
ending_message: "Bye Bye!"
ending_message: "Bye Bye!"
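With the directories now living under `text_to_speech` in the conversation config, callers can override them per run by passing a `conversation_config` dict. A hedged sketch is shown below, assuming `process_content` is importable from `podcastfy.client`, that `configure()` merges the passed dict over the YAML defaults, and that API keys (e.g. `GEMINI_API_KEY`) are already set up via the default config; the URL and paths are placeholders.

```python
# Sketch of overriding the customizable output directories from calling
# code; URL and directory names are placeholders, not part of this PR.
from podcastfy.client import process_content

custom_config = {
    "text_to_speech": {
        "output_directories": {
            "transcripts": "/tmp/my_podcasts/transcripts",
            "audio": "/tmp/my_podcasts/audio",
        }
    }
}

audio_path = process_content(
    urls=["https://example.com/article"],
    tts_model="edge",          # edge requires no API key
    generate_audio=True,
    conversation_config=custom_config,
)
print(audio_path)
```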