feat: enhance FluxAPI to load descriptions from YAML file and improve prompt guidelines

Addresses — but doesn't close — #29, #33 and #34.
jmaddington · Jan 9, 2025 · 2b9fab9
1 parent b8a4db2
commit 2b9fab9
Show file tree

Hide file tree

Showing 2 changed files with 82 additions and 39 deletions.
diff --git a/api/app/clients/tools/structured/FluxAPI.js b/api/app/clients/tools/structured/FluxAPI.js
@@ -1,12 +1,11 @@
-// FluxAPI.js
-
+const fs = require('fs');
 const axios = require('axios');
-const { v4: uuidv4 } = require('uuid');
 const { Tool } = require('@langchain/core/tools');
+const { v4: uuidv4 } = require('uuid');
+const yaml = require('js-yaml');
 const { z } = require('zod');
 const { logger } = require('~/config');
 const { FileContext } = require('librechat-data-provider');
-const { processFileURL } = require('~/server/services/Files/process');
 
 class FluxAPI extends Tool {
   constructor(fields = {}) {
@@ -32,37 +31,67 @@ class FluxAPI extends Tool {
     this.description =
       "Use Flux to generate images from text descriptions. This tool is exclusively for visual content.";
 
-    // Aligned description_for_model with DALLE3 plugin
-    this.description_for_model = `// Whenever a description of an image is given, generate prompts (following these rules), and use Flux to create the image.
-    // Unless the user tells you not to, clean up their prompt and make it better before sending it to Flux.
-
-    // All prompts sent to Flux must abide by the following guidelines:
-      // 1. Prompts must be in English. Translate to English if needed.
-      // 2. One image per function call. Create only one image per request unless explicitly told to generate more than one image.
-      // 3. Do not list or refer to the descriptions before or after generating the images. You do not need to ask for permission to generate; just do it!
-      // 4. Visually describe the moods, details, structures, styles, and proportions of the image.
-      // 5. Craft your input by "showing" and not "telling" the imagery.
-      // 6. Generate images only once per human query unless explicitly requested by the user.
-      // 7. The prompt must intricately describe every part of the image in concrete, objective detail. THINK about what the end goal of the description is, and extrapolate that to what would make satisfying images.
-      // 8. When returning the image to the user, ensure the image is embedded in your response so the user can see it.
-      // 9. Do not include any additional text or descriptions around the image; just provide the image directly.
-      // 10. Do not mention or list download links, as they are available in the UI already.
-      // 11. Do not repeat the prompt or provide any captions or alt text.
-      // 12. Do not add any other commentary or explanations about the image.
-      
-      // 13. If more than one image is generated, embed each image separately in the response.
-      // - Default to the endpoint /v1/flux-pro-1.1 unless the user says otherwise.
-      // - Upsample if the user says so.
-      // - **Include the generated image(s) in your text response to the user by embedding the Markdown links.**
-      // - **Include the prompt you created for flux in your response so the user can see what you generated.**
-      
-      /* Available endpoints:
-       - /v1/flux-pro-1.1
-       - /v1/flux-pro
-       - /v1/flux-dev
-       - /v1/flux-pro-1.1-ultra
-      */
-    `;
+    // Try to load description from yaml file
+    let yamlDescription;
+    const yamlPaths = ['/app/fluxapi.yaml', '/workspaces/fluxapi.yaml'];
+
+    for (const path of yamlPaths) {
+      try {
+      if (fs.existsSync(path)) {
+        logger.debug(`[FluxAPI] Loading FluxAPI config from ${path}`);
+        const fileContents = fs.readFileSync(path, 'utf8');
+        const data = yaml.load(fileContents);
+        if (data && data.description_for_model) {
+        yamlDescription = data.description_for_model;
+        break;
+        }
+      }
+      } catch (err) {
+      logger.debug(`[FluxAPI] Failed to load FluxAPI config from ${path}: ${err.message}`);
+      }
+    }
+
+    if (!yamlDescription) {
+      this.description_for_model = `
+      // Use Flux to generate images from detailed text descriptions. Follow these guidelines:
+
+      1. Craft prompts in natural language, as if explaining to a human artist.
+      2. Be precise, detailed, and direct in your descriptions.
+      3. Structure your prompt to include:
+        - Subject: The main focus of the image
+        - Style: Artistic approach or visual aesthetic
+        - Composition: Arrangement of elements (foreground, middle ground, background)
+        - Lighting: Type and quality of light
+        - Color Palette: Dominant colors or scheme
+        - Mood/Atmosphere: Emotional tone or ambiance
+        - Technical Details: For photorealistic images, include camera settings, lens type, etc.
+        - Additional Elements: Supporting details or background information
+
+      4. Leverage Flux's advanced capabilities:
+        - Layered Images: Clearly describe elements in different layers of the image
+        - Contrasting Elements: Experiment with contrasting colors, styles, or concepts
+        - Transparent Materials: Describe see-through elements and their interactions
+        - Text Rendering: Utilize Flux's superior text integration abilities
+        - Creative Techniques: Consider style fusion, temporal narratives, or emotional gradients
+
+      5. For each human query, generate only one image unless explicitly requested otherwise.
+      6. Embed the generated image in your response without additional text or descriptions.
+      7. Do not mention download links or repeat the prompt.
+
+      8. Avoid common pitfalls:
+        - Don't overload the prompt with too many conflicting ideas
+        - Always guide the overall composition, not just individual elements
+        - Pay attention to lighting and atmosphere for mood and realism
+        - Avoid being too vague; provide specific details
+        - Always specify the desired artistic style to avoid defaulting to realism
+
+      Remember to balance specificity with creative freedom, allowing Flux to interpret and surprise you within the boundaries of your description.
+      `;
+    } else {
+      this.description_for_model = yamlDescription;
+    }
+
+    logger.debug('[FluxAPI] Description:', this.description_for_model);
 
     // Define the schema for structured input
     this.schema = z.object({
@@ -116,6 +145,11 @@ class FluxAPI extends Tool {
         .describe(
           'Generate less processed, more natural-looking images. Only works for /v1/flux-pro-1.1-ultra.'
         ),
+        endpoint: z
+      .enum(['/v1/flux-pro-1.1', '/v1/flux-pro', '/v1/flux-dev', '/v1/flux-pro-1.1-ultra'])
+      .optional()
+      .default('/v1/flux-pro')
+      .describe('Endpoint to use for image generation. Default is /v1/flux-pro.'),
     });
   }
 
@@ -131,7 +165,6 @@ class FluxAPI extends Tool {
     return `![generated image](${imageUrl})`;
   }
 
-
   returnValue(value) {
     if (this.isAgent === true && typeof value === 'string') {
       return [value, {}];
@@ -272,10 +305,12 @@ class FluxAPI extends Tool {
         logger.error('Error while saving the image:', details);
         return this.returnValue(`Failed to save the image locally. ${details}`);
       }
-    } // End of for-loop
+        } // End of for-loop
 
-    this.result = imagesMarkdown.trim();
-    return this.returnValue(this.result);
+        this.result = {
+      'Markdown Embeds for User': imagesMarkdown.trim().split('\n')
+        };
+        return this.returnValue(this.result);
   }
 }
 

diff --git a/fluxapi.yaml b/fluxapi.yaml
@@ -0,0 +1,8 @@
+description_for_model: |
+  Use Flux to generate stunning images. Guidelines:
+  1. Be specific in descriptions
+  2. Include style, composition, lighting
+  3. Consider mood and atmosphere
+  4. One image per request
+  5. Avoid conflicting elements
+  6. Always include a clown