multimodal_speech.json

{
  "id": "3d2b943f-40bf-4440-8cb9-ab5ffdcb1f37",
  "data": {
    "nodes": [
      {
        "width": 384,
        "height": 493,
        "id": "TextGenerationOutput-te6yr",
        "type": "genericNode",
        "position": {
          "x": 1062.2121145634685,
          "y": 945.8848653186284
        },
        "data": {
          "type": "TextGenerationOutput",
          "node": {
            "template": {
              "chain": {
                "required": false,
                "placeholder": "",
                "show": true,
                "multiline": false,
                "password": false,
                "name": "chain",
                "display_name": "Chain",
                "advanced": false,
                "dynamic": false,
                "info": "",
                "type": "Chain",
                "list": false
              },
              "documents": {
                "required": false,
                "placeholder": "",
                "show": true,
                "multiline": false,
                "password": false,
                "name": "documents",
                "display_name": "Documents",
                "advanced": false,
                "dynamic": false,
                "info": "",
                "type": "Document",
                "list": true
              },
              "multimodal": {
                "required": false,
                "placeholder": "",
                "show": true,
                "multiline": false,
                "password": false,
                "name": "multimodal",
                "display_name": "Multi Modal",
                "advanced": false,
                "dynamic": false,
                "info": "",
                "type": "MultiModal",
                "list": false
              },
              "query": {
                "required": false,
                "placeholder": "",
                "show": true,
                "multiline": false,
                "password": false,
                "name": "query",
                "display_name": "Query",
                "advanced": false,
                "input_types": [
                  "Input"
                ],
                "dynamic": false,
                "info": "Provide a Query to invoke a chain if stack doesn't contain prompt template and memory.",
                "type": "str",
                "list": false,
                "value": ""
              },
              "_type": "TextGenerationOutput"
            },
            "description": "Text Generation Output is used to download the Documents, Connect a component that returns Documents or connect a Chain Component and provide the Query to invoke a chain.",
            "base_classes": [
              "TextGenerationOutput"
            ],
            "display_name": "TextGenerationOutput",
            "custom_fields": {},
            "output_types": [
              "Output"
            ],
            "documentation": "https://docs.aiplanet.com/components/outputs",
            "beta": false,
            "error": null
          },
          "id": "TextGenerationOutput-te6yr"
        },
        "positionAbsolute": {
          "x": 1062.2121145634685,
          "y": 945.8848653186284
        }
      },
      {
        "width": 384,
        "height": 465,
        "id": "OpenAITextToSpeech-uyCJz",
        "type": "genericNode",
        "position": {
          "x": 439.8101166484677,
          "y": 1067.81775960124
        },
        "data": {
          "type": "OpenAITextToSpeech",
          "node": {
            "template": {
              "code": {
                "dynamic": true,
                "required": true,
                "placeholder": "",
                "show": false,
                "multiline": true,
                "value": "from genflow import CustomComponent\nfrom genflow.base.multimodal.utils import MediaType, MultiModal, SourceType, MediaFormat\nfrom openai import OpenAI\nimport base64\n\n\nclass OpenAITextToSpeech(CustomComponent):\n    display_name: str = \"OpenAITextToSpeech\"\n    description: str = \"Convert your text into Speech\"\n\n    VOICES = [\"alloy\", \"echo\", \"fable\", \"onyx\", \"nova\", \"shimmer\"]\n\n    def build_config(self):\n        return {\n            \"api_key\": {\n                \"display_name\": \"OpenAI API Key\",\n                \"field_type\": \"str\",\n                \"password\": True,\n                \"required\": True,\n            },\n            \"voice\": {\n                \"display_name\": \"Choose the Voice\",\n                \"value\": \"alloy\",\n                \"options\": self.VOICES,\n                \"required\": True,\n            },\n            \"text_input\": {\n                \"display_name\": \"Text Input\",\n                \"field_type\": \"str\",\n                \"value\": \"\",\n                \"input_types\": [\"Input\"],\n                \"required\": True,\n            },\n            \"code\": {\"show\": False},\n        }\n\n    def build(self, api_key: str, voice: str, text_input: str) -> MultiModal:\n        client = OpenAI(api_key=api_key)\n        response = client.audio.speech.create(\n            model=\"tts-1\", voice=voice, input=text_input\n        )\n\n        encoded_data = base64.b64encode(response.content)\n        encoded_string = encoded_data.decode(\"utf-8\")\n\n        return MultiModal(\n            source=encoded_string,\n            source_type=SourceType.BASE64,\n            media_type=MediaType.AUDIO,\n            media_format=MediaFormat.MP3,\n            prompt=text_input,\n        )\n",
                "password": false,
                "name": "code",
                "advanced": false,
                "type": "code",
                "list": false
              },
              "_type": "CustomComponent",
              "api_key": {
                "required": true,
                "placeholder": "",
                "show": true,
                "multiline": false,
                "password": true,
                "name": "api_key",
                "display_name": "OpenAI API Key",
                "advanced": false,
                "dynamic": false,
                "info": "",
                "type": "str",
                "list": false,
                "value": ""
              },
              "text_input": {
                "required": true,
                "placeholder": "",
                "show": true,
                "multiline": false,
                "value": "",
                "password": false,
                "name": "text_input",
                "display_name": "Text Input",
                "advanced": false,
                "input_types": [
                  "Input"
                ],
                "dynamic": false,
                "info": "",
                "type": "str",
                "list": false
              },
              "voice": {
                "required": true,
                "placeholder": "",
                "show": true,
                "multiline": false,
                "value": "alloy",
                "password": false,
                "options": [
                  "alloy",
                  "echo",
                  "fable",
                  "onyx",
                  "nova",
                  "shimmer"
                ],
                "name": "voice",
                "display_name": "Choose the Voice",
                "advanced": false,
                "dynamic": false,
                "info": "",
                "type": "str",
                "list": true
              }
            },
            "description": "Convert your text into Speech",
            "base_classes": [
              "MultiModal"
            ],
            "display_name": "OpenAITextToSpeech",
            "custom_fields": {
              "api_key": null,
              "text_input": null,
              "voice": null
            },
            "output_types": [
              "OpenAITextToSpeech"
            ],
            "documentation": "",
            "beta": true,
            "error": null
          },
          "id": "OpenAITextToSpeech-uyCJz"
        },
        "selected": false,
        "dragging": false,
        "positionAbsolute": {
          "x": 439.8101166484677,
          "y": 1067.81775960124
        }
      },
      {
        "width": 384,
        "height": 457,
        "id": "Input-fFqOQ",
        "type": "genericNode",
        "position": {
          "x": -60.44911631733934,
          "y": 1036.332912771224
        },
        "data": {
          "type": "Input",
          "node": {
            "template": {
              "input_value": {
                "required": false,
                "placeholder": "",
                "show": true,
                "multiline": true,
                "value": "Hi There I am the tts model from OpenAI.",
                "password": false,
                "name": "input_value",
                "display_name": "Input Value",
                "advanced": false,
                "dynamic": false,
                "info": "",
                "type": "str",
                "list": false,
                "file_path": null
              },
              "input_key": {
                "required": true,
                "placeholder": "",
                "show": true,
                "multiline": false,
                "value": "input",
                "password": false,
                "name": "input_key",
                "display_name": "Input Key",
                "advanced": false,
                "dynamic": false,
                "info": "",
                "type": "str",
                "list": false
              },
              "input_type": {
                "required": true,
                "placeholder": "",
                "show": true,
                "multiline": false,
                "value": "Text",
                "password": false,
                "options": [
                  "File",
                  "Url",
                  "Text"
                ],
                "name": "input_type",
                "display_name": "Input Type",
                "advanced": false,
                "dynamic": false,
                "info": "",
                "type": "str",
                "list": false
              },
              "_type": "Input"
            },
            "description": "Input is used to specify the type of input.",
            "base_classes": [
              "Input"
            ],
            "display_name": "Input",
            "custom_fields": {},
            "output_types": [
              "Input"
            ],
            "documentation": "https://docs.aiplanet.com/components/inputs",
            "beta": false,
            "error": null
          },
          "id": "Input-fFqOQ"
        },
        "selected": false,
        "dragging": false,
        "positionAbsolute": {
          "x": -60.44911631733934,
          "y": 1036.332912771224
        }
      }
    ],
    "edges": [
      {
        "source": "OpenAITextToSpeech-uyCJz",
        "sourceHandle": "{œbaseClassesœ:[œMultiModalœ],œdataTypeœ:œOpenAITextToSpeechœ,œidœ:œOpenAITextToSpeech-uyCJzœ}",
        "target": "TextGenerationOutput-te6yr",
        "targetHandle": "{œfieldNameœ:œmultimodalœ,œidœ:œTextGenerationOutput-te6yrœ,œinputTypesœ:null,œtypeœ:œMultiModalœ}",
        "data": {
          "targetHandle": {
            "fieldName": "multimodal",
            "id": "TextGenerationOutput-te6yr",
            "inputTypes": null,
            "type": "MultiModal"
          },
          "sourceHandle": {
            "baseClasses": [
              "MultiModal"
            ],
            "dataType": "OpenAITextToSpeech",
            "id": "OpenAITextToSpeech-uyCJz"
          }
        },
        "style": {
          "stroke": "#555"
        },
        "className": "stroke-foreground  stroke-connection",
        "animated": false,
        "id": "reactflow__edge-OpenAITextToSpeech-uyCJz{œbaseClassesœ:[œMultiModalœ],œdataTypeœ:œOpenAITextToSpeechœ,œidœ:œOpenAITextToSpeech-uyCJzœ}-TextGenerationOutput-te6yr{œfieldNameœ:œmultimodalœ,œidœ:œTextGenerationOutput-te6yrœ,œinputTypesœ:null,œtypeœ:œMultiModalœ}"
      },
      {
        "source": "Input-fFqOQ",
        "sourceHandle": "{œbaseClassesœ:[œInputœ],œdataTypeœ:œInputœ,œidœ:œInput-fFqOQœ}",
        "target": "OpenAITextToSpeech-uyCJz",
        "targetHandle": "{œfieldNameœ:œtext_inputœ,œidœ:œOpenAITextToSpeech-uyCJzœ,œinputTypesœ:[œInputœ],œtypeœ:œstrœ}",
        "data": {
          "targetHandle": {
            "fieldName": "text_input",
            "id": "OpenAITextToSpeech-uyCJz",
            "inputTypes": [
              "Input"
            ],
            "type": "str"
          },
          "sourceHandle": {
            "baseClasses": [
              "Input"
            ],
            "dataType": "Input",
            "id": "Input-fFqOQ"
          }
        },
        "style": {
          "stroke": "#555"
        },
        "className": "stroke-foreground  stroke-connection",
        "animated": false,
        "id": "reactflow__edge-Input-fFqOQ{œbaseClassesœ:[œInputœ],œdataTypeœ:œInputœ,œidœ:œInput-fFqOQœ}-OpenAITextToSpeech-uyCJz{œfieldNameœ:œtext_inputœ,œidœ:œOpenAITextToSpeech-uyCJzœ,œinputTypesœ:[œInputœ],œtypeœ:œstrœ}"
      }
    ],
    "viewport": {
      "x": 362.55897688137367,
      "y": -303.6549353737929,
      "zoom": 0.5717035911649995
    }
  },
  "description": "This stack uses the OpenAI text to speech model (TTS) to convert text given in the input to a voice speech.",
  "name": "multimodal_speech",
  "flow_type": "text_generation"
}