Text Generate REST API schema #18

Merged (14 commits, Feb 6, 2024)

186 additions and 0 deletions in specification/protocol/generate_rest.yaml
@@ -0,0 +1,186 @@
openapi: 3.1.0
info:
  title: Open Inference API for text generation
  description: Open Inference API for text generation
  version: 1.0.0
components:
  schemas:
    GenerateRequest:
      type: object
      required:
        - text_input
      properties:
        text_input:
          type: string
        parameters:
          $ref: '#/components/schemas/GenerateParameters'
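A request body under this schema is a JSON object with the required `text_input` string and an optional `parameters` object. A minimal sketch (the prompt text and parameter values are illustrative, not part of the spec):

```python
import json

# Hypothetical example payload for GenerateRequest; "text_input" is the only
# required field, "parameters" is optional.
request = {
    "text_input": "What is the capital of France?",
    "parameters": {"temperature": 0.2, "max_tokens": 20},
}

# Minimal validity check against the schema's "required" list:
assert "text_input" in request and isinstance(request["text_input"], str)

body = json.dumps(request)
print(body)
```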
    GenerateParameters:
      type: object
      additionalProperties: {}
      properties:
        temperature:
          type: number
          format: float
          default: null
          minimum: 0
          description: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
        top_p:
          type: number
          format: float
          maximum: 1
          minimum: 0
          description: An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
        max_tokens:
          type: integer
          format: int32
          default: 20
          minimum: 0
          maximum: 512
          description: The maximum number of tokens to generate in the completion.
        stop:
          type: array
          items:
            type: string
          maxItems: 5
          description: Up to 5 sequences where the API will stop generating further tokens.
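The bounds in GenerateParameters (`temperature >= 0`, `top_p` in [0, 1], `max_tokens` up to 512, at most 5 `stop` sequences) are what a server would enforce before returning a 422. A sketch of that check; the helper name is ours, not part of the spec:

```python
def validate_parameters(params):
    """Check a GenerateParameters dict against the bounds in the schema.

    A sketch only; a conforming server would reject violations with a 422
    GenerateErrorResponse rather than raise here.
    """
    errors = []
    t = params.get("temperature")
    if t is not None and t < 0:
        errors.append("temperature must be >= 0")
    p = params.get("top_p")
    if p is not None and not (0 <= p <= 1):
        errors.append("top_p must be between 0 and 1")
    m = params.get("max_tokens")
    if m is not None and not (0 <= m <= 512):
        errors.append("max_tokens must be between 0 and 512")
    stop = params.get("stop")
    if stop is not None and len(stop) > 5:
        errors.append("stop allows at most 5 sequences")
    return errors

print(validate_parameters({"temperature": 0.8, "top_p": 0.1}))  # []
print(validate_parameters({"temperature": -1, "stop": ["a"] * 6}))
```

Note that `additionalProperties: {}` means the object is open: runtimes may accept extra, implementation-specific parameters beyond the four listed here.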
    GenerateResponse:
      type: object
      required:
        - text_output
        - model_name
      properties:
        text_output:
          type: string
        model_name:
          type: string
        model_version:
          type: string
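A client consuming a GenerateResponse can rely on `text_output` and `model_name` being present, while `model_version` is optional. A brief sketch (the body values are illustrative):

```python
import json

# Hypothetical GenerateResponse body; model_version may be absent.
raw = '{"text_output": "Paris.", "model_name": "my-llm", "model_version": "1"}'
resp = json.loads(raw)

# Both required fields must be present per the schema.
assert {"text_output", "model_name"} <= resp.keys()
print(resp["text_output"])
```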
    GenerateStreamResponse:
      type: object
      required:
        - text_output
        - model_name
      properties:
        text_output:
          type: string
        model_name:
          type: string
        model_version:
          type: string
        finish_reason:
          type: string

Review comment (Member), on `text_output`:

This is concatenated text output; we might still want to see the token generated for each iteration.

Reply (Contributor Author):

In the NVIDIA implementation, each response returns the cumulative set of tokens.

1st JSON response:

{
  "text_output": "Here is"
}

Subsequent JSON response:

{
  "text_output": "Here is the output for the prompt"
}

Should we add an additional property to display the tokens generated in the current response set?
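Until such a property exists, a client can recover the per-iteration delta itself under the cumulative-output behavior described in the reply above. A sketch (the event payloads are illustrative):

```python
# Each streamed GenerateStreamResponse is assumed to carry the cumulative
# text so far (as in the NVIDIA implementation discussed above); the newly
# generated suffix is the part beyond the previous event's text.
events = [
    {"text_output": "Here is", "model_name": "m"},
    {"text_output": "Here is the output", "model_name": "m"},
    {"text_output": "Here is the output for the prompt", "model_name": "m",
     "finish_reason": "stop"},
]

previous = ""
deltas = []
for event in events:
    cumulative = event["text_output"]
    deltas.append(cumulative[len(previous):])  # newly generated suffix
    previous = cumulative

print(deltas)  # ['Here is', ' the output', ' for the prompt']
```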
    GenerateErrorResponse:
      type: object
      required:
        - error
      properties:
        error:
          type: string
paths:
  /v2/models/${MODEL_NAME}[/versions/${MODEL_VERSION}]/generate:
    post:
      parameters:
        - name: model_name
          required: true
          in: path
          schema:
            type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GenerateRequest'
      responses:
        '200':
          description: generated text
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GenerateResponse'
        '422':
          description: Input validation error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GenerateErrorResponse'
              example:
                error: Input validation error
        '424':
          description: Generation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GenerateErrorResponse'
              example:
                error: Request failed during generation
        '429':
          description: Model is overloaded
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GenerateErrorResponse'
              example:
                error: Model is overloaded
        '500':
          description: Incomplete generation
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GenerateErrorResponse'
              example:
                error: Incomplete generation
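The path template expands `${MODEL_NAME}` and, optionally, a `/versions/${MODEL_VERSION}` segment. A small URL-building helper (our own, not part of the spec; the base URL is illustrative):

```python
def generate_url(base, model_name, model_version=None):
    """Expand /v2/models/${MODEL_NAME}[/versions/${MODEL_VERSION}]/generate."""
    path = f"/v2/models/{model_name}"
    if model_version is not None:
        path += f"/versions/{model_version}"
    return base.rstrip("/") + path + "/generate"

print(generate_url("http://localhost:8080", "my-llm"))
# http://localhost:8080/v2/models/my-llm/generate
print(generate_url("http://localhost:8080/", "my-llm", "2"))
# http://localhost:8080/v2/models/my-llm/versions/2/generate
```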

  /v2/models/${MODEL_NAME}[/versions/${MODEL_VERSION}]/generate_stream:
    post:
      parameters:
        - name: model_name
          required: true
          in: path
          schema:
            type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GenerateRequest'
      responses:
        '200':
          description: generated text stream
          content:
            text/event-stream:
              schema:
                $ref: '#/components/schemas/GenerateStreamResponse'
        '422':
          description: Input validation error
          content:
            text/event-stream:
              schema:
                $ref: '#/components/schemas/GenerateErrorResponse'
              example:
                error: Input validation error
        '424':
          description: Generation Error
          content:
            text/event-stream:
              schema:
                $ref: '#/components/schemas/GenerateErrorResponse'
              example:
                error: Request failed during generation
        '429':
          description: Model is overloaded
          content:
            text/event-stream:
              schema:
                $ref: '#/components/schemas/GenerateErrorResponse'
              example:
                error: Model is overloaded
        '500':
          description: Incomplete generation
          content:
            text/event-stream:
              schema:
                $ref: '#/components/schemas/GenerateErrorResponse'
              example:
                error: Incomplete generation
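The streaming endpoint responds with `text/event-stream`, where each event's `data:` line carries one GenerateStreamResponse as JSON. A minimal parser over a captured stream (the payloads are illustrative, not from a real server):

```python
import json

# A captured text/event-stream body; events are separated by blank lines and
# each data line holds one GenerateStreamResponse JSON object.
stream = (
    'data: {"text_output": "Hello", "model_name": "m"}\n'
    '\n'
    'data: {"text_output": "Hello world", "model_name": "m", "finish_reason": "stop"}\n'
    '\n'
)

responses = [
    json.loads(line[len("data: "):])
    for line in stream.splitlines()
    if line.startswith("data: ")
]

print(responses[-1]["finish_reason"])  # stop
```

A production client would use an SSE library rather than hand-parsing, but the event shape is the same.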