diff --git a/app.py b/app.py index 294279d..62cd578 100644 --- a/app.py +++ b/app.py @@ -373,7 +373,7 @@ async def inference_request(): cache_json_result[-1], imageDims, area_ratio, color_format ) - result_json_string = await record_model(pipeline, processed_result_json) + result_json_string = await record_model(pipeline_name, pipeline, processed_result_json) # upload the inference results to the user's container as async task app.add_background_task( @@ -387,7 +387,7 @@ async def inference_request(): print(f"Took: {'{:10.4f}'.format(time.perf_counter() - seconds)} seconds") # TODO: Transform into logging return jsonify(processed_result_json), 200 - except (KeyError, InferenceRequestError) as error: + except (KeyError, InferenceRequestError, inference.ProcessInferenceResultError) as error: print(error) return jsonify(["InferenceRequestError: " + error.args[0]]), 400 @@ -456,10 +456,24 @@ async def test(): return CACHE["endpoints"], 200 - -async def record_model(pipeline: namedtuple, result: list): + +async def record_model(name: str, pipeline: namedtuple, result: list): + """ + Records the models in the pipeline to the result list. + + Args: + name (str): The name of the pipeline. + pipeline (namedtuple): The pipeline containing the models. + result (list): The result list to update. + + Returns: + str: The updated result list as a JSON string. 
+ """ new_entry = [{"name": model.name, "version": model.version} for model in pipeline] - result[0]["models"] = new_entry + result[0].update({ + "pipeline": name, + "models": new_entry + }) return json.dumps(result, indent=4) @@ -494,7 +508,6 @@ async def get_pipelines(connection_string, pipeline_blob_name, pipeline_version, app.config["BLOB_CLIENT"] = await azure_storage_api.get_blob_client(connection_string) result_json = await azure_storage_api.get_pipeline_info(app.config["BLOB_CLIENT"], pipeline_blob_name, pipeline_version) except (azure_storage_api.AzureAPIErrors) as error: - print(error) raise ServerError("server errror: could not retrieve the pipelines") from error models = () diff --git a/custom_exceptions.py b/custom_exceptions.py deleted file mode 100644 index 48449a7..0000000 --- a/custom_exceptions.py +++ /dev/null @@ -1,10 +0,0 @@ -class ContainerNameError(Exception): - pass - - -class ProcessInferenceResultError(Exception): - pass - - -class ValidateEnvVariablesError(Exception): - pass diff --git a/docs/nachet-inference-documentation.md b/docs/nachet-inference-documentation.md index 3bed546..e5c7df9 100644 --- a/docs/nachet-inference-documentation.md +++ b/docs/nachet-inference-documentation.md @@ -34,10 +34,10 @@ output"|G>"return result.json to user"] subgraph SB1[In pipeline] - direction TB +  direction TB - B(model 01)-->|"model output - send to the next one"|C(model 02)-->|can have n models|D(n models) +  B(model 01)-->|"model output +  send to the next one"|C(model 02)-->|can have n models|D(n models) end ``` @@ -61,80 +61,83 @@ to a model and receive the result. ```mermaid sequenceDiagram - title: Sequence Diagram for inference request 1.2.1 - actor Client - participant Frontend - participant Backend - participant Blob storage - participant Model - - Backend-)+Backend: run() - Note over Backend,Blob storage: initialisation - Backend-)Backend: before_serving() - Backend-)Backend: get_pipelines() - alt - Backend-)+Blob storage: HTTP POST req. 
- Blob storage--)-Backend: return pipelines_models.json - else - Backend-)Frontend: error 500 Failed to retrieve data from the repository - end - Note over Backend,Blob storage: end of initialisation - - Client->>+Frontend: applicationStart() - Frontend-)Backend: HTTP POST req. - Backend-)Backend: get_model_endpoints_metadata() - Backend--)Frontend: Pipelines names res. - Note left of Backend: return pipelines names and metadata - - Frontend->>Client: application is ready - Client-->>Frontend: client ask action from specific pipeline - Frontend-)Backend: HTTP POST req. - Backend-)Backend: inference_request(pipeline_name, folder_name, container_name, imageDims, image) - alt missing arguments - Backend-)Frontend: Error 400 missing arguments - end - alt wrong pipeline name - Backend-)Frontend: Error 400 wrong pipeline name - end - alt wrong header - Backend-)Frontend: Error 400 wrong header on file - end - - Backend-)Backend: mount_container(connection_string(Environnement Variable, container_name)) - Backend-)+Blob storage: HTTP POST req. - Blob storage--)-Backend: container_client - - Backend-)Backend: Generate Hash(image_bytes) - - Backend-)Backend: upload_image(container_client, folder_name, image_bytes, hash_value) - Backend-)+Blob storage: HTTP POST req. - Blob storage--)-Backend: blob_name - - Backend-)Backend: get_blob(container_client, blob_name) - Backend-)+Blob storage: HTTP POST req. - Blob storage--)-Backend: blob - - loop for every model in pipeline - Backend-)Backend: model.entry_function(model, previous_result) - note over Backend, Blob storage: Every model has is own entry_function - Backend-)Backend: request_factory(previous_result, model) - Backend-)Backend: urllib.request.Request(endpoint_url, body, header) - Backend-)+Model: HTTP POST req. - Model--)-Backend: Result res. 
- alt if model has process_inference_function - Backend-) Backend: model.inference_function(previous_result, result_json) - end - end - note over Backend, Blob storage: End of the loop - par Backend to Frontend - Backend-)Backend: inference.process_inference_results(result_json, imageDims) - Backend--)Frontend: Processed result res. - Frontend--)-Client: display result - and Backend to Blob storage - note over Backend, Blob storage: record the result produced by the model - Backend-)Backend: upload_inference_result(container_client, folder_name, result_json_string, hash_value) - Backend-)-Blob storage: HTTP POST req. - end + title: Sequence Diagram for inference request 1.2.1 +  actor Client +  participant Frontend +  participant Backend +  participant Blob storage +  participant Model + +  Backend-)+Backend: run() +  Note over Backend, Blob storage: initialization +  Backend-)Backend: before_serving() +  Backend-)Backend: get_pipelines() +  alt +  Backend-)+Blob storage: HTTP POST req. +  Blob storage--)-Backend: return pipelines_models.json +  else +  Backend-)Frontend: error 500 Failed to retrieve data from the repository +  end +  Note over Backend, Blob storage: end of initialization + +  Client->>+Frontend: applicationStart() +  Frontend-)Backend: HTTP POST req. +  Backend-)Backend: get_model_endpoints_metadata() +  Backend--)Frontend: Pipelines names res. +  Note left of Backend: return pipelines names and metadata + +  Frontend-->>-Client: application is ready +  Client->>+Frontend: Client asks actions from a specific pipeline +  Frontend-)Backend: HTTP POST req. 
+  Backend-)Backend: inference_request(pipeline_name, folder_name, container_name, imageDims, image) +  alt missing arguments +  Backend-)Frontend: Error 400 missing arguments +  end +  alt wrong pipeline name +  Backend-)Frontend: Error 400 wrong pipeline name +  end +  alt wrong image +  Backend-)Frontend: Error 400 this picture was not validated +  end + +  Backend-)Backend: mount_container(blob_service_client, container_name) +  Backend-)+Blob storage: HTTP POST req. +  Blob storage--)-Backend: container_client + +  Backend-)Backend: Generate Hash(image_bytes) + +  Backend-)Backend: upload_image(container_client, folder_name, image_bytes, hash_value) + +  loop for every model in pipeline +    Backend-)Backend: model.request_function(model, previous_result) +    note over Backend, Blob storage: Every model has its own request_function +    alt Get inference from model +      alt seed detector or nachet-6seeds model +        Backend->>+Model: urlopen(Request(model.endpoint, body, headers)) +        Model-->>-Backend: Result res. +      else seed-detector model has a process function +        Backend->>Backend: process(images_bytes, inf_result) +      end +    else swin model inference is requested +      loop for every image in previous_result +        note over Backend, Model: Swin can only accept 1 seed per images +        Backend->>+Model: urlopen(Request(model.endpoint, body, headers)) +        Model-->>-Backend: Result res. +        Backend->>Backend: results.append(Result) +      end +      Backend->>Backend: process(previous_result, results) +    end +  end +  note over Backend, Blob storage: End of the loop +  par Backend to Frontend +    Backend-)Backend: inference.process_inference_results(result_json, imageDims) +    Backend--)Frontend: Processed result res. 
+    Frontend--)-Client: display result +  and Backend to Blob storage +    note over Backend, Blob storage: record the result produced by the model +    Backend-)Backend: upload_inference_result(container_client, folder_name, result_json_string, hash_value) +    Backend-)-Blob storage: HTTP POST req. +  end ``` ![footer_for_diagram](https://github.com/ai-cfia/nachet-backend/assets/96267006/cf378d6f-5b20-4e1d-8665-2ba65ed54f8e) @@ -146,13 +149,13 @@ backend. It requests actions from selected models or pipelines based on certain checks. These checks include verifying that all arguments required to find or initialize the blob container and process the image have been transmitted to the function. It also checks if the selected pipeline is recognized by the system -and if the image sent for analysis has a valid header. +and if the image sent for analysis has been validated. If all the above checks pass, the function initializes or finds the user blob container and uploads the image. Next, it requests an inference from every model -in the pipeline. Each model specifies their `entry_function` (how to call and -retrieve data) and whether they have a `process_inference` function. Based on -these indications, the results are returned and stored in the cache. +in the pipeline. Each model specifies their `request_function` (how to call and +retrieve data). Based on these indications, the results are returned and stored +in the cache. If no other model is called, the last result is then processed and sent to the frontend. 
@@ -163,10 +166,13 @@ The inference request will process the following parameters: |Key parameters | Expected Value| |--|--| |model_name | The name of the pipeline| +|validator| A hash that has been returned to the frontend when the image is valid| |folder_name | The folder where the image is uploaded in the user's container| |container_name | The user's container| |imageDims | The dimension of the image| |image | The image encoded in b64 (ASCII)| +|area_ratio| The ratio specified by the user, default = 0.5| +|color_format| The color format specified by the frontend, default = "hex"| Note that since the information is received from the frontend, the model_name is an abstraction for a pipeline. @@ -178,20 +184,23 @@ The inference request will return a list with the following information: |Boxes | 0 | Contains all the boxes returned by the inference request| |labelOccurence | 0 | Contains the number of label occurence| |totalBoxes | 0 | Boxes total number| +|pipeline| 0 | Contains the name of the pipeline that produced the inference| +|models| 0 | Contains the models used by the pipeline| |Box | 1 | Contains all the information of one seed in the image| |label | 1 | Contains the top label for the seed| |score | 1 | Contains the top score for the seed| |topN | 1 | Contains the top N scores for the seed| -|overlapping | 1 | Contains a boolean to tell if the box overlap with another one| +|overlapping | 1 | Contains a boolean to tell if the box overlaps with another one| |overlappingIndices | 1 | Contains the index of the overlapping box| |topX | 2 | The top x value of the box around a seed| |topY | 2 | The top y value of the box around a seed| |bottomX | 2 | The bottom x value of the box around a seed| |bottomY| 2 | The bottom y value of the box around a seed| -*for more look at [nachet-model-documentation](https://github.com/ai-cfia/nachet-backend/blob/51-implementing-2-models/docs/nachet-model-documentation.md#return-value-of-models)* +*for more look at
[nachet-model-documentation](nachet-model-documentation.md#return-value-of-models)* -**topN** contains the top 5 predictions of the models: +**topN** contains the top n predictions of a given model. For now, only `Swin` +returns a `topN` key with 5 results: ```json "topN": [ @@ -220,19 +229,22 @@ The inference request will return a list with the following information: ### Blob storage and Pipeline versioning +:warning: **This section will need adjustment after the implementation of the +datastore into Nachet** + To keep track of the various pipeline iterations and versions, JSON files are -stored in the blob storage. Users can add the JSON to the blob storage +stored in the blob storage. Datascientists can add the JSON to the blob storage using the `pipelines_version_insertion.py` script. This allows for easy management of model and pipeline history. To use the script, 3 environment variables are necessary: -* NACHET_BLOB_PIPELINE_NAME - * Containing the blob name where the pipelines are stored -* NACHET_BLOB_PIPELINE_VERSION - * Containing the version the user wants to select -* NACHET_BLOB_PIPELINE_DECRYPTION_KEY - * The key to decrypt sensible data such as the API key and the endpoint of a model. +* **NACHET_BLOB_PIPELINE_NAME** + * Containing the blob name where the pipelines are stored +* **NACHET_BLOB_PIPELINE_VERSION** + * Containing the version the user wants to select +* **NACHET_BLOB_PIPELINE_DECRYPTION_KEY** + * The key to decrypt sensible data such as the API key and the endpoint of a model. #### In the code @@ -245,7 +257,6 @@ information and metadata to the frontend. async def get_pipelines(connection_string, pipeline_blob_name, pipeline_version, cipher_suite): """ Retrieves the pipelines from the Azure storage API. - Returns: - list: A list of dictionaries representing the pipelines. 
""" @@ -280,6 +291,8 @@ async def get_pipelines(connection_string, pipeline_blob_name, pipeline_version, ### Available Version of the JSON file +***Careful as major update might change pipeline and model attributes*** + |Version|Creation Date| Pipelines| |--|--|--| |0.1.3 | 2024-03-26 | Swin Transformer and 6 Seeds Detector| diff --git a/docs/nachet-model-documentation.md b/docs/nachet-model-documentation.md index 8d9da64..7474a25 100644 --- a/docs/nachet-model-documentation.md +++ b/docs/nachet-model-documentation.md @@ -8,7 +8,7 @@ tasks, including Image Classification, Image Segmentation, and Object Detection. ## Task -Nachet Interactive's models perfom the following tasks: +Nachet Interactive's models perform the following tasks: |Task|Action|Input/Output| |:--|:--|:-----| @@ -23,11 +23,11 @@ Nachet Interactive's models perfom the following tasks: ## List of models -|Model|Full name|Task|API Call Function|Inference Function|Active|Accuracy| +|Model|Full name|Task|API endpoint|Process Function|Active|Accuracy| |--|--|:--:|:--:|:--:|:--:|:--:| -|Nachet-6seeds | m-14of15seeds-6seedsmag | Object Detection | nachet_6seeds | None | Yes | - | -|Seed-detector | seed-detector-1 | Object Detection | seed_detector | process_image_slicing | Yes | - | -|Swin | swinv1-base-dataaugv2-1 | Classification | swin | process_swin_result | Yes | - | +|Nachet-6seeds | m-14of15seeds-6seedsmag | Object Detection | nachet-6seeds | None | Yes | - | +|Seed-detector | seed-detector-1 | Object Detection | seed-detector | process_image_slicing | Yes | - | +|Swin | swinv1-base-dataaugv2-1 | Classification | swin-endpoint | process_swin_result | Yes | - | ### Request Inference Function @@ -38,6 +38,11 @@ Seed-detector only returns "seed" as a label, and its inference needs to be processed and passed to the next model which assigns the correct label to the seeds. +Each request function is mapped with the endpoint of the model. 
This provides a +simple way of calling the right request function for a given model. + +You can look at the mapping here: [model init file](../model/__init__.py) + ## Return value of models ```json @@ -82,27 +87,38 @@ seeds. "labelOccurrence": { "seed_name": 1, }, - "totalBoxes": 1 + "totalBoxes": 1, + "pipeline": "pipeline_name", + "models": [ + { + "name": "model_name_01", + "version": "1" + }, + { + "name": "model_name_02", + "version": "1" + } + ] } ``` ### Why topN -We decided to named the top results property top N because this value can return -n predictions. Usually in AI, the top 5 result are use to measure the accuracy +We decided to name the top results property top N because this value can return +n predictions. Usually in AI, the top 5 results are used to measure the accuracy of a model. If the correct result is the top 5, then it is considered that the prediction was true. -This is useful in case were the user have is attention on more then 1 result. +This is useful in cases where the user has attention on more than 1 result. - > "Top N accuracy — Top N accuracy is when you measure how often your predicted - > class falls in the top N values of your softmax distribution." - [Nagda, R. (2019-11-08) *Evaluating models using the Top N accuracy metrics*. Medium](https://medium.com/nanonets/evaluating-models-using-the-top-n-accuracy-metrics-c0355b36f91b) + > "Top N accuracy — Top N accuracy is when you measure how often your predicted + > class falls in the top N values of your softmax distribution." + [Nagda, R. (2019-11-08) *Evaluating models using the Top N accuracy metrics*. Medium](https://medium.com/nanonets/evaluating-models-using-the-top-n-accuracy-metrics-c0355b36f91b) ### Box around seed The `box` key stores the value for a specific box around a seed. This helps the -frontend application build a red rectangle around every seed on the image. +frontend application builds a colored rectangle around every seed on the image. 
![image](https://github.com/ai-cfia/nachet-backend/assets/96267006/469add8d-f40a-483f-b090-0ebcb7a8396b) @@ -116,9 +132,9 @@ header is necessary to avoid errors. ```python # Header for every model should be: headers = { - 'Content-Type': 'application/json', - 'Authorization': ('Bearer ' + endpoint_api_key), - 'azureml-model-deployment': model_name +  'Content-Type': 'application/json', +  'Authorization': ('Bearer ' + endpoint_api_key), +  'azureml-model-deployment': model_name } ``` @@ -134,11 +150,11 @@ determined whether a segmentation model requires a different body structure. # Object Detection model # Example: Nachet-6seeds and seed-detector body = { - 'input_data': { - 'columns': ['image'], - 'index': [0], - 'data': [image_bytes], - } +  'input_data': { +    'columns': ['image'], +    'index': [0], +    'data': [image_bytes], +  } } # Classification model @@ -150,8 +166,8 @@ body = b64encode(image) A list of common error models returns to the backend. -> To access the error from the model, go to the model endpoint in azure and look -> for the logs : CFIA/ACIA/workspace/endpoint/model/logs +> To access the error from the model, go to the model endpoint in Azure and look +> for the logs: CFIA/ACIA/workspace/endpoint/model/logs |Error|Model|Reason|Message| |--|--|--|--| @@ -159,17 +175,19 @@ A list of common error models returns to the backend. ## Pipeline and model data -In order to dynamically build the pipeline in the backend from the model, the -following data structure was designed. For now, the pipelines will have two keys -for their names (`model_name`, `piepline_name`) to support the frontend code -until it is changed to get the name of the pipeline with the correct key. +To dynamically build the pipeline in the backend from the model, the following +data structure was designed. 
For now, the pipelines will have two keys for their +names (`model_name`, `pipeline_name`) to support the frontend code until it is +changed to get the name of the pipeline with the correct key. ```yaml +--- version: + date: + pipelines: - models: - model_name: pipeline_name: created_by: creation_date: @@ -181,7 +199,7 @@ pipelines: default: models: - - task: + - tasks: endpoint: api_key: content_type: @@ -195,20 +213,24 @@ models: job_name: dataset_description: accuracy: + ``` ### Key Description -#### File Specific Keys +:warning: **The following section is subject to change after the implementation +of the datastore** + +#### File-Specific Keys |Key|Description|Expected Value Format| |--|--|--| |version|The version of the file|0.0.0| -|date|The date the file was upload|202x-mm-dd| -|pipelines|A list of available pipeline|| -|models|A list of available model|| +|date|The date the file was uploaded |202x-mm-dd| +|pipelines|A list of available pipelines || +|models|A list of available models || -#### Pipeline Specific Keys +#### Pipeline-Specific Keys |Key|Description|Expected Value Format| |--|--|--| @@ -222,7 +244,7 @@ models: |Accuracy|The prediction accuracy of the pipeline|0.8302| |default|Determine if the pipeline is the default one|true or false| -#### Model Specific Keys +#### Model-Specific Keys |Key|Description|Expected Value Format| |--|--|--| @@ -243,7 +265,10 @@ models: #### JSON Representation and Example -This how the file will be represented in the datastore. +:warning: **The following section is subject to change after the implementation +of the datastore** + +This is how the file will be represented in the datastore. ```json { diff --git a/model/__init__.py b/model/__init__.py index bdb979b..617f162 100644 --- a/model/__init__.py +++ b/model/__init__.py @@ -1,3 +1,7 @@ +""" +This module provides functions for requesting inferences from different models. 
+""" + from model.swin import request_inference_from_swin from model.seed_detector import request_inference_from_seed_detector from model.test import request_inference_from_test @@ -6,6 +10,6 @@ request_function = { "swin-endpoint": request_inference_from_swin, "seed-detector": request_inference_from_seed_detector, - "test": request_inference_from_test, - "nachet-6seeds": request_inference_from_nachet_6seeds + "nachet-6seeds": request_inference_from_nachet_6seeds, + "test": request_inference_from_test } diff --git a/model/inference.py b/model/inference.py index 2cf6b73..ecc1198 100644 --- a/model/inference.py +++ b/model/inference.py @@ -10,10 +10,13 @@ import numpy as np -from custom_exceptions import ProcessInferenceResultError from model.color_palette import primary_colors, light_colors, mixing_palettes, shades_colors +class ProcessInferenceResultError(Exception): + pass + + def generator(list_length): for i in range(list_length): yield i diff --git a/model/seed_detector.py b/model/seed_detector.py index 324d4e0..7e76ad2 100644 --- a/model/seed_detector.py +++ b/model/seed_detector.py @@ -1,6 +1,6 @@ """ -This file contains the function that requests the inference and processes the data from -the seed detector model. +This file contains the function that requests the inference and processes +the data from the seed detector model. """ import io @@ -10,7 +10,9 @@ from PIL import Image from collections import namedtuple from urllib.request import Request, urlopen, HTTPError -from custom_exceptions import ProcessInferenceResultError + +from model.inference import ProcessInferenceResultError + def process_image_slicing(image_bytes: bytes, result_json: dict) -> list: """ diff --git a/model/six_seeds.py b/model/six_seeds.py index b33e6e9..ff1d9ff 100644 --- a/model/six_seeds.py +++ b/model/six_seeds.py @@ -1,12 +1,15 @@ """ -This file contains the function that requests the inference and processes the data from -the nachet-6seeds model. 
+This file contains the function that requests the inference and processes +the data from the nachet-6seeds model. """ import json + from collections import namedtuple from urllib.request import Request, urlopen, HTTPError -from custom_exceptions import ProcessInferenceResultError + +from model.inference import ProcessInferenceResultError + async def request_inference_from_nachet_6seeds(model: namedtuple, previous_result: str): """ diff --git a/model/swin.py b/model/swin.py index 245e202..dea880f 100644 --- a/model/swin.py +++ b/model/swin.py @@ -1,13 +1,14 @@ """ -This file contains the function that requests the inference and processes the data from -the swin model. +This file contains the function that requests the inference and processes +the data from the swin model. """ import json from collections import namedtuple from urllib.request import Request, urlopen, HTTPError -from custom_exceptions import ProcessInferenceResultError + +from model.inference import ProcessInferenceResultError def process_swin_result(img_box:dict, results: dict) -> list: diff --git a/model/test.py b/model/test.py index 1ae8d00..d7c1dce 100644 --- a/model/test.py +++ b/model/test.py @@ -1,13 +1,10 @@ """ -This module contains functions for performing inference using different models. - -Functions: - request_inference_from_swin: Perform inference using the SWIN model on a list of images. - request_inference_from_seed_detector: Requests inference from the seed detector model using the provided previous result. - request_inference_from_nachet_six_seed: Requests inference from the Nachet Six Seed model. +This module contains functions for testing the inference procedure in +the backend. 
""" from collections import namedtuple -from custom_exceptions import ProcessInferenceResultError + +from model.inference import ProcessInferenceResultError async def request_inference_from_test(model: namedtuple, previous_result: str): diff --git a/pipelines/pipelines_version_insertion.py b/pipelines/pipelines_version_insertion.py index 818f047..5b32828 100644 --- a/pipelines/pipelines_version_insertion.py +++ b/pipelines/pipelines_version_insertion.py @@ -219,6 +219,7 @@ def pipeline_insertion( file_path (str): The path to the file containing the pipeline data. blob_service_client (BlobServiceClient): The BlobServiceClient object for accessing Azure Blob Storage. cipher_suite (Fernet): The Fernet cipher suite for encrypting sensitive data. + acount_name (str): The name of the Azure container. Returns: str: A message indicating the success or failure of the pipeline insertion. diff --git a/tests/test_inference_request.py b/tests/test_inference_request.py index 4b7f9b0..1fac250 100644 --- a/tests/test_inference_request.py +++ b/tests/test_inference_request.py @@ -66,6 +66,7 @@ def test_inference_request_successful(self, mock_container): "topN", "overlapping", "overlappingIndices", + "pipeline", "models" }