From 97aaf12713517888ef85dad7dc9c7c883c60fb21 Mon Sep 17 00:00:00 2001 From: Susankha Date: Mon, 10 Jan 2022 16:40:20 +0530 Subject: [PATCH 01/24] add sagemaker scorer for cpp runtime --- aws-sagemaker-hosted-scorer-cpp/.gitignore | 8 ++ aws-sagemaker-hosted-scorer-cpp/Dockerfile | 21 ++++ aws-sagemaker-hosted-scorer-cpp/README.md | 94 ++++++++++++++++ .../py/scorer/mojo-cpp-scorer.py | 103 ++++++++++++++++++ 4 files changed, 226 insertions(+) create mode 100644 aws-sagemaker-hosted-scorer-cpp/.gitignore create mode 100644 aws-sagemaker-hosted-scorer-cpp/Dockerfile create mode 100644 aws-sagemaker-hosted-scorer-cpp/README.md create mode 100644 aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo-cpp-scorer.py diff --git a/aws-sagemaker-hosted-scorer-cpp/.gitignore b/aws-sagemaker-hosted-scorer-cpp/.gitignore new file mode 100644 index 00000000..55c4b85a --- /dev/null +++ b/aws-sagemaker-hosted-scorer-cpp/.gitignore @@ -0,0 +1,8 @@ +.idea/ +.gradle/ +.vscode/ +build/ +dist/ +.cproject +.project +.settings/ diff --git a/aws-sagemaker-hosted-scorer-cpp/Dockerfile b/aws-sagemaker-hosted-scorer-cpp/Dockerfile new file mode 100644 index 00000000..9d839ae8 --- /dev/null +++ b/aws-sagemaker-hosted-scorer-cpp/Dockerfile @@ -0,0 +1,21 @@ +FROM python:3.8 + +RUN apt-get update + +RUN pip3 install https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/daimojo/2.7.5/x86_64-centos7/daimojo-2.7.5-cp38-cp38-linux_x86_64.whl + +RUN pip3 install datatable + +RUN pip3 install flask-restful + +ENV DRIVERLESS_AI_LICENSE_FILE='/opt/ml/model/license.sig' + +ENV MOJO_FILE_PATH='/opt/ml/model/pipeline.mojo' + +RUN mkdir -p /opt/ml/code + +COPY py/scorer/mojo-cpp-scorer.py /opt/ml/code + +WORKDIR /opt/ml/code + +ENTRYPOINT ["python", "mojo-cpp-scorer.py"] diff --git a/aws-sagemaker-hosted-scorer-cpp/README.md b/aws-sagemaker-hosted-scorer-cpp/README.md new file mode 100644 index 00000000..ad9c8857 --- /dev/null +++ b/aws-sagemaker-hosted-scorer-cpp/README.md @@ -0,0 +1,94 @@ +# DAI Deployment 
Template for Sagemaker Hosted C++ Scorer + +## Overview + +### Build Image + +Run the following command to build the docker image. + +```bash +docker build -t .dkr.ecr..amazonaws.com/h2oai/sagemaker-hosted-scorer: . +``` + +Verify that the Docker image was created, and take note of the version created. + +```bash +docker images --format "{{.Repository}} \t {{.Tag}}" | grep "h2oai/sagemaker-hosted-scorer" +``` + +### Optional: Test the build + +After building, run to test the produced Docker container locally like this: + +Step 1: Put a pipeline.mojo and valid license.sig into this directory (aws-sagemaker-hosted-scorer-cpp). + +Step 2: Start the docker instance. + + +``` +docker run \ + --rm \ + --init \ + -ti \ + -v `pwd`:/opt/ml/model \ + -p 8080:8080 \ + harbor.h2o.ai/opsh2oai/h2oai/sagemaker-hosted-scorer: \ + serve +``` +Step 3: Use the following curl command to test the container locally: + +``` +curl \ + -X POST \ + -H "Content-Type: application/json" \ + -d @payload.json http://localhost:8080/invocations +``` + +payload.json: + +``` +{ + "fields": [ + "field1", "field2" + ], + "includeFieldsInOutput": [ + "field2" + ], + "rows": [ + [ + "value1", "value2" + ], + [ + "value1", "value2" + ] + ] +} +``` + + +### Deploy to SageMaker + +Create `h2oai/sagemaker-hosted-scorer` repository in Sagemaker for the scorer service image. + +Use the output of the command below to `aws ecr login`: + +``` +aws ecr get-login-password --region | docker login --username AWS --password-stdin .dkr.ecr..amazonaws.com +``` + +Then push the scorer service image to AWS ECR (Elastic Container Registry): + +``` +docker push .dkr.ecr..amazonaws.com/h2oai/sagemaker-hosted-scorer: +``` + +Then create a model package with the pipeline file and the license key, and copy it to S3: + +``` +tar cvf mojo.tar pipeline.mojo license.sig +gzip mojo.tar +aws s3 cp mojo.tar.gz s3:/// +``` + +Next create the appropriate model and endpoint on Sagemaker. 
+Check that the endpoint is available with `aws sagemaker list-endpoints`. diff --git a/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo-cpp-scorer.py b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo-cpp-scorer.py new file mode 100644 index 00000000..e4c0e3eb --- /dev/null +++ b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo-cpp-scorer.py @@ -0,0 +1,103 @@ +from flask import Flask, request +from flask_restful import Resource, Api +import logging +import json +import os +import threading +import daimojo.model +import datatable as dt + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +app = Flask(__name__) +api = Api(app) + + +class MojoPipeline(object): + _model = None + _instance = None + _lock = threading.Lock() + + def __new__(cls): + if MojoPipeline._instance is None: + with MojoPipeline._lock: + if MojoPipeline._instance is None: + MojoPipeline._instance = super(MojoPipeline, cls).__new__(cls) + MojoPipeline._instance.setup() + return MojoPipeline._instance + + def setup(self): + mojo_file_path = os.getenv('MOJO_FILE_PATH') + self._model = daimojo.model(mojo_file_path) + + def get_feature_names(self): + """Return feature names""" + + return self._model.feature_names + + def get_types(self): + """Get column types""" + + types = {} + for index, value in enumerate(self._model.feature_names): + types[value] = self._model.feature_types[index] + return types + + def get_missing_values(self): + """Return mojo missing values""" + + return self._model.missing_values + + def get_prediction(self, d_frame): + """Score and return predictions on a given dataset""" + + return self._model.predict(d_frame) + + +class PredictAPI(Resource): + + def post(self): + request_body = request.get_json() + res = predict(request_body) + + json_response = json.dumps(res) + return json_response + + +class PingAPI(Resource): + + def get(self): + pass + + +api.add_resource(PredictAPI, '/invocations') +api.add_resource(PingAPI, '/ping') + + +def 
predict(request_body): + mojo = MojoPipeline() + + # properly define types based on the request elements order + tmp_frame = dt.Frame( + [list(x) for x in list(zip(*request_body['rows']))], + names=list(mojo.get_types().keys()), + stypes=list(mojo.get_types().values()) + ) + + d_frame = dt.fread( + text=tmp_frame.to_csv(), + columns=list(mojo.get_types().keys()), + na_strings=mojo.get_missing_values() + ) + + result = mojo.get_prediction(d_frame) + + return { + 'Scores': result.to_list() + } + + +if __name__ == '__main__': + logger.info('==== Starting the H2O mojo-cpp scoring server =====') + app.run(host='0.0.0.0', port=8080) From deda9eafee27ae7999b5c05874953ec028c28df3 Mon Sep 17 00:00:00 2001 From: Susankha Date: Mon, 10 Jan 2022 17:36:17 +0530 Subject: [PATCH 02/24] modified docker run command to test on locally --- aws-sagemaker-hosted-scorer-cpp/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/aws-sagemaker-hosted-scorer-cpp/README.md b/aws-sagemaker-hosted-scorer-cpp/README.md index ad9c8857..3012d9b1 100644 --- a/aws-sagemaker-hosted-scorer-cpp/README.md +++ b/aws-sagemaker-hosted-scorer-cpp/README.md @@ -32,8 +32,7 @@ docker run \ -ti \ -v `pwd`:/opt/ml/model \ -p 8080:8080 \ - harbor.h2o.ai/opsh2oai/h2oai/sagemaker-hosted-scorer: \ - serve + harbor.h2o.ai/opsh2oai/h2oai/sagemaker-hosted-scorer: ``` Step 3: Use the following curl command to test the container locally: From fc67b58894468f5562930ef6e02d87dcb56a9b19 Mon Sep 17 00:00:00 2001 From: Susankha Date: Mon, 10 Jan 2022 21:48:23 +0530 Subject: [PATCH 03/24] changes API name of invocations --- aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo-cpp-scorer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo-cpp-scorer.py b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo-cpp-scorer.py index e4c0e3eb..9ad56f86 100644 --- a/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo-cpp-scorer.py +++ 
b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo-cpp-scorer.py @@ -55,7 +55,7 @@ def get_prediction(self, d_frame): return self._model.predict(d_frame) -class PredictAPI(Resource): +class ScorerAPI(Resource): def post(self): request_body = request.get_json() @@ -71,7 +71,7 @@ def get(self): pass -api.add_resource(PredictAPI, '/invocations') +api.add_resource(ScorerAPI, '/invocations') api.add_resource(PingAPI, '/ping') From 0c541b6d4de1780eb7c4627dd56851ad01a53d6a Mon Sep 17 00:00:00 2001 From: Susankha Date: Tue, 11 Jan 2022 12:01:44 +0530 Subject: [PATCH 04/24] add request body validation --- aws-sagemaker-hosted-scorer-cpp/Dockerfile | 4 +-- ...{mojo-cpp-scorer.py => mojo_cpp_scorer.py} | 33 +++++++++++++++---- 2 files changed, 29 insertions(+), 8 deletions(-) rename aws-sagemaker-hosted-scorer-cpp/py/scorer/{mojo-cpp-scorer.py => mojo_cpp_scorer.py} (72%) diff --git a/aws-sagemaker-hosted-scorer-cpp/Dockerfile b/aws-sagemaker-hosted-scorer-cpp/Dockerfile index 9d839ae8..af5b1e43 100644 --- a/aws-sagemaker-hosted-scorer-cpp/Dockerfile +++ b/aws-sagemaker-hosted-scorer-cpp/Dockerfile @@ -14,8 +14,8 @@ ENV MOJO_FILE_PATH='/opt/ml/model/pipeline.mojo' RUN mkdir -p /opt/ml/code -COPY py/scorer/mojo-cpp-scorer.py /opt/ml/code +COPY py/scorer/mojo_cpp_scorer.py /opt/ml/code WORKDIR /opt/ml/code -ENTRYPOINT ["python", "mojo-cpp-scorer.py"] +ENTRYPOINT ["python", "mojo_cpp_scorer.py"] diff --git a/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo-cpp-scorer.py b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py similarity index 72% rename from aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo-cpp-scorer.py rename to aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py index 9ad56f86..a756665a 100644 --- a/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo-cpp-scorer.py +++ b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py @@ -55,13 +55,20 @@ def get_prediction(self, d_frame): return self._model.predict(d_frame) +class ScorerError(Exception): + 
"""Base Scorer Error""" + status_code = 500 + +class BadRequest(ScorerError): + """Bad request""" + status_code = 400 + + class ScorerAPI(Resource): def post(self): - request_body = request.get_json() - res = predict(request_body) - - json_response = json.dumps(res) + response = request_handler(request) + json_response = json.dumps(response) return json_response @@ -75,7 +82,21 @@ def get(self): api.add_resource(PingAPI, '/ping') -def predict(request_body): +def request_handler(request): + request_body = request.get_json() + if request_body is None or len(request_body.keys()) == 0: + raise BadRequest("Invalid request. Need a request body.") + + if 'fields' not in request_body.keys() or not isinstance(request_body['fields'], list): + raise BadRequest("Cannot determine the request column fields") + + if 'rows' not in request_body.keys() or not isinstance(request_body['rows'], list): + raise BadRequest("Cannot determine the request rows") + + scoring_result = score(request_body) + return scoring_result + +def score(request_body): mojo = MojoPipeline() # properly define types based on the request elements order @@ -94,7 +115,7 @@ def predict(request_body): result = mojo.get_prediction(d_frame) return { - 'Scores': result.to_list() + 'score': result.to_list() } From 30daaa3f4573b106edd57a17ad2306a4f7873c2f Mon Sep 17 00:00:00 2001 From: Susankha Date: Wed, 12 Jan 2022 16:14:00 +0530 Subject: [PATCH 05/24] modify docker file to install python libs on same layer --- aws-sagemaker-hosted-scorer-cpp/Dockerfile | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/aws-sagemaker-hosted-scorer-cpp/Dockerfile b/aws-sagemaker-hosted-scorer-cpp/Dockerfile index af5b1e43..89619191 100644 --- a/aws-sagemaker-hosted-scorer-cpp/Dockerfile +++ b/aws-sagemaker-hosted-scorer-cpp/Dockerfile @@ -1,16 +1,11 @@ FROM python:3.8 -RUN apt-get update +RUN apt-get -y update && pip3 install datatable \ + flask-restful \ + 
https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/daimojo/2.7.5/x86_64-centos7/daimojo-2.7.5-cp38-cp38-linux_x86_64.whl -RUN pip3 install https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/daimojo/2.7.5/x86_64-centos7/daimojo-2.7.5-cp38-cp38-linux_x86_64.whl - -RUN pip3 install datatable - -RUN pip3 install flask-restful - -ENV DRIVERLESS_AI_LICENSE_FILE='/opt/ml/model/license.sig' - -ENV MOJO_FILE_PATH='/opt/ml/model/pipeline.mojo' +ENV DRIVERLESS_AI_LICENSE_FILE='/opt/ml/model/license.sig' \ + MOJO_FILE_PATH='/opt/ml/model/pipeline.mojo' RUN mkdir -p /opt/ml/code From 9173118d2f5ed62d0a54e9d7e7306f3f966dc198 Mon Sep 17 00:00:00 2001 From: Susankha Date: Thu, 13 Jan 2022 10:06:17 +0530 Subject: [PATCH 06/24] add seperate file to install python libs --- aws-sagemaker-hosted-scorer-cpp/Dockerfile | 8 +++++--- aws-sagemaker-hosted-scorer-cpp/requirements.txt | 4 ++++ 2 files changed, 9 insertions(+), 3 deletions(-) create mode 100644 aws-sagemaker-hosted-scorer-cpp/requirements.txt diff --git a/aws-sagemaker-hosted-scorer-cpp/Dockerfile b/aws-sagemaker-hosted-scorer-cpp/Dockerfile index 89619191..ef591603 100644 --- a/aws-sagemaker-hosted-scorer-cpp/Dockerfile +++ b/aws-sagemaker-hosted-scorer-cpp/Dockerfile @@ -1,8 +1,10 @@ FROM python:3.8 -RUN apt-get -y update && pip3 install datatable \ - flask-restful \ - https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/daimojo/2.7.5/x86_64-centos7/daimojo-2.7.5-cp38-cp38-linux_x86_64.whl +RUN apt-get update && rm -rf /var/lib/apt/lists/* + +COPY requirements.txt /tmp/requirements.txt + +RUN pip install -r /tmp/requirements.txt ENV DRIVERLESS_AI_LICENSE_FILE='/opt/ml/model/license.sig' \ MOJO_FILE_PATH='/opt/ml/model/pipeline.mojo' diff --git a/aws-sagemaker-hosted-scorer-cpp/requirements.txt b/aws-sagemaker-hosted-scorer-cpp/requirements.txt new file mode 100644 index 00000000..8201e184 --- /dev/null +++ b/aws-sagemaker-hosted-scorer-cpp/requirements.txt @@ -0,0 +1,4 @@ +datatable +flask-restful 
+https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/daimojo/2.7.5/x86_64-centos7/daimojo-2.7.5-cp38-cp38-linux_x86_64.whl + From d3442075911268ed159ba21110b065fb6cd49a7b Mon Sep 17 00:00:00 2001 From: Susankha Date: Wed, 19 Jan 2022 08:03:50 +0530 Subject: [PATCH 07/24] combind request payload with scoring results using cbind and then conver to panda_df --> json --- .../py/scorer/mojo_cpp_scorer.py | 16 ++++++++-------- aws-sagemaker-hosted-scorer-cpp/requirements.txt | 1 + 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py index a756665a..696f2ed2 100644 --- a/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py +++ b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py @@ -6,6 +6,7 @@ import threading import daimojo.model import datatable as dt +import pandas as pd logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -67,9 +68,8 @@ class BadRequest(ScorerError): class ScorerAPI(Resource): def post(self): - response = request_handler(request) - json_response = json.dumps(response) - return json_response + response_payload = request_handler(request) + return response_payload class PingAPI(Resource): @@ -112,12 +112,12 @@ def score(request_body): na_strings=mojo.get_missing_values() ) - result = mojo.get_prediction(d_frame) - - return { - 'score': result.to_list() - } + result_frame = mojo.get_prediction(d_frame) + combined_frame = dt.cbind(d_frame, result_frame) + pandas_df = combined_frame.to_pandas() + json_response = pandas_df.to_json(orient = 'records', date_format='iso') + return json_response if __name__ == '__main__': logger.info('==== Starting the H2O mojo-cpp scoring server =====') diff --git a/aws-sagemaker-hosted-scorer-cpp/requirements.txt b/aws-sagemaker-hosted-scorer-cpp/requirements.txt index 8201e184..599551c8 100644 --- 
a/aws-sagemaker-hosted-scorer-cpp/requirements.txt +++ b/aws-sagemaker-hosted-scorer-cpp/requirements.txt @@ -1,3 +1,4 @@ +pandas datatable flask-restful https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/daimojo/2.7.5/x86_64-centos7/daimojo-2.7.5-cp38-cp38-linux_x86_64.whl From fca4b9d7e31acc2cec52fd1f8877522058e1e246 Mon Sep 17 00:00:00 2001 From: Susankha Date: Thu, 20 Jan 2022 10:21:08 +0530 Subject: [PATCH 08/24] combined scored results with includeFieldsInOutput comes with request --- .../py/scorer/mojo_cpp_scorer.py | 40 ++++++++++++++++--- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py index 696f2ed2..e8d0b085 100644 --- a/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py +++ b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py @@ -32,6 +32,9 @@ def setup(self): mojo_file_path = os.getenv('MOJO_FILE_PATH') self._model = daimojo.model(mojo_file_path) + def get_id(self): + return self._model.uuid + def get_feature_names(self): """Return feature names""" @@ -69,7 +72,8 @@ class ScorerAPI(Resource): def post(self): response_payload = request_handler(request) - return response_payload + json_response = json.dumps(response_payload) + return json_response class PingAPI(Resource): @@ -113,11 +117,37 @@ def score(request_body): ) result_frame = mojo.get_prediction(d_frame) - combined_frame = dt.cbind(d_frame, result_frame) - pandas_df = combined_frame.to_pandas() - json_response = pandas_df.to_json(orient = 'records', date_format='iso') + request_id = mojo.get_id() + + + #check whether 'includeFieldsInOutput' comes with request body and combined them with scored result + if 'includeFieldsInOutput' in request_body: + include_fields = list(request_body['includeFieldsInOutput']) + if len(include_fields) > 0: + combined_df = get_combined_frame(d_frame, result_frame, include_fields) + pandas_df = 
combined_df.to_pandas() + json_response = pandas_df.to_json(orient = 'values', date_format='iso') + + return { + 'id': request_id, + 'fields' : list(combined_df.names), + 'score': json_response + } + + return { + 'id': request_id, + 'score': result_frame.to_list() + } + +def get_combined_frame(input_frame, result_frame, include_on_out_fields): + combined_frame = dt.Frame() + + for include_column in include_on_out_fields: + combined_frame = dt.cbind(combined_frame, input_frame[include_column]) + + combined_frame = dt.cbind(combined_frame, result_frame) + return combined_frame - return json_response if __name__ == '__main__': logger.info('==== Starting the H2O mojo-cpp scoring server =====') From 9f34a7d744eb1471f7f07889fda797cccf05e01e Mon Sep 17 00:00:00 2001 From: Achraf Merzouki Date: Thu, 14 Apr 2022 09:58:31 -0400 Subject: [PATCH 09/24] Update dockerfile to include libopenblas-dev and update mojo runtime --- aws-sagemaker-hosted-scorer-cpp/Dockerfile | 4 +++- aws-sagemaker-hosted-scorer-cpp/requirements.txt | 3 +-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/aws-sagemaker-hosted-scorer-cpp/Dockerfile b/aws-sagemaker-hosted-scorer-cpp/Dockerfile index ef591603..b770a536 100644 --- a/aws-sagemaker-hosted-scorer-cpp/Dockerfile +++ b/aws-sagemaker-hosted-scorer-cpp/Dockerfile @@ -1,6 +1,8 @@ FROM python:3.8 -RUN apt-get update && rm -rf /var/lib/apt/lists/* +RUN apt-get update && \ + apt-get install -y libopenblas-dev && \ + rm -rf /var/lib/apt/lists/* COPY requirements.txt /tmp/requirements.txt diff --git a/aws-sagemaker-hosted-scorer-cpp/requirements.txt b/aws-sagemaker-hosted-scorer-cpp/requirements.txt index 599551c8..43d5ead7 100644 --- a/aws-sagemaker-hosted-scorer-cpp/requirements.txt +++ b/aws-sagemaker-hosted-scorer-cpp/requirements.txt @@ -1,5 +1,4 @@ pandas datatable flask-restful -https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/daimojo/2.7.5/x86_64-centos7/daimojo-2.7.5-cp38-cp38-linux_x86_64.whl - 
+https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/daimojo/2.7.8%2Bmaster.431/x86_64-centos7/daimojo-2.7.8%2Bmaster.431-cp38-cp38-linux_x86_64.whl From d567f99bf1adaee0ee34b7f4a7f68e336c189fdd Mon Sep 17 00:00:00 2001 From: Achraf Merzouki Date: Thu, 14 Apr 2022 14:23:45 -0400 Subject: [PATCH 10/24] Add error handler and fix DT N/A translation, it should be covered in the predict --- .../py/scorer/mojo_cpp_scorer.py | 84 ++++++++----------- 1 file changed, 33 insertions(+), 51 deletions(-) diff --git a/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py index e8d0b085..7872ac28 100644 --- a/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py +++ b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py @@ -37,12 +37,10 @@ def get_id(self): def get_feature_names(self): """Return feature names""" - return self._model.feature_names def get_types(self): """Get column types""" - types = {} for index, value in enumerate(self._model.feature_names): types[value] = self._model.feature_types[index] @@ -50,12 +48,10 @@ def get_types(self): def get_missing_values(self): """Return mojo missing values""" - return self._model.missing_values def get_prediction(self, d_frame): """Score and return predictions on a given dataset""" - return self._model.predict(d_frame) @@ -63,82 +59,47 @@ class ScorerError(Exception): """Base Scorer Error""" status_code = 500 + class BadRequest(ScorerError): """Bad request""" status_code = 400 -class ScorerAPI(Resource): - - def post(self): - response_payload = request_handler(request) - json_response = json.dumps(response_payload) - return json_response - - -class PingAPI(Resource): - - def get(self): - pass - - -api.add_resource(ScorerAPI, '/invocations') -api.add_resource(PingAPI, '/ping') - - -def request_handler(request): - request_body = request.get_json() +def score(request_body): if request_body is None or len(request_body.keys()) == 0: - raise 
BadRequest("Invalid request. Need a request body.") + raise BadRequest("Invalid request. Need a request body.") if 'fields' not in request_body.keys() or not isinstance(request_body['fields'], list): - raise BadRequest("Cannot determine the request column fields") + raise BadRequest("Cannot determine the request column fields") if 'rows' not in request_body.keys() or not isinstance(request_body['rows'], list): - raise BadRequest("Cannot determine the request rows") - - scoring_result = score(request_body) - return scoring_result + raise BadRequest("Cannot determine the request rows") -def score(request_body): mojo = MojoPipeline() # properly define types based on the request elements order - tmp_frame = dt.Frame( + d_frame = dt.Frame( [list(x) for x in list(zip(*request_body['rows']))], names=list(mojo.get_types().keys()), stypes=list(mojo.get_types().values()) ) - d_frame = dt.fread( - text=tmp_frame.to_csv(), - columns=list(mojo.get_types().keys()), - na_strings=mojo.get_missing_values() - ) - result_frame = mojo.get_prediction(d_frame) request_id = mojo.get_id() - - #check whether 'includeFieldsInOutput' comes with request body and combined them with scored result + # check whether 'includeFieldsInOutput' comes with request body and combined them with scored result if 'includeFieldsInOutput' in request_body: - include_fields = list(request_body['includeFieldsInOutput']) - if len(include_fields) > 0: - combined_df = get_combined_frame(d_frame, result_frame, include_fields) - pandas_df = combined_df.to_pandas() - json_response = pandas_df.to_json(orient = 'values', date_format='iso') - - return { - 'id': request_id, - 'fields' : list(combined_df.names), - 'score': json_response - } + include_fields = list(request_body['includeFieldsInOutput']) + if len(include_fields) > 0: + result_frame = get_combined_frame(d_frame, result_frame, include_fields) return { 'id': request_id, + 'fields': result_frame.names, 'score': result_frame.to_list() } + def 
get_combined_frame(input_frame, result_frame, include_on_out_fields): combined_frame = dt.Frame() @@ -149,6 +110,27 @@ def get_combined_frame(input_frame, result_frame, include_on_out_fields): return combined_frame +class ScorerAPI(Resource): + def post(self): + request_body = request.get_json() + try: + scoring_result = score(request_body) + except ScorerError as e: + return {'message': str(e)}, e.status_code + except Exception as exc: + return {'message': str(exc)}, 500 + return scoring_result, 200 + + +class PingAPI(Resource): + + def get(self): + pass + + +api.add_resource(ScorerAPI, '/invocations') +api.add_resource(PingAPI, '/ping') + if __name__ == '__main__': logger.info('==== Starting the H2O mojo-cpp scoring server =====') app.run(host='0.0.0.0', port=8080) From 09a9dd0862978632aa9c16ac68aea79cc764a0f6 Mon Sep 17 00:00:00 2001 From: Achraf Merzouki Date: Tue, 19 Apr 2022 15:33:52 -0400 Subject: [PATCH 11/24] Set OMP_NUM_THREADS to 1 --- .../py/scorer/mojo_cpp_scorer.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py index 7872ac28..c468cd30 100644 --- a/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py +++ b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py @@ -1,12 +1,11 @@ -from flask import Flask, request -from flask_restful import Resource, Api import logging -import json import os import threading + import daimojo.model import datatable as dt -import pandas as pd +from flask import Flask, request +from flask_restful import Resource, Api logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -29,6 +28,7 @@ def __new__(cls): return MojoPipeline._instance def setup(self): + self._set_omp_threads() mojo_file_path = os.getenv('MOJO_FILE_PATH') self._model = daimojo.model(mojo_file_path) @@ -52,8 +52,13 @@ def get_missing_values(self): def 
get_prediction(self, d_frame): """Score and return predictions on a given dataset""" + self._set_omp_threads() return self._model.predict(d_frame) + @staticmethod + def _set_omp_threads(): + os.environ['OMP_NUM_THREADS'] = str(1) + class ScorerError(Exception): """Base Scorer Error""" From 55df6ac91b7db2de1973e65389a6bc6e88524736 Mon Sep 17 00:00:00 2001 From: Achraf Merzouki Date: Wed, 20 Apr 2022 11:37:14 -0400 Subject: [PATCH 12/24] Add timeit arg to return time spent on mojo predict --- .../py/scorer/mojo_cpp_scorer.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py index c468cd30..e021a9de 100644 --- a/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py +++ b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py @@ -1,6 +1,7 @@ import logging import os import threading +import time import daimojo.model import datatable as dt @@ -70,7 +71,7 @@ class BadRequest(ScorerError): status_code = 400 -def score(request_body): +def score(request_body, timeit=False): if request_body is None or len(request_body.keys()) == 0: raise BadRequest("Invalid request. 
Need a request body.") @@ -89,7 +90,9 @@ def score(request_body): stypes=list(mojo.get_types().values()) ) + start = time.time() if timeit else 0 result_frame = mojo.get_prediction(d_frame) + delta = time.time() - start if timeit else 0 request_id = mojo.get_id() # check whether 'includeFieldsInOutput' comes with request body and combined them with scored result @@ -98,11 +101,15 @@ def score(request_body): if len(include_fields) > 0: result_frame = get_combined_frame(d_frame, result_frame, include_fields) - return { + ret = { 'id': request_id, 'fields': result_frame.names, 'score': result_frame.to_list() } + if timeit: + ret['time'] = delta + + return ret def get_combined_frame(input_frame, result_frame, include_on_out_fields): @@ -119,7 +126,7 @@ class ScorerAPI(Resource): def post(self): request_body = request.get_json() try: - scoring_result = score(request_body) + scoring_result = score(request_body, timeit='timeit' in request.args.keys()) except ScorerError as e: return {'message': str(e)}, e.status_code except Exception as exc: From 5da22a6080525f7e5e6b9986493da6164df4183e Mon Sep 17 00:00:00 2001 From: Achraf Merzouki Date: Wed, 20 Apr 2022 11:46:13 -0400 Subject: [PATCH 13/24] Set OMP threads to nproc/2 --- aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py index e021a9de..d126f066 100644 --- a/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py +++ b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py @@ -58,7 +58,9 @@ def get_prediction(self, d_frame): @staticmethod def _set_omp_threads(): - os.environ['OMP_NUM_THREADS'] = str(1) + os.environ['OMP_NUM_THREADS'] = str(max(1, int(os.cpu_count()/2))) + os.environ['OMP_SCHEDULE'] = 'STATIC' + #os.environ['OMP_PROC_BIND'] = 'CLOSE' class ScorerError(Exception): From 
fef0c29b785d2cb430e78e1ceb5ab1bafde79894 Mon Sep 17 00:00:00 2001 From: achraf-mer <51244975+achraf-mer@users.noreply.github.com> Date: Thu, 12 May 2022 15:10:50 -0400 Subject: [PATCH 14/24] Update mojo runtime --- aws-sagemaker-hosted-scorer-cpp/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws-sagemaker-hosted-scorer-cpp/requirements.txt b/aws-sagemaker-hosted-scorer-cpp/requirements.txt index 43d5ead7..1816b06b 100644 --- a/aws-sagemaker-hosted-scorer-cpp/requirements.txt +++ b/aws-sagemaker-hosted-scorer-cpp/requirements.txt @@ -1,4 +1,4 @@ pandas datatable flask-restful -https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/daimojo/2.7.8%2Bmaster.431/x86_64-centos7/daimojo-2.7.8%2Bmaster.431-cp38-cp38-linux_x86_64.whl +https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/daimojo/2.7.9%2Bcu111.master.447/x86_64-centos7/daimojo-2.7.9%2Bmaster.447-cp38-cp38-linux_x86_64.whl From 20571f6e534b5a8d70b1eb448e6bf4027f824ccd Mon Sep 17 00:00:00 2001 From: achraf-mer <51244975+achraf-mer@users.noreply.github.com> Date: Fri, 20 May 2022 14:33:40 -0400 Subject: [PATCH 15/24] Update requirements.txt --- aws-sagemaker-hosted-scorer-cpp/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws-sagemaker-hosted-scorer-cpp/requirements.txt b/aws-sagemaker-hosted-scorer-cpp/requirements.txt index 1816b06b..f877fab9 100644 --- a/aws-sagemaker-hosted-scorer-cpp/requirements.txt +++ b/aws-sagemaker-hosted-scorer-cpp/requirements.txt @@ -1,4 +1,4 @@ pandas datatable flask-restful -https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/daimojo/2.7.9%2Bcu111.master.447/x86_64-centos7/daimojo-2.7.9%2Bmaster.447-cp38-cp38-linux_x86_64.whl +https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/daimojo/2.7.8/x86_64-centos7/daimojo-2.7.8-cp38-cp38-linux_x86_64.whl From 7713046918b5a05357b6688855c8a6f812e00af5 Mon Sep 17 00:00:00 2001 From: Achraf Merzouki Date: Wed, 25 May 2022 12:46:17 -0400 Subject: [PATCH 
16/24] Update mojo runtime and do not use flask internal ws --- aws-sagemaker-hosted-scorer-cpp/Dockerfile | 4 ++-- aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py | 4 ++-- aws-sagemaker-hosted-scorer-cpp/requirements.txt | 3 ++- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/aws-sagemaker-hosted-scorer-cpp/Dockerfile b/aws-sagemaker-hosted-scorer-cpp/Dockerfile index b770a536..e750a8d4 100644 --- a/aws-sagemaker-hosted-scorer-cpp/Dockerfile +++ b/aws-sagemaker-hosted-scorer-cpp/Dockerfile @@ -6,7 +6,7 @@ RUN apt-get update && \ COPY requirements.txt /tmp/requirements.txt -RUN pip install -r /tmp/requirements.txt +RUN pip install pip==21.1 && pip install -r /tmp/requirements.txt ENV DRIVERLESS_AI_LICENSE_FILE='/opt/ml/model/license.sig' \ MOJO_FILE_PATH='/opt/ml/model/pipeline.mojo' @@ -17,4 +17,4 @@ COPY py/scorer/mojo_cpp_scorer.py /opt/ml/code WORKDIR /opt/ml/code -ENTRYPOINT ["python", "mojo_cpp_scorer.py"] +ENTRYPOINT ["gunicorn", "-w", "1", "-b", "0.0.0.0:8080", "mojo_cpp_scorer:app"] diff --git a/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py index d126f066..38ec3c32 100644 --- a/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py +++ b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py @@ -60,7 +60,7 @@ def get_prediction(self, d_frame): def _set_omp_threads(): os.environ['OMP_NUM_THREADS'] = str(max(1, int(os.cpu_count()/2))) os.environ['OMP_SCHEDULE'] = 'STATIC' - #os.environ['OMP_PROC_BIND'] = 'CLOSE' + os.environ['OMP_PROC_BIND'] = 'CLOSE' class ScorerError(Exception): @@ -147,4 +147,4 @@ def get(self): if __name__ == '__main__': logger.info('==== Starting the H2O mojo-cpp scoring server =====') - app.run(host='0.0.0.0', port=8080) + app.run(host='0.0.0.0', port=8080, threaded=True) diff --git a/aws-sagemaker-hosted-scorer-cpp/requirements.txt b/aws-sagemaker-hosted-scorer-cpp/requirements.txt index f877fab9..325629fe 100644 --- 
a/aws-sagemaker-hosted-scorer-cpp/requirements.txt +++ b/aws-sagemaker-hosted-scorer-cpp/requirements.txt @@ -1,4 +1,5 @@ pandas datatable flask-restful -https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/daimojo/2.7.8/x86_64-centos7/daimojo-2.7.8-cp38-cp38-linux_x86_64.whl +gunicorn +https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/daimojo/2.7.9%2Bcu111.master.446/x86_64-centos7/daimojo-2.7.9%2Bmaster.446-cp38-cp38-linux_x86_64.whl From 718abfaa61faf475171494436b50a6148ab5b0bd Mon Sep 17 00:00:00 2001 From: Achraf Merzouki Date: Wed, 25 May 2022 13:21:51 -0400 Subject: [PATCH 17/24] Set Flask threaded to false (to be used when testing) --- aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py index 38ec3c32..3ae7010f 100644 --- a/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py +++ b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py @@ -147,4 +147,4 @@ def get(self): if __name__ == '__main__': logger.info('==== Starting the H2O mojo-cpp scoring server =====') - app.run(host='0.0.0.0', port=8080, threaded=True) + app.run(host='0.0.0.0', port=8080, threaded=False) From 62f004239286a3e1caeb053ec687c254adf70ea6 Mon Sep 17 00:00:00 2001 From: Achraf Merzouki Date: Thu, 26 May 2022 11:07:02 -0400 Subject: [PATCH 18/24] Control web server workers via WEB_SERVER_WORKERS env variable --- aws-sagemaker-hosted-scorer-cpp/Dockerfile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/aws-sagemaker-hosted-scorer-cpp/Dockerfile b/aws-sagemaker-hosted-scorer-cpp/Dockerfile index e750a8d4..0c7d8e80 100644 --- a/aws-sagemaker-hosted-scorer-cpp/Dockerfile +++ b/aws-sagemaker-hosted-scorer-cpp/Dockerfile @@ -8,8 +8,9 @@ COPY requirements.txt /tmp/requirements.txt RUN pip install pip==21.1 && pip install -r /tmp/requirements.txt 
-ENV DRIVERLESS_AI_LICENSE_FILE='/opt/ml/model/license.sig' \ - MOJO_FILE_PATH='/opt/ml/model/pipeline.mojo' +ENV DRIVERLESS_AI_LICENSE_FILE='/opt/ml/model/license.sig' +ENV MOJO_FILE_PATH='/opt/ml/model/pipeline.mojo' +ENV WEB_SERVER_WORKERS=1 RUN mkdir -p /opt/ml/code @@ -17,4 +18,4 @@ COPY py/scorer/mojo_cpp_scorer.py /opt/ml/code WORKDIR /opt/ml/code -ENTRYPOINT ["gunicorn", "-w", "1", "-b", "0.0.0.0:8080", "mojo_cpp_scorer:app"] +ENTRYPOINT gunicorn -w ${WEB_SERVER_WORKERS:-1} -b 0.0.0.0:8080 mojo_cpp_scorer:app From abf22ac8385f993f2d4844cc377bffb21116148a Mon Sep 17 00:00:00 2001 From: achraf-mer <51244975+achraf-mer@users.noreply.github.com> Date: Tue, 31 May 2022 10:51:34 -0400 Subject: [PATCH 19/24] Update README.md --- aws-sagemaker-hosted-scorer-cpp/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/aws-sagemaker-hosted-scorer-cpp/README.md b/aws-sagemaker-hosted-scorer-cpp/README.md index 3012d9b1..320d0aa0 100644 --- a/aws-sagemaker-hosted-scorer-cpp/README.md +++ b/aws-sagemaker-hosted-scorer-cpp/README.md @@ -34,6 +34,8 @@ docker run \ -p 8080:8080 \ harbor.h2o.ai/opsh2oai/h2oai/sagemaker-hosted-scorer: ``` +(the number of web server workers can be configured by setting the environment variable: `WEB_SERVER_WORKERS` in the docker run command) + Step 3: Use the following curl command to test the container locally: ``` From e6f02771e195fda2c738c0256cfb7139bc49b87b Mon Sep 17 00:00:00 2001 From: Vladimir Ovsyannikov Date: Mon, 30 May 2022 13:16:56 +0200 Subject: [PATCH 20/24] gpu image --- aws-sagemaker-hosted-scorer-cpp/gpu.docker | 21 ++++++++++++++++++ .../requirements.txt | 22 +++++++++++++++---- .../requirements_gpu.txt | 19 ++++++++++++++++ 3 files changed, 58 insertions(+), 4 deletions(-) create mode 100644 aws-sagemaker-hosted-scorer-cpp/gpu.docker create mode 100644 aws-sagemaker-hosted-scorer-cpp/requirements_gpu.txt diff --git a/aws-sagemaker-hosted-scorer-cpp/gpu.docker b/aws-sagemaker-hosted-scorer-cpp/gpu.docker new file mode 
100644 index 00000000..265d20be --- /dev/null +++ b/aws-sagemaker-hosted-scorer-cpp/gpu.docker @@ -0,0 +1,21 @@ +FROM nvidia/cuda:11.1.1-base-ubuntu20.04 + +RUN apt-get update && \ + apt-get install -y libopenblas-dev python3.8 python3-pip && \ + update-alternatives --install /usr/bin/python python /usr/bin/python3.8 0 && \ + rm -rf /var/lib/apt/lists/* + +COPY requirements_gpu.txt /tmp/requirements.txt + +RUN python -m pip install pip==21.1 && pip install -r /tmp/requirements.txt + +ENV DRIVERLESS_AI_LICENSE_FILE='/opt/ml/model/license.sig' \ + MOJO_FILE_PATH='/opt/ml/model/pipeline.mojo' + +RUN mkdir -p /opt/ml/code + +COPY py/scorer/mojo_cpp_scorer.py /opt/ml/code + +WORKDIR /opt/ml/code + +ENTRYPOINT ["gunicorn", "-w", "4", "-b", "0.0.0.0:8080", "mojo_cpp_scorer:app"] diff --git a/aws-sagemaker-hosted-scorer-cpp/requirements.txt b/aws-sagemaker-hosted-scorer-cpp/requirements.txt index 325629fe..383e3763 100644 --- a/aws-sagemaker-hosted-scorer-cpp/requirements.txt +++ b/aws-sagemaker-hosted-scorer-cpp/requirements.txt @@ -1,5 +1,19 @@ -pandas -datatable -flask-restful -gunicorn +aniso8601==9.0.1 +click==8.1.3 +datatable==1.0.0 +Flask==2.1.2 +Flask-RESTful==0.3.9 +gunicorn==20.1.0 +importlib-metadata==4.11.4 +itsdangerous==2.1.2 +Jinja2==3.1.2 +MarkupSafe==2.1.1 +numpy==1.22.4 +pandas==1.4.2 +protobuf==4.21.1 +python-dateutil==2.8.2 +pytz==2022.1 +six==1.16.0 +Werkzeug==2.1.2 +zipp==3.8.0 https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/daimojo/2.7.9%2Bcu111.master.446/x86_64-centos7/daimojo-2.7.9%2Bmaster.446-cp38-cp38-linux_x86_64.whl diff --git a/aws-sagemaker-hosted-scorer-cpp/requirements_gpu.txt b/aws-sagemaker-hosted-scorer-cpp/requirements_gpu.txt new file mode 100644 index 00000000..3731439f --- /dev/null +++ b/aws-sagemaker-hosted-scorer-cpp/requirements_gpu.txt @@ -0,0 +1,19 @@ +aniso8601==9.0.1 +click==8.1.3 +datatable==1.0.0 +Flask==2.1.2 +Flask-RESTful==0.3.9 +gunicorn==20.1.0 +importlib-metadata==4.11.4 +itsdangerous==2.1.2 +Jinja2==3.1.2 
+MarkupSafe==2.1.1 +numpy==1.22.4 +pandas==1.4.2 +protobuf==4.21.1 +python-dateutil==2.8.2 +pytz==2022.1 +six==1.16.0 +Werkzeug==2.1.2 +zipp==3.8.0 +https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/daimojo/2.7.9%2Bmaster.450/x86_64-centos7/daimojo-2.7.9%2Bcu111.master.450-cp38-cp38-linux_x86_64.whl \ No newline at end of file From 2cdc6c16417b2f835558ec6c84b32f304ad0ba89 Mon Sep 17 00:00:00 2001 From: Vladimir Ovsyannikov Date: Mon, 30 May 2022 13:43:54 +0200 Subject: [PATCH 21/24] update docs --- aws-sagemaker-hosted-scorer-cpp/README.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/aws-sagemaker-hosted-scorer-cpp/README.md b/aws-sagemaker-hosted-scorer-cpp/README.md index 320d0aa0..b0f0ff58 100644 --- a/aws-sagemaker-hosted-scorer-cpp/README.md +++ b/aws-sagemaker-hosted-scorer-cpp/README.md @@ -10,6 +10,13 @@ Run the following command to build the docker image. docker build -t .dkr.ecr..amazonaws.com/h2oai/sagemaker-hosted-scorer: . ``` +Run the following command to build the docker image with gpu support. + +```bash +docker build -t .dkr.ecr..amazonaws.com/h2oai/sagemaker-hosted-scorer: -f gpu.docker . +``` + + Verify that the Docker image was created, and take note of the version created. ```bash @@ -34,6 +41,20 @@ docker run \ -p 8080:8080 \ harbor.h2o.ai/opsh2oai/h2oai/sagemaker-hosted-scorer: ``` + +And to run the gpu image. 
+ +``` +docker run \ + --gpus all \ + --rm \ + --init \ + -ti \ + -v `pwd`:/opt/ml/model \ + -p 8080:8080 \ + harbor.h2o.ai/opsh2oai/h2oai/sagemaker-hosted-scorer: +``` + (the number of web server workers can be configured by setting the environment variable: `WEB_SERVER_WORKERS` in the docker run command) Step 3: Use the following curl command to test the container locally: From 719dfa81024381d96dda9922dc77e00ea05cb61c Mon Sep 17 00:00:00 2001 From: achraf-mer <51244975+achraf-mer@users.noreply.github.com> Date: Tue, 31 May 2022 10:53:19 -0400 Subject: [PATCH 22/24] Update gpu.docker --- aws-sagemaker-hosted-scorer-cpp/gpu.docker | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/aws-sagemaker-hosted-scorer-cpp/gpu.docker b/aws-sagemaker-hosted-scorer-cpp/gpu.docker index 265d20be..3550f00b 100644 --- a/aws-sagemaker-hosted-scorer-cpp/gpu.docker +++ b/aws-sagemaker-hosted-scorer-cpp/gpu.docker @@ -9,8 +9,9 @@ COPY requirements_gpu.txt /tmp/requirements.txt RUN python -m pip install pip==21.1 && pip install -r /tmp/requirements.txt -ENV DRIVERLESS_AI_LICENSE_FILE='/opt/ml/model/license.sig' \ - MOJO_FILE_PATH='/opt/ml/model/pipeline.mojo' +ENV DRIVERLESS_AI_LICENSE_FILE='/opt/ml/model/license.sig' +ENV MOJO_FILE_PATH='/opt/ml/model/pipeline.mojo' +ENV WEB_SERVER_WORKERS=4 RUN mkdir -p /opt/ml/code @@ -18,4 +19,4 @@ COPY py/scorer/mojo_cpp_scorer.py /opt/ml/code WORKDIR /opt/ml/code -ENTRYPOINT ["gunicorn", "-w", "4", "-b", "0.0.0.0:8080", "mojo_cpp_scorer:app"] +ENTRYPOINT gunicorn -w ${WEB_SERVER_WORKERS:-4} -b 0.0.0.0:8080 mojo_cpp_scorer:app From 323c8acb5ec7afb1b6d5e1b20ee40d8116c29142 Mon Sep 17 00:00:00 2001 From: Vladimir Ovsyannikov Date: Thu, 2 Jun 2022 14:54:41 +0200 Subject: [PATCH 23/24] use ENV WEB_SERVER_WORKERS --- aws-sagemaker-hosted-scorer-cpp/gpu.docker | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws-sagemaker-hosted-scorer-cpp/gpu.docker b/aws-sagemaker-hosted-scorer-cpp/gpu.docker index 
3550f00b..b6d845c0 100644 --- a/aws-sagemaker-hosted-scorer-cpp/gpu.docker +++ b/aws-sagemaker-hosted-scorer-cpp/gpu.docker @@ -19,4 +19,4 @@ COPY py/scorer/mojo_cpp_scorer.py /opt/ml/code WORKDIR /opt/ml/code -ENTRYPOINT gunicorn -w ${WEB_SERVER_WORKERS:-4} -b 0.0.0.0:8080 mojo_cpp_scorer:app +ENTRYPOINT gunicorn -w ${WEB_SERVER_WORKERS} -b 0.0.0.0:8080 mojo_cpp_scorer:app From 7e45ef255c37524b4b3f9672e924b8f2abec2668 Mon Sep 17 00:00:00 2001 From: Vladimir Ovsyannikov Date: Mon, 13 Jun 2022 15:09:07 +0200 Subject: [PATCH 24/24] configuration for OMP_NUM_THREADS and WEB_SERVER_WORKERS --- aws-sagemaker-hosted-scorer-cpp/README.md | 5 +++++ aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py | 4 +--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/aws-sagemaker-hosted-scorer-cpp/README.md b/aws-sagemaker-hosted-scorer-cpp/README.md index b0f0ff58..f6f9d915 100644 --- a/aws-sagemaker-hosted-scorer-cpp/README.md +++ b/aws-sagemaker-hosted-scorer-cpp/README.md @@ -57,6 +57,11 @@ docker run \ (the number of web server workers can be configured by setting the environment variable: `WEB_SERVER_WORKERS` in the docker run command) +Recommended parameters: +* `OMP_NUM_THREADS=8` is a good default; it can be increased when scoring long texts.
+* `cores/(2*OMP_NUM_THREADS) <= WEB_SERVER_WORKERS <= cores/OMP_NUM_THREADS` +* `OMP_NUM_THREADS*WEB_SERVER_WORKERS` MUST NOT exceed `cores` + Step 3: Use the following curl command to test the container locally: ``` diff --git a/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py index 3ae7010f..39d1ef62 100644 --- a/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py +++ b/aws-sagemaker-hosted-scorer-cpp/py/scorer/mojo_cpp_scorer.py @@ -58,9 +58,7 @@ def get_prediction(self, d_frame): @staticmethod def _set_omp_threads(): - os.environ['OMP_NUM_THREADS'] = str(max(1, int(os.cpu_count()/2))) - os.environ['OMP_SCHEDULE'] = 'STATIC' - os.environ['OMP_PROC_BIND'] = 'CLOSE' + os.environ['OMP_NUM_THREADS'] = str(max(1, min(8, (os.cpu_count() or 1) // 2))) class ScorerError(Exception):