From 4bd6f44b77071ff8bb722ab78ea3a220583c9e67 Mon Sep 17 00:00:00 2001 From: orosca Date: Mon, 6 May 2024 15:23:35 +0200 Subject: [PATCH 01/26] Small Python version changes --- config/example.json | 2 +- config/systems.json | 3 ++- docs/platforms.md | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/config/example.json b/config/example.json index dc4da9ad..f405a3be 100644 --- a/config/example.json +++ b/config/example.json @@ -6,7 +6,7 @@ "download_results": false, "runtime": { "language": "python", - "version": "3.7" + "version": "3.9" }, "type": "invocation-overhead", "perf-cost": { diff --git a/config/systems.json b/config/systems.json index 8272078f..7009cbdb 100644 --- a/config/systems.json +++ b/config/systems.json @@ -18,7 +18,8 @@ "python": { "base_images": { "3.7": "python:3.7-slim", - "3.8": "python:3.8-slim" + "3.8": "python:3.8-slim", + "3.9": "python:3.9-slim" }, "images": [ "run", diff --git a/docs/platforms.md b/docs/platforms.md index ea3cd916..75364415 100644 --- a/docs/platforms.md +++ b/docs/platforms.md @@ -85,9 +85,9 @@ AZURE_SECRET_PASSWORD = XXXXXXXXXXXXX You can pass the credentials either using the environment variables: ``` -export AZURE_SECRET_APPLICATION_ID = XXXXXXXXXXXXXXXX -export AZURE_SECRET_TENANT = XXXXXXXXXXXX -export AZURE_SECRET_PASSWORD = XXXXXXXXXXXXX +export AZURE_SECRET_APPLICATION_ID=XXXXXXXXXXXXXXXX +export AZURE_SECRET_TENANT=XXXXXXXXXXXX +export AZURE_SECRET_PASSWORD=XXXXXXXXXXXXX ``` or in the JSON input configuration: From 728288e4cd00f9aa902a53793af0b6de17adb720 Mon Sep 17 00:00:00 2001 From: orosca Date: Mon, 6 May 2024 15:26:01 +0200 Subject: [PATCH 02/26] AWS queue and storage triggers --- benchmarks/wrappers/aws/python/handler.py | 17 ++- sebs/aws/aws.py | 18 ++- sebs/aws/function.py | 4 +- sebs/aws/triggers.py | 173 +++++++++++++++++++++- sebs/faas/function.py | 1 + 5 files changed, 206 insertions(+), 7 deletions(-) diff --git a/benchmarks/wrappers/aws/python/handler.py b/benchmarks/wrappers/aws/python/handler.py index 907b2c61..0bcfeab0 100644 --- a/benchmarks/wrappers/aws/python/handler.py +++ b/benchmarks/wrappers/aws/python/handler.py @@ -1,18 +1,31 @@ import datetime, io, json, os, sys, uuid +import boto3 # Add current directory to allow location of packages sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) -# TODO: usual trigger -# implement support for S3 and others def handler(event, context): income_timestamp = datetime.datetime.now().timestamp() + # Queue trigger + if ("Records" in event and event["Records"][0]["eventSource"] == 'aws:sqs'): + event = json.loads(event["Records"][0]["body"]) + + # Storage trigger + if ("Records" in event and "s3" in event["Records"][0]): + s3_client = boto3.client('s3') + bucket_name = event["Records"][0]["s3"]["bucket"]["name"] + file_name = event["Records"][0]["s3"]["object"]["key"] + + obj = s3_client.get_object(Bucket=bucket_name, Key=file_name) + event = json.loads(obj['Body'].read()) + # HTTP trigger with API Gateaway if 'body' in event: event = json.loads(event['body']) + req_id = context.aws_request_id event['request-id'] = req_id event['income-timestamp'] = income_timestamp diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 04f2b964..32485457 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -252,13 +252,19 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun def cached_function(self, function: Function): - from sebs.aws.triggers import LibraryTrigger + from sebs.aws.triggers import 
LibraryTrigger, QueueTrigger, StorageTrigger for trigger in function.triggers(Trigger.TriggerType.LIBRARY): trigger.logging_handlers = self.logging_handlers cast(LibraryTrigger, trigger).deployment_client = self for trigger in function.triggers(Trigger.TriggerType.HTTP): trigger.logging_handlers = self.logging_handlers + for trigger in function.triggers(Trigger.TriggerType.QUEUE): + trigger.logging_handlers = self.logging_handlers + cast(QueueTrigger, trigger).deployment_client = self + for trigger in function.triggers(Trigger.TriggerType.STORAGE): + trigger.logging_handlers = self.logging_handlers + cast(StorageTrigger, trigger).deployment_client = self """ Update function code and configuration on AWS. @@ -478,7 +484,7 @@ def download_metrics( ) def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: - from sebs.aws.triggers import HTTPTrigger + from sebs.aws.triggers import HTTPTrigger, QueueTrigger, StorageTrigger function = cast(LambdaFunction, func) @@ -505,6 +511,14 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T elif trigger_type == Trigger.TriggerType.LIBRARY: # should already exist return func.triggers(Trigger.TriggerType.LIBRARY)[0] + elif trigger_type == Trigger.TriggerType.QUEUE: + trigger = QueueTrigger(func.name, self) + trigger.logging_handlers = self.logging_handlers + self.logging.info(f"Created Queue trigger for {func.name} function.") + elif trigger_type == Trigger.TriggerType.STORAGE: + trigger = StorageTrigger(func.name, self) + trigger.logging_handlers = self.logging_handlers + self.logging.info(f"Created Storage trigger for {func.name} function.") else: raise RuntimeError("Not supported!") diff --git a/sebs/aws/function.py b/sebs/aws/function.py index a36dc821..2787ce86 100644 --- a/sebs/aws/function.py +++ b/sebs/aws/function.py @@ -38,7 +38,7 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> "LambdaFunction": from sebs.faas.function import Trigger - from sebs.aws.triggers import LibraryTrigger, HTTPTrigger + from sebs.aws.triggers import LibraryTrigger, HTTPTrigger, QueueTrigger, StorageTrigger cfg = FunctionConfig.deserialize(cached_config["config"]) ret = LambdaFunction( @@ -54,7 +54,7 @@ def deserialize(cached_config: dict) -> "LambdaFunction": for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": LibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), + {"Library": LibraryTrigger, "HTTP": HTTPTrigger, "Queue": QueueTrigger, "Storage": StorageTrigger}.get(trigger["type"]), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index f1831459..34ffafae 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -2,7 +2,10 @@ import concurrent.futures import datetime import json -from typing import Dict, Optional # noqa +from typing import Dict, Optional +import uuid # noqa + +import boto3 from sebs.aws.aws import AWS from sebs.faas.function import ExecutionResult, Trigger @@ -123,3 +126,171 @@ def serialize(self) -> dict: @staticmethod def deserialize(obj: dict) -> Trigger: return HTTPTrigger(obj["url"], obj["api-id"]) + + +class QueueTrigger(Trigger): + def __init__(self, fname: str, deployment_client: Optional[AWS] = None): + super().__init__() + self.name = fname + self._deployment_client = deployment_client + + @staticmethod + def typename() -> str: + return "AWS.QueueTrigger" + + @property + 
def deployment_client(self) -> AWS: + assert self._deployment_client + return self._deployment_client + + @deployment_client.setter + def deployment_client(self, deployment_client: AWS): + self._deployment_client = deployment_client + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.QUEUE + + def sync_invoke(self, payload: dict) -> ExecutionResult: + + self.logging.debug(f"Invoke function {self.name}") + + # Init clients + lambda_client = self.deployment_client.get_lambda_client() + sqs_client = boto3.client('sqs', region_name=self.deployment_client.config.region) + + serialized_payload = json.dumps(payload) + + # Create queue + self.logging.debug(f"Creating queue {self.name}") + + queue_url = sqs_client.create_queue(QueueName=self.name)["QueueUrl"] + queue_arn = sqs_client.get_queue_attributes( + QueueUrl=queue_url, + AttributeNames=["QueueArn"] + )["Attributes"]["QueueArn"] + + self.logging.debug(f"Created queue") + + # Add queue trigger + if (not len(lambda_client.list_event_source_mappings(EventSourceArn=queue_arn, + FunctionName=self.name) + ["EventSourceMappings"])): + lambda_client.create_event_source_mapping( + EventSourceArn=queue_arn, + FunctionName=self.name, + MaximumBatchingWindowInSeconds=1 + ) + + # Publish payload to queue + sqs_client.send_message(QueueUrl=queue_url, MessageBody=serialized_payload) + self.logging.info(f"Sent message to queue {self.name}") + + # TODO(oana): gather metrics + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + + pool = concurrent.futures.ThreadPoolExecutor() + fut = pool.submit(self.sync_invoke, payload) + return fut + + def serialize(self) -> dict: + return {"type": "Queue", "name": self.name} + + @staticmethod + def deserialize(obj: dict) -> Trigger: + return QueueTrigger(obj["name"]) + + +class StorageTrigger(Trigger): + def __init__(self, fname: str, deployment_client: Optional[AWS] = None): + super().__init__() + self.name = fname + self._deployment_client = deployment_client + + @staticmethod + def typename() -> str: + return "AWS.StorageTrigger" + + @property + def deployment_client(self) -> AWS: + assert self._deployment_client + return self._deployment_client + + @deployment_client.setter + def deployment_client(self, deployment_client: AWS): + self._deployment_client = deployment_client + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.STORAGE + + def sync_invoke(self, payload: dict) -> ExecutionResult: + + self.logging.debug(f"Invoke function {self.name}") + + # Init clients + lambda_client = self.deployment_client.get_lambda_client() + s3 = boto3.resource('s3') + + # Prep + serialized_payload = json.dumps(payload) + bucket_name = self.name.replace('_', '-') # AWS disallows underscores in bucket names + function_arn = lambda_client.get_function(FunctionName=self.name)["Configuration"]["FunctionArn"] + + # Create bucket + self.logging.info(f"Creating bucket {bucket_name}") + + region = self.deployment_client.config.region + if (region == "us-east-1"): + s3.create_bucket(Bucket=bucket_name) + else: + s3.create_bucket( + Bucket=bucket_name, + CreateBucketConfiguration={ + "LocationConstraint": region + } + ) + + self.logging.info("Created bucket") + + lambda_client.add_permission( + FunctionName=self.name, + StatementId=str(uuid.uuid1()), + Action="lambda:InvokeFunction", + Principal="s3.amazonaws.com", + SourceArn=f"arn:aws:s3:::{bucket_name}", + ) + + # Add bucket trigger + bucket_notification = s3.BucketNotification(bucket_name) + 
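# Note: put() below replaces the bucket's entire notification configuration,
# and the add_permission call above must succeed first - S3 validates that it
# is allowed to invoke the Lambda function before accepting the
# ObjectCreated subscription.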
bucket_notification.put( + NotificationConfiguration={'LambdaFunctionConfigurations': [ + { + 'LambdaFunctionArn': function_arn, + 'Events': [ + 's3:ObjectCreated:*' + ], + + }, + ]} + ) + + # Put object + s3.Object(bucket_name, 'payload.json').put(Body=serialized_payload) + self.logging.info(f"Uploaded payload to bucket {bucket_name}") + + # TODO(oana): gather metrics + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + + pool = concurrent.futures.ThreadPoolExecutor() + fut = pool.submit(self.sync_invoke, payload) + return fut + + def serialize(self) -> dict: + return {"type": "Storage", "name": self.name} + + @staticmethod + def deserialize(obj: dict) -> Trigger: + return StorageTrigger(obj["name"]) diff --git a/sebs/faas/function.py b/sebs/faas/function.py index c2226cee..df732360 100644 --- a/sebs/faas/function.py +++ b/sebs/faas/function.py @@ -179,6 +179,7 @@ class TriggerType(Enum): HTTP = "http" LIBRARY = "library" STORAGE = "storage" + QUEUE = "queue" @staticmethod def get(name: str) -> "Trigger.TriggerType": From 9c3a01653a2afef2bc24cb8ff295d571e8f1cc53 Mon Sep 17 00:00:00 2001 From: orosca Date: Mon, 6 May 2024 15:27:32 +0200 Subject: [PATCH 03/26] GCP queue and storage triggers --- benchmarks/wrappers/gcp/python/handler.py | 38 +++++- sebs/gcp/function.py | 9 +- sebs/gcp/gcp.py | 149 ++++++++++++++++++---- sebs/gcp/triggers.py | 116 +++++++++++++++++ 4 files changed, 282 insertions(+), 30 deletions(-) diff --git a/benchmarks/wrappers/gcp/python/handler.py b/benchmarks/wrappers/gcp/python/handler.py index b9017b52..e5093061 100644 --- a/benchmarks/wrappers/gcp/python/handler.py +++ b/benchmarks/wrappers/gcp/python/handler.py @@ -1,9 +1,10 @@ -import datetime, io, json, os, uuid, sys +import base64, datetime, io, json, os, uuid, sys -sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) +from google.cloud import storage as gcp_storage +sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) -def handler(req): +def handler_http(req): income_timestamp = datetime.datetime.now().timestamp() req_id = req.headers.get('Function-Execution-Id') @@ -62,3 +63,34 @@ def handler(req): 'cold_start_var': cold_start_var, 'container_id': container_id, }), 200, {'ContentType': 'application/json'} + +def handler_queue(data, context): + serialized_payload = data.get('data') + payload = json.loads(base64.b64decode(serialized_payload).decode("ascii")) + + from function import function + ret = function.handler(payload) + + # TODO(oana) + +def handler_storage(data, context): + bucket_name = data.get('bucket') + name = data.get('name') + filepath = '/tmp/bucket_contents' + client = gcp_storage.Client(); + + print("Download {}:{} to {}".format(bucket_name, name, filepath)) + print(data) + bucket_instance = client.bucket(bucket_name) + blob = bucket_instance.blob(name) + blob.download_to_filename(filepath) + + payload = {} + + with open(filepath, 'r') as fp: + payload = json.load(fp) + + from function import function + ret = function.handler(payload) + + # TODO(oana) diff --git a/sebs/gcp/function.py b/sebs/gcp/function.py index d9c55a03..8354abc8 100644 --- a/sebs/gcp/function.py +++ b/sebs/gcp/function.py @@ -29,7 +29,8 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> "GCPFunction": from sebs.faas.function import Trigger - from sebs.gcp.triggers import LibraryTrigger, HTTPTrigger + from sebs.gcp.triggers import LibraryTrigger, HTTPTrigger, \ + QueueTrigger, 
StorageTrigger cfg = FunctionConfig.deserialize(cached_config["config"]) ret = GCPFunction( @@ -42,7 +43,11 @@ def deserialize(cached_config: dict) -> "GCPFunction": for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": LibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), + {"Library": LibraryTrigger, + "HTTP": HTTPTrigger, + "Queue": QueueTrigger, + "Storage": StorageTrigger + }.get(trigger["type"]), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 45146974..bef8950a 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -99,6 +99,86 @@ def get_storage( self.storage.replace_existing = replace_existing return self.storage + """ + Provide the fully qualified name of a trigger resource (queue or storage). + """ + def get_trigger_resource_name(self, func_name: str) -> str: + trigger = func_name.split("-")[-1] + + assert trigger == "queue" or trigger == "storage" + + if (trigger == "queue"): + return 'projects/{project_name}/topics/{topic}'.format( + project_name=self.config.project_name, + topic=func_name + ) + else: + return 'projects/{project_name}/buckets/{bucket}'.format( + project_name=self.config.project_name, + bucket=func_name + ) + + """ + Trigger resources (queue, bucket) must exist on GCP before the + corresponding function is first deployed. + + This function creates the required resources and returns a dict + containing trigger information required by create_req inside of + create_function. + + :param func_name: the name of the function to be deployed, + including its trigger + + :return: JSON/dict with the trigger configuration required by GCP + on function creation/update + """ + def create_trigger_resource(self, func_name: str) -> Dict: + trigger = func_name.split("-")[-1] + + if (trigger == "queue"): + pub_sub = build("pubsub", "v1", cache_discovery=False) + topic_name = self.get_trigger_resource_name(func_name) + + self.logging.info(f"Creating queue '{topic_name}'") + try: + pub_sub.projects().topics().create(name=topic_name).execute() + self.logging.info("Created queue") + except HttpError as http_error: + if (http_error.resp.status == 409): + self.logging.info("Queue already exists, reusing...") + + return { + "eventTrigger": { + "eventType": "providers/cloud.pubsub/eventTypes/topic.publish", + "resource": topic_name, + }, + "entryPoint": "handler_queue", + } + elif (trigger == "storage"): + storage = build("storage", "v1", cache_discovery=False) + bucket_name = self.get_trigger_resource_name(func_name) + + self.logging.info(f"Creating storage bucket '{bucket_name}'") + try: + storage.buckets().insert( + project=self.config.project_name, + body={ "name": func_name }, + ).execute() + self.logging.info("Created storage bucket") + except HttpError as http_error: + if (http_error.resp.status == 409): + self.logging.info("Storage bucket already exists, reusing...") + + return { + "eventTrigger": { + "eventType": "google.storage.object.finalize", + "resource": bucket_name, + }, + "entryPoint": "handler_storage", + } + # HTTP triggers do not require resource creation + return { "httpsTrigger": {}, "entryPoint": "handler_http" } + @staticmethod def default_function_name(code_package: Benchmark) -> str: # Create function name @@ -212,6 +292,10 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti try: get_req.execute() except HttpError: + # Before creating the function, ensure all trigger 
resources (queue, + # bucket) exist on GCP. + trigger_info = self.create_trigger_resource(func_name) + create_req = ( self.function_client.projects() .locations() @@ -222,14 +306,12 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti ), body={ "name": full_func_name, - "entryPoint": "handler", "runtime": code_package.language_name + language_runtime.replace(".", ""), "availableMemoryMb": memory, "timeout": str(timeout) + "s", - "httpsTrigger": {}, "ingressSettings": "ALLOW_ALL", "sourceArchiveUrl": "gs://" + code_bucket + "/" + code_package_name, - }, + } | trigger_info, ) ) create_req.execute() @@ -278,28 +360,34 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti return function def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: - from sebs.gcp.triggers import HTTPTrigger + from sebs.gcp.triggers import HTTPTrigger, QueueTrigger, StorageTrigger - if trigger_type == Trigger.TriggerType.HTTP: + location = self.config.region + project_name = self.config.project_name + full_func_name = GCP.get_full_function_name(project_name, location, function.name) + self.logging.info(f"Function {function.name} - waiting for deployment...") + our_function_req = ( + self.function_client.projects().locations().functions().get(name=full_func_name) + ) + deployed = False + while not deployed: + status_res = our_function_req.execute() + if status_res["status"] == "ACTIVE": + deployed = True + else: + time.sleep(3) + self.logging.info(f"Function {function.name} - deployed!") - location = self.config.region - project_name = self.config.project_name - full_func_name = GCP.get_full_function_name(project_name, location, function.name) - self.logging.info(f"Function {function.name} - waiting for deployment...") - our_function_req = ( - self.function_client.projects().locations().functions().get(name=full_func_name) - ) - deployed = False - while not deployed: - status_res = our_function_req.execute() - if status_res["status"] == "ACTIVE": - deployed = True - else: - time.sleep(3) - self.logging.info(f"Function {function.name} - deployed!") + if trigger_type == Trigger.TriggerType.HTTP: invoke_url = status_res["httpsTrigger"]["url"] - trigger = HTTPTrigger(invoke_url) + self.logging.info(f"Created HTTP trigger for {function.name} function") + elif trigger_type == Trigger.TriggerType.QUEUE: + trigger = QueueTrigger(function.name, self) + self.logging.info(f"Created Queue trigger for {function.name} function") + elif trigger_type == Trigger.TriggerType.STORAGE: + trigger = StorageTrigger(function.name) + self.logging.info(f"Created Storage trigger for {function.name} function") else: raise RuntimeError("Not supported!") @@ -311,12 +399,20 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) def cached_function(self, function: Function): from sebs.faas.function import Trigger - from sebs.gcp.triggers import LibraryTrigger + from sebs.gcp.triggers import LibraryTrigger, QueueTrigger, StorageTrigger for trigger in function.triggers(Trigger.TriggerType.LIBRARY): gcp_trigger = cast(LibraryTrigger, trigger) gcp_trigger.logging_handlers = self.logging_handlers gcp_trigger.deployment_client = self + for trigger in function.triggers(Trigger.TriggerType.QUEUE): + gcp_trigger = cast(QueueTrigger, trigger) + gcp_trigger.logging_handlers = self.logging_handlers + gcp_trigger.deployment_client = self + for trigger in function.triggers(Trigger.TriggerType.STORAGE): + gcp_trigger = cast(StorageTrigger, 
trigger) + gcp_trigger.logging_handlers = self.logging_handlers + gcp_trigger.deployment_client = self def update_function(self, function: Function, code_package: Benchmark): @@ -331,6 +427,11 @@ def update_function(self, function: Function, code_package: Benchmark): full_func_name = GCP.get_full_function_name( self.config.project_name, self.config.region, function.name ) + + # Before creating the function, ensure all trigger resources (queue, + # bucket) exist on GCP. + trigger_info = self.create_trigger_resource(function.name) + req = ( self.function_client.projects() .locations() @@ -339,13 +440,11 @@ def update_function(self, function: Function, code_package: Benchmark): name=full_func_name, body={ "name": full_func_name, - "entryPoint": "handler", "runtime": code_package.language_name + language_runtime.replace(".", ""), "availableMemoryMb": function.config.memory, "timeout": str(function.config.timeout) + "s", - "httpsTrigger": {}, "sourceArchiveUrl": "gs://" + bucket + "/" + code_package_name, - }, + } | trigger_info, ) ) res = req.execute() diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 13cc3d6c..2ad08637 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -1,9 +1,15 @@ +import base64 import concurrent.futures import datetime import json +import os import time +from googleapiclient.discovery import build +from googleapiclient.errors import HttpError from typing import Dict, Optional # noqa +from google.cloud import storage as gcp_storage + from sebs.gcp.gcp import GCP from sebs.faas.function import ExecutionResult, Trigger @@ -111,3 +117,113 @@ def serialize(self) -> dict: @staticmethod def deserialize(obj: dict) -> Trigger: return HTTPTrigger(obj["url"]) + + +class QueueTrigger(Trigger): + def __init__(self, fname: str, deployment_client: Optional[GCP] = None): + super().__init__() + self.name = fname + self._deployment_client = deployment_client + + @staticmethod + def typename() -> str: + return "GCP.QueueTrigger" + + @property + def deployment_client(self) -> GCP: + assert self._deployment_client + return self._deployment_client + + @deployment_client.setter + def deployment_client(self, deployment_client: GCP): + self._deployment_client = deployment_client + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.QUEUE + + def sync_invoke(self, payload: dict) -> ExecutionResult: + + self.logging.info(f"Invoke function {self.name}") + + # Init client + pub_sub = build("pubsub", "v1", cache_discovery=False) + + # Prep + # GCP is very particular with data encoding... 
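# The Pub/Sub publish API only accepts the message payload through the
# base64-encoded "data" field, so the JSON input is encoded here and decoded
# again by handler_queue on the function side.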
+ serialized_payload = base64.b64encode(json.dumps(payload).encode("ascii")) + + # Publish payload to queue + pub_sub.projects().topics().publish( + topic=self.deployment_client.get_trigger_resource_name(self.name), + body={ + "messages": [{ + "data": serialized_payload.decode("utf-8") + }], + } + ).execute() + + # TODO(oana): gather metrics + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + + pool = concurrent.futures.ThreadPoolExecutor() + fut = pool.submit(self.sync_invoke, payload) + return fut + + def serialize(self) -> dict: + return {"type": "Queue", "name": self.name} + + @staticmethod + def deserialize(obj: dict) -> Trigger: + return QueueTrigger(obj["name"]) + + +class StorageTrigger(Trigger): + def __init__(self, fname: str): + super().__init__() + self.name = fname + + @staticmethod + def typename() -> str: + return "GCP.StorageTrigger" + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.STORAGE + + def sync_invoke(self, payload: dict) -> ExecutionResult: + + self.logging.info(f"Invoke function {self.name}") + + # Init clients + bucket_name = self.name + client = gcp_storage.Client(); + bucket_instance = client.bucket(bucket_name) + + # Prep + file_name = "payload.json" + with open(file_name, "w") as fp: + json.dump(payload, fp) + + # Upload object + gcp_storage.blob._MAX_MULTIPART_SIZE = 5 * 1024 * 1024 + blob = bucket_instance.blob(blob_name=payload, chunk_size=4 * 1024 * 1024) + blob.upload_from_filename(file_name) + + self.logging.info(f"Uploaded payload to bucket {bucket_name}") + + # TODO(oana): gather metrics + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + + pool = concurrent.futures.ThreadPoolExecutor() + fut = pool.submit(self.sync_invoke, payload) + return fut + + def serialize(self) -> dict: + return {"type": "Storage", "name": self.name} + + @staticmethod + def deserialize(obj: dict) -> Trigger: + return StorageTrigger(obj["name"]) From 63ab522fd98763dfcb7d2dcb31b7ddc4a9f1a461 Mon Sep 17 00:00:00 2001 From: orosca Date: Mon, 6 May 2024 15:29:01 +0200 Subject: [PATCH 04/26] Azure queue and storage triggers --- benchmarks/wrappers/azure/python/handler.py | 24 +++- sebs.py | 13 +- sebs/azure/azure.py | 136 +++++++++++++++++--- sebs/azure/function.py | 20 ++- sebs/azure/triggers.py | 125 ++++++++++++++++++ sebs/benchmark.py | 6 +- sebs/experiments/config.py | 9 +- 7 files changed, 301 insertions(+), 32 deletions(-) diff --git a/benchmarks/wrappers/azure/python/handler.py b/benchmarks/wrappers/azure/python/handler.py index 5f7f14f2..6375de39 100644 --- a/benchmarks/wrappers/azure/python/handler.py +++ b/benchmarks/wrappers/azure/python/handler.py @@ -1,12 +1,11 @@ -import datetime, io, json, os, uuid +import base64 +import datetime, io, json, logging, os, uuid import azure.functions as func -# TODO: usual trigger -# implement support for blob and others -def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: +def handler_http(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: income_timestamp = datetime.datetime.now().timestamp() req_json = req.get_json() if 'connection_string' in req_json: @@ -73,3 +72,20 @@ def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: mimetype="application/json" ) +def handler_queue(msg: func.QueueMessage): + logging.info('Python queue trigger function processed a queue item.') + payload = msg.get_body().decode('utf-8') + + from . 
import function + ret = function.handler(payload) + + # TODO(oana) + +def handler_storage(blob: func.InputStream): + logging.info('Python Blob trigger function processed %s', blob.name) + payload = blob.readline().decode('utf-8') # TODO(oana) + + from . import function + ret = function.handler(payload) + + # TODO(oana) diff --git a/sebs.py b/sebs.py index fe25155f..50bca451 100755 --- a/sebs.py +++ b/sebs.py @@ -166,7 +166,7 @@ def benchmark(): @click.option("--repetitions", default=5, type=int, help="Number of experimental repetitions.") @click.option( "--trigger", - type=click.Choice(["library", "http"]), + type=click.Choice(["library", "http", "queue", "storage"]), default="http", help="Function trigger to be used.", ) @@ -217,6 +217,9 @@ def invoke( if image_tag_prefix is not None: sebs_client.config.image_tag_prefix = image_tag_prefix + # Insert trigger into (experiment) config. Required by Azure when packaging. + update_nested_dict(config, ["experiments", "trigger"], (trigger if trigger is not None else "http")) + experiment_config = sebs_client.get_experiment_config(config["experiments"]) update_nested_dict(config, ["experiments", "benchmark"], benchmark) benchmark_obj = sebs_client.get_benchmark( @@ -230,9 +233,15 @@ def invoke( if timeout is not None: benchmark_obj.benchmark_config.timeout = timeout + function_name = function_name if function_name else deployment_client.default_function_name(benchmark_obj) + + # GCP: augment function name with trigger type: _http, _queue etc. + if deployment_client.name() == "gcp" or deployment_client.name() == "azure": + function_name = "{}-{}".format(function_name, trigger) + func = deployment_client.get_function( benchmark_obj, - function_name if function_name else deployment_client.default_function_name(benchmark_obj), + function_name, ) storage = deployment_client.get_storage(replace_existing=experiment_config.update_storage) input_config = benchmark_obj.prepare_input(storage=storage, size=benchmark_input_size) diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index 17316c2b..974e1bcb 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -11,7 +11,7 @@ from sebs.azure.cli import AzureCLI from sebs.azure.function import AzureFunction from sebs.azure.config import AzureConfig, AzureResources -from sebs.azure.triggers import AzureTrigger, HTTPTrigger +from sebs.azure.triggers import AzureTrigger, HTTPTrigger, QueueTrigger, StorageTrigger from sebs.faas.function import Trigger from sebs.benchmark import Benchmark from sebs.cache import Cache @@ -35,6 +35,10 @@ class Azure(System): def name(): return "azure" + @staticmethod + def typename(): + return "Azure" + @property def config(self) -> AzureConfig: return self._config @@ -114,6 +118,60 @@ def get_storage(self, replace_existing: bool = False) -> PersistentStorage: self.storage.replace_existing = replace_existing return self.storage + """ + Composes the JSON config that describes the trigger and bindings configs + for a given function to be run on Azure. 
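Azure Functions allows exactly one trigger binding per function, so the
trigger type is encoded as a suffix of the function name ("-http", "-queue",
"-storage") and the matching binding is generated here.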
+ + :param benchmark: + :param exec_files: the files which define and implement the function to be executed + :return: JSON dictionary containing the function configuration + """ + def create_function_json(self, benchmark, exec_files) -> Dict: + trigger = benchmark.split("-")[-1] + + if (trigger == "queue"): + return { + "scriptFile": exec_files, + "entryPoint": "handler_queue", + "bindings": [ + { + "name": "msg", + "type": "queueTrigger", + "direction": "in", + "queueName": benchmark, + "connection": "AzureWebJobsStorage" + } + ] + } + elif (trigger == "storage"): + return { + "scriptFile": exec_files, + "entryPoint": "handler_storage", + "bindings": [ + { + "name": "blob", + "type": "blobTrigger", + "direction": "in", + "path": benchmark, + "connection": "AzureWebJobsStorage" + } + ] + } + return { # HTTP + "scriptFile": exec_files, + "entryPoint": "handler_http", + "bindings": [ + { + "authLevel": "anonymous", + "type": "httpTrigger", + "direction": "in", + "name": "req", + "methods": ["get", "post"], + }, + {"type": "http", "direction": "out", "name": "$return"}, + ], + } + # Directory structure # handler # - source files @@ -148,23 +206,26 @@ def package_code( source_file = os.path.join(directory, f) shutil.move(source_file, handler_dir) + benchmark_stripped = '-'.join(benchmark.split("-")[:-1]) + trigger = benchmark.split("-")[-1] + func_name = ( + "{}-{}-{}-{}-{}".format( + benchmark_stripped, + language_name, + language_version, + self.config.resources_id, + trigger + ) + .replace(".", "-") + .replace("_", "-") + ) + # generate function.json - # TODO: extension to other triggers than HTTP - default_function_json = { - "scriptFile": EXEC_FILES[language_name], - "bindings": [ - { - "authLevel": "anonymous", - "type": "httpTrigger", - "direction": "in", - "name": "req", - "methods": ["get", "post"], - }, - {"type": "http", "direction": "out", "name": "$return"}, - ], - } json_out = os.path.join(directory, "handler", "function.json") - json.dump(default_function_json, open(json_out, "w"), indent=2) + json.dump( + self.create_function_json(func_name, EXEC_FILES[language_name]), + open(json_out, "w"), indent=2 + ) # generate host.json default_host_json = { @@ -258,9 +319,11 @@ def update_function(self, function: Function, code_package: Benchmark): self._mount_function_code(code_package) url = self.publish_function(function, code_package, True) - trigger = HTTPTrigger(url, self.config.resources.data_storage_account(self.cli_instance)) - trigger.logging_handlers = self.logging_handlers - function.add_trigger(trigger) + # TODO(oana): this might need refactoring + if (function.name.endswith("http")): + trigger = HTTPTrigger(url, self.config.resources.data_storage_account(self.cli_instance)) + trigger.logging_handlers = self.logging_handlers + function.add_trigger(trigger) def update_function_configuration(self, function: Function, code_package: Benchmark): # FIXME: this does nothing currently - we don't specify timeout @@ -368,7 +431,6 @@ def create_function(self, code_package: Benchmark, func_name: str) -> AzureFunct return function def cached_function(self, function: Function): - data_storage_account = self.config.resources.data_storage_account(self.cli_instance) for trigger in function.triggers_all(): azure_trigger = cast(AzureTrigger, trigger) @@ -494,8 +556,40 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) """ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: - raise NotImplementedError() + from 
sebs.azure.triggers import QueueTrigger, StorageTrigger + azure_function = cast(AzureFunction, function) + resource_group = self.config.resources.resource_group(self.cli_instance) + storage_account = azure_function.function_storage.account_name + + ret = self.cli_instance.execute( + ('az storage account show --resource-group {} --name {} --query id') + .format(resource_group, storage_account) + ) + self.cli_instance.execute( + ('az role assignment create --assignee "{}" \ + --role "Storage {} Data Contributor" \ + --scope {}') + .format( + os.environ["AZURE_USER_PRINCIPAL_NAME"], + "Queue" if trigger_type == Trigger.TriggerType.QUEUE else "Blob", + ret.decode("utf-8") + ) + ) + + if trigger_type == Trigger.TriggerType.QUEUE: + trigger = QueueTrigger(function.name, storage_account) + self.logging.info(f"Created Queue trigger for {function.name} function") + elif trigger_type == Trigger.TriggerType.STORAGE: + trigger = StorageTrigger(function.name, storage_account) + self.logging.info(f"Created Storage trigger for {function.name} function") + else: + raise RuntimeError("Not supported!") + + trigger.logging_handlers = self.logging_handlers + function.add_trigger(trigger) + self.cache_client.update_function(function) + return trigger # # def create_azure_function(self, fname, config): diff --git a/sebs/azure/function.py b/sebs/azure/function.py index 61ef4c57..c822f545 100644 --- a/sebs/azure/function.py +++ b/sebs/azure/function.py @@ -1,3 +1,5 @@ +from typing import cast + from sebs.azure.config import AzureResources from sebs.faas.function import Function, FunctionConfig @@ -13,6 +15,10 @@ def __init__( ): super().__init__(benchmark, name, code_hash, cfg) self.function_storage = function_storage + + @staticmethod + def typename() -> str: + return "Azure.AzureFunction" def serialize(self) -> dict: return { @@ -22,6 +28,10 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> Function: + from sebs.faas.function import Trigger + from sebs.azure.triggers import HTTPTrigger, \ + QueueTrigger, StorageTrigger + cfg = FunctionConfig.deserialize(cached_config["config"]) ret = AzureFunction( cached_config["name"], @@ -30,10 +40,14 @@ def deserialize(cached_config: dict) -> Function: AzureResources.Storage.deserialize(cached_config["function_storage"]), cfg, ) - from sebs.azure.triggers import HTTPTrigger - for trigger in cached_config["triggers"]: - trigger_type = {"HTTP": HTTPTrigger}.get(trigger["type"]) + trigger_type = cast( + Trigger, + {"HTTP": HTTPTrigger, + "Queue": QueueTrigger, + "Storage": StorageTrigger + }.get(trigger["type"]), + ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) return ret diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index 66be8c6d..e893e958 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -1,6 +1,15 @@ +import base64 import concurrent.futures +import json +import os from typing import Any, Dict, Optional # noqa +from azure.core.exceptions import ResourceExistsError +from azure.identity import DefaultAzureCredential +from azure.storage.blob import BlobServiceClient +from azure.storage.queue import QueueServiceClient, QueueClient, QueueMessage, BinaryBase64DecodePolicy, BinaryBase64EncodePolicy +from sebs.azure.cli import AzureCLI + from sebs.azure.config import AzureResources from sebs.faas.function import ExecutionResult, Trigger @@ -45,3 +54,119 @@ def serialize(self) -> dict: @staticmethod def deserialize(obj: dict) -> Trigger: 
return HTTPTrigger(obj["url"]) + + +class QueueTrigger(Trigger): + def __init__(self, fname: str, storage_account: str): + super().__init__() + self.name = fname + self.storage_account = storage_account + + @staticmethod + def typename() -> str: + return "Azure.QueueTrigger" + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.QUEUE + + def sync_invoke(self, payload: dict) -> ExecutionResult: + + self.logging.info(f"Invoke function {self.name}") + + # Init client + account_url = f"https://{self.storage_account}.queue.core.windows.net" + default_credential = DefaultAzureCredential() + queue_client = QueueClient(account_url, + queue_name=self.name, + credential=default_credential) + + serialized_payload = base64.b64encode(json.dumps(payload).encode('utf-8')).decode('utf-8') + + # Create queue + self.logging.info(f"Creating queue {self.name}") + + try: + queue_client.create_queue() + self.logging.info("Created queue") + except ResourceExistsError: + self.logging.info("Queue already exists, reusing...") + + # Publish payload to queue + queue_client.send_message(serialized_payload) + self.logging.info(f"Sent message to queue {self.name}") + + # TODO(oana): gather metrics + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + + pool = concurrent.futures.ThreadPoolExecutor() + fut = pool.submit(self.sync_invoke, payload) + return fut + + def serialize(self) -> dict: + return {"type": "Queue", "name": self.name, "storage_account": self.storage_account} + + @staticmethod + def deserialize(obj: dict) -> Trigger: + return QueueTrigger(obj["name"], obj["storage_account"]) + + +class StorageTrigger(Trigger): + def __init__(self, fname: str, storage_account: str): + super().__init__() + self.name = fname + self.storage_account = storage_account + + @staticmethod + def typename() -> str: + return "Azure.StorageTrigger" + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.STORAGE + + def sync_invoke(self, payload: dict) -> ExecutionResult: + + self.logging.info(f"Invoke function {self.name}") + + # Init client + account_url = f"https://{self.storage_account}.blob.core.windows.net" + default_credential = DefaultAzureCredential() + blob_service_client = BlobServiceClient(account_url, credential=default_credential) + + # Create container + container_name = self.name + self.logging.info(f"Creating container {container_name}") + try: + blob_service_client.create_container(container_name) + self.logging.info("Created container") + except ResourceExistsError: + self.logging.info("Container already exists, reusing...") + + # Prepare blob + file_name = "payload.json" + with open(file_name, 'w') as fp: + json.dump(payload, fp) + + # Upload blob + blob_client = blob_service_client.get_blob_client(container=container_name, + blob=file_name) + with open(file=file_name, mode="rb") as payload: + blob_client.upload_blob(payload, overwrite=True) + self.logging.info(f"Uploaded payload to container {container_name}") + + # TODO(oana): gather metrics + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + + pool = concurrent.futures.ThreadPoolExecutor() + fut = pool.submit(self.sync_invoke, payload) + return fut + + def serialize(self) -> dict: + return {"type": "Storage", "name": self.name, "storage_account": self.storage_account} + + @staticmethod + def deserialize(obj: dict) -> Trigger: + return StorageTrigger(obj["name"], obj["storage_account"]) diff --git a/sebs/benchmark.py b/sebs/benchmark.py index 
4673647c..e1abc419 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -493,6 +493,10 @@ def build( shutil.rmtree(self._output_dir) os.makedirs(self._output_dir) + benchmark = self.benchmark + if self._deployment_name == "azure": + benchmark = "{}-{}".format(benchmark, self._experiment_config.trigger) + self.copy_code(self._output_dir) self.add_benchmark_data(self._output_dir) self.add_deployment_files(self._output_dir) @@ -502,7 +506,7 @@ def build( os.path.abspath(self._output_dir), self.language_name, self.language_version, - self.benchmark, + benchmark, self.is_cached, ) self.logging.info( diff --git a/sebs/experiments/config.py b/sebs/experiments/config.py index a5ca3f0b..51cedd52 100644 --- a/sebs/experiments/config.py +++ b/sebs/experiments/config.py @@ -1,6 +1,6 @@ from typing import Dict -from sebs.faas.function import Runtime +from sebs.faas.function import Runtime, Trigger class Config: @@ -11,6 +11,7 @@ def __init__(self): self._flags: Dict[str, bool] = {} self._experiment_configs: Dict[str, dict] = {} self._runtime = Runtime(None, None) + self._trigger: Trigger.TriggerType @property def update_code(self) -> bool: @@ -31,6 +32,10 @@ def check_flag(self, key: str) -> bool: def runtime(self) -> Runtime: return self._runtime + @property + def trigger(self) -> Trigger.TriggerType: + return self._trigger + def experiment_settings(self, name: str) -> dict: return self._experiment_configs[name] @@ -42,6 +47,7 @@ def serialize(self) -> dict: "runtime": self._runtime.serialize(), "flags": self._flags, "experiments": self._experiment_configs, + "trigger": self._trigger, } return out @@ -55,6 +61,7 @@ def deserialize(config: dict) -> "Config": cfg._download_results = config["download_results"] cfg._runtime = Runtime.deserialize(config["runtime"]) cfg._flags = config["flags"] if "flags" in config else {} + cfg._trigger = config["trigger"] if "trigger" in config else {} from sebs.experiments import ( NetworkPingPong, From 0f7454a0e5573f3c660131f8f654555115ba6f65 Mon Sep 17 00:00:00 2001 From: orosca Date: Mon, 6 May 2024 18:29:11 +0200 Subject: [PATCH 05/26] Linting --- sebs/aws/function.py | 6 +++++- sebs/aws/triggers.py | 16 ++++++++-------- sebs/azure/azure.py | 2 +- sebs/azure/function.py | 7 +++---- sebs/azure/triggers.py | 4 +--- 5 files changed, 18 insertions(+), 17 deletions(-) diff --git a/sebs/aws/function.py b/sebs/aws/function.py index 9c125faf..de4fcc6e 100644 --- a/sebs/aws/function.py +++ b/sebs/aws/function.py @@ -55,7 +55,11 @@ def deserialize(cached_config: dict) -> "LambdaFunction": for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": LibraryTrigger, "HTTP": HTTPTrigger, "Queue": QueueTrigger, "Storage": StorageTrigger}.get(trigger["type"]), + {"Library": LibraryTrigger, + "HTTP": HTTPTrigger, + "Queue": QueueTrigger, + "Storage": StorageTrigger + }.get(trigger["type"]), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index 34ffafae..a3ed2ff3 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -2,7 +2,7 @@ import concurrent.futures import datetime import json -from typing import Dict, Optional +from typing import Optional import uuid # noqa import boto3 @@ -160,22 +160,22 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: sqs_client = boto3.client('sqs', region_name=self.deployment_client.config.region) serialized_payload = json.dumps(payload) - + # Create queue 
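# SQS CreateQueue is idempotent for identical attributes: if a queue with
# this name already exists, the call simply returns the existing queue's URL,
# so repeated invocations reuse the same queue.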
self.logging.debug(f"Creating queue {self.name}") - + queue_url = sqs_client.create_queue(QueueName=self.name)["QueueUrl"] queue_arn = sqs_client.get_queue_attributes( QueueUrl=queue_url, AttributeNames=["QueueArn"] )["Attributes"]["QueueArn"] - self.logging.debug(f"Created queue") + self.logging.debug("Created queue") # Add queue trigger if (not len(lambda_client.list_event_source_mappings(EventSourceArn=queue_arn, FunctionName=self.name) - ["EventSourceMappings"])): + ["EventSourceMappings"])): lambda_client.create_event_source_mapping( EventSourceArn=queue_arn, FunctionName=self.name, @@ -236,7 +236,8 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Prep serialized_payload = json.dumps(payload) bucket_name = self.name.replace('_', '-') # AWS disallows underscores in bucket names - function_arn = lambda_client.get_function(FunctionName=self.name)["Configuration"]["FunctionArn"] + function_arn = lambda_client.get_function(FunctionName=self.name) \ + ["Configuration"]["FunctionArn"] # Create bucket self.logging.info(f"Creating bucket {bucket_name}") @@ -273,8 +274,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: ], }, - ]} - ) + ]}) # Put object s3.Object(bucket_name, 'payload.json').put(Body=serialized_payload) diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index a2c9a7f5..3ac14499 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -12,7 +12,7 @@ from sebs.azure.cli import AzureCLI from sebs.azure.function import AzureFunction from sebs.azure.config import AzureConfig, AzureResources -from sebs.azure.triggers import AzureTrigger, HTTPTrigger, QueueTrigger, StorageTrigger +from sebs.azure.triggers import AzureTrigger, HTTPTrigger from sebs.faas.function import Trigger from sebs.benchmark import Benchmark from sebs.cache import Cache diff --git a/sebs/azure/function.py b/sebs/azure/function.py index c822f545..8970d90d 100644 --- a/sebs/azure/function.py +++ b/sebs/azure/function.py @@ -15,7 +15,7 @@ def __init__( ): super().__init__(benchmark, name, code_hash, cfg) self.function_storage = function_storage - + @staticmethod def typename() -> str: return "Azure.AzureFunction" @@ -29,8 +29,7 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> Function: from sebs.faas.function import Trigger - from sebs.azure.triggers import HTTPTrigger, \ - QueueTrigger, StorageTrigger + from sebs.azure.triggers import HTTPTrigger, QueueTrigger, StorageTrigger cfg = FunctionConfig.deserialize(cached_config["config"]) ret = AzureFunction( @@ -46,7 +45,7 @@ def deserialize(cached_config: dict) -> Function: {"HTTP": HTTPTrigger, "Queue": QueueTrigger, "Storage": StorageTrigger - }.get(trigger["type"]), + }.get(trigger["type"]) ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index e893e958..ed3c3eb2 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -1,14 +1,12 @@ import base64 import concurrent.futures import json -import os from typing import Any, Dict, Optional # noqa from azure.core.exceptions import ResourceExistsError from azure.identity import DefaultAzureCredential from azure.storage.blob import BlobServiceClient -from azure.storage.queue import QueueServiceClient, QueueClient, QueueMessage, BinaryBase64DecodePolicy, BinaryBase64EncodePolicy -from sebs.azure.cli import AzureCLI +from azure.storage.queue import QueueClient from sebs.azure.config import AzureResources from 
sebs.faas.function import ExecutionResult, Trigger From 8f96edab6ab488b227454d89826c78cb4080d794 Mon Sep 17 00:00:00 2001 From: orosca Date: Mon, 20 May 2024 18:35:18 +0200 Subject: [PATCH 06/26] Address comments, lint --- benchmarks/wrappers/aws/python/handler.py | 10 ++--- benchmarks/wrappers/aws/python/storage.py | 3 ++ benchmarks/wrappers/gcp/python/handler.py | 11 ++--- requirements.azure.txt | 2 + sebs.py | 9 +++- sebs/aws/aws.py | 54 +++++++++++++++-------- sebs/aws/config.py | 24 ++++++---- sebs/aws/function.py | 8 ++-- sebs/aws/s3.py | 27 ++++++++---- sebs/aws/triggers.py | 41 ++++++++++------- sebs/azure/azure.py | 13 +++--- sebs/benchmark.py | 4 +- sebs/cache.py | 25 ++++++++--- sebs/gcp/triggers.py | 4 +- 14 files changed, 152 insertions(+), 83 deletions(-) diff --git a/benchmarks/wrappers/aws/python/handler.py b/benchmarks/wrappers/aws/python/handler.py index 0bcfeab0..2601dddf 100644 --- a/benchmarks/wrappers/aws/python/handler.py +++ b/benchmarks/wrappers/aws/python/handler.py @@ -1,6 +1,4 @@ - import datetime, io, json, os, sys, uuid -import boto3 # Add current directory to allow location of packages sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) @@ -15,11 +13,13 @@ def handler(event, context): # Storage trigger if ("Records" in event and "s3" in event["Records"][0]): - s3_client = boto3.client('s3') bucket_name = event["Records"][0]["s3"]["bucket"]["name"] file_name = event["Records"][0]["s3"]["object"]["key"] - - obj = s3_client.get_object(Bucket=bucket_name, Key=file_name) + + from function import storage + storage_inst = storage.storage.get_instance() + + obj = storage_inst.get_object(bucket_name, file_name) event = json.loads(obj['Body'].read()) # HTTP trigger with API Gateaway diff --git a/benchmarks/wrappers/aws/python/storage.py b/benchmarks/wrappers/aws/python/storage.py index 4be0025e..602319df 100644 --- a/benchmarks/wrappers/aws/python/storage.py +++ b/benchmarks/wrappers/aws/python/storage.py @@ -46,6 +46,9 @@ def download_stream(self, bucket, file): data = io.BytesIO() self.client.download_fileobj(bucket, file, data) return data.getbuffer() + + def get_object(self, bucket, file): + return self.client.get_object(Bucket=bucket, Key=file) def get_instance(): if storage.instance is None: diff --git a/benchmarks/wrappers/gcp/python/handler.py b/benchmarks/wrappers/gcp/python/handler.py index e5093061..6a1284e5 100644 --- a/benchmarks/wrappers/gcp/python/handler.py +++ b/benchmarks/wrappers/gcp/python/handler.py @@ -66,7 +66,7 @@ def handler_http(req): def handler_queue(data, context): serialized_payload = data.get('data') - payload = json.loads(base64.b64decode(serialized_payload).decode("ascii")) + payload = json.loads(base64.b64decode(serialized_payload).decode("utf-8")) from function import function ret = function.handler(payload) @@ -77,13 +77,10 @@ def handler_storage(data, context): bucket_name = data.get('bucket') name = data.get('name') filepath = '/tmp/bucket_contents' - client = gcp_storage.Client(); - print("Download {}:{} to {}".format(bucket_name, name, filepath)) - print(data) - bucket_instance = client.bucket(bucket_name) - blob = bucket_instance.blob(name) - blob.download_to_filename(filepath) + from function import storage + storage_inst = storage.storage.get_instance() + storage_inst.download(bucket_name, name, filepath) payload = {} diff --git a/requirements.azure.txt b/requirements.azure.txt index f7d82499..4fed51ac 100644 --- a/requirements.azure.txt +++ b/requirements.azure.txt @@ -1 +1,3 @@ 
azure-storage-blob==12.10.0 +azure-storage-queue==12.9.0 +azure-identity==1.16.0 diff --git a/sebs.py b/sebs.py index 3f9649c9..567074ae 100755 --- a/sebs.py +++ b/sebs.py @@ -225,7 +225,9 @@ def invoke( sebs_client.config.image_tag_prefix = image_tag_prefix # Insert trigger into (experiment) config. Required by Azure when packaging. - update_nested_dict(config, ["experiments", "trigger"], (trigger if trigger is not None else "http")) + # TODO(oana) is this still needed + trigger = trigger if trigger is not None else "http" + update_nested_dict(config, ["experiments", "trigger"], trigger) experiment_config = sebs_client.get_experiment_config(config["experiments"]) update_nested_dict(config, ["experiments", "benchmark"], benchmark) @@ -242,7 +244,10 @@ def invoke( function_name = function_name if function_name else deployment_client.default_function_name(benchmark_obj) - # GCP: augment function name with trigger type: _http, _queue etc. + # GCP and Azure only allow one trigger per function, so augment function name with + # trigger type: _http, _queue etc. + # + # Additionally, Azure requires for the trigger to be defined at deployment time. if deployment_client.name() == "gcp" or deployment_client.name() == "azure": function_name = "{}-{}".format(function_name, trigger) diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index c175a1a1..d48b8e17 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -149,11 +149,13 @@ def package_code( # FIXME: use zipfile # create zip with hidden directory but without parent directory - execute("zip -qu -r9 {}.zip * .".format(benchmark), shell=True, cwd=directory) + execute("zip -qu -r9 {}.zip * .".format(benchmark), + shell=True, cwd=directory) benchmark_archive = "{}.zip".format(os.path.join(directory, benchmark)) self.logging.info("Created {} archive".format(benchmark_archive)) - bytes_size = os.path.getsize(os.path.join(directory, benchmark_archive)) + bytes_size = os.path.getsize( + os.path.join(directory, benchmark_archive)) mbytes = bytes_size / 1024.0 / 1024.0 self.logging.info("Zip archive size {:2f} MB".format(mbytes)) @@ -186,7 +188,8 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun try: ret = self.client.get_function(FunctionName=func_name) self.logging.info( - "Function {} exists on AWS, retrieve configuration.".format(func_name) + "Function {} exists on AWS, retrieve configuration.".format( + func_name) ) # Here we assume a single Lambda role lambda_function = LambdaFunction( @@ -202,7 +205,8 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun lambda_function.updated_code = True # TODO: get configuration of REST API except self.client.exceptions.ResourceNotFoundException: - self.logging.info("Creating function {} from {}".format(func_name, package)) + self.logging.info( + "Creating function {} from {}".format(func_name, package)) # AWS Lambda limit on zip deployment size # Limit to 50 MB @@ -216,16 +220,19 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun else: code_package_name = cast(str, os.path.basename(package)) - code_bucket = storage_client.get_bucket(Resources.StorageBucketType.DEPLOYMENT) + code_bucket = storage_client.get_bucket( + Resources.StorageBucketType.DEPLOYMENT) code_prefix = os.path.join(benchmark, code_package_name) storage_client.upload(code_bucket, package, code_prefix) - self.logging.info("Uploading function {} code to {}".format(func_name, code_bucket)) + self.logging.info( + "Uploading function {} code to 
{}".format(func_name, code_bucket)) code_config = {"S3Bucket": code_bucket, "S3Key": code_prefix} ret = self.client.create_function( FunctionName=func_name, Runtime="{}{}".format( - language, self._map_language_runtime(language, language_runtime) + language, self._map_language_runtime( + language, language_runtime) ), Handler="handler.handler", Role=self.config.resources.lambda_role(self.session), @@ -293,7 +300,8 @@ def update_function(self, function: Function, code_package: Benchmark): # AWS Lambda limit on zip deployment if code_size < 50 * 1024 * 1024: with open(package, "rb") as code_body: - self.client.update_function_code(FunctionName=name, ZipFile=code_body.read()) + self.client.update_function_code( + FunctionName=name, ZipFile=code_body.read()) # Upload code package to S3, then update else: code_package_name = os.path.basename(package) @@ -322,7 +330,8 @@ def update_function_configuration(self, function: Function, benchmark: Benchmark MemorySize=function.config.memory, ) self.wait_function_updated(function) - self.logging.info(f"Updated configuration of {function.name} function. ") + self.logging.info( + f"Updated configuration of {function.name} function. ") @staticmethod def default_function_name(code_package: Benchmark) -> str: @@ -391,10 +400,12 @@ def parse_aws_report( return request_id output = requests[request_id] output.request_id = request_id - output.provider_times.execution = int(float(aws_vals["Duration"]) * 1000) + output.provider_times.execution = int( + float(aws_vals["Duration"]) * 1000) output.stats.memory_used = float(aws_vals["Max Memory Used"]) if "Init Duration" in aws_vals: - output.provider_times.initialization = int(float(aws_vals["Init Duration"]) * 1000) + output.provider_times.initialization = int( + float(aws_vals["Init Duration"]) * 1000) output.billing.billed_time = int(aws_vals["Billed Duration"]) output.billing.memory = int(aws_vals["Memory Size"]) output.billing.gb_seconds = output.billing.billed_time * output.billing.memory @@ -428,12 +439,14 @@ def get_invocation_error(self, function_name: str, start_time: int, end_time: in time.sleep(5) response = self.logs_client.get_query_results(queryId=query_id) if len(response["results"]) == 0: - self.logging.info("AWS logs are not yet available, repeat after 15s...") + self.logging.info( + "AWS logs are not yet available, repeat after 15s...") time.sleep(15) response = None else: break - self.logging.error(f"Invocation error for AWS Lambda function {function_name}") + self.logging.error( + f"Invocation error for AWS Lambda function {function_name}") for message in response["results"]: for value in message: if value["field"] == "@message": @@ -480,7 +493,8 @@ def download_metrics( for val in results: for result_part in val: if result_part["field"] == "@message": - request_id = AWS.parse_aws_report(result_part["value"], requests) + request_id = AWS.parse_aws_report( + result_part["value"], requests) if request_id in requests: results_processed += 1 requests_ids.remove(request_id) @@ -497,7 +511,8 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T if trigger_type == Trigger.TriggerType.HTTP: api_name = "{}-http-api".format(function.name) - http_api = self.config.resources.http_api(api_name, function, self.session) + http_api = self.config.resources.http_api( + api_name, function, self.session) # https://aws.amazon.com/blogs/compute/announcing-http-apis-for-amazon-api-gateway/ # but this is wrong - source arn must be {api-arn}/*/* self.get_lambda_client().add_permission( @@ 
-520,11 +535,13 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T elif trigger_type == Trigger.TriggerType.QUEUE: trigger = QueueTrigger(func.name, self) trigger.logging_handlers = self.logging_handlers - self.logging.info(f"Created Queue trigger for {func.name} function.") + self.logging.info( + f"Created Queue trigger for {func.name} function.") elif trigger_type == Trigger.TriggerType.STORAGE: trigger = StorageTrigger(func.name, self) trigger.logging_handlers = self.logging_handlers - self.logging.info(f"Created Storage trigger for {func.name} function.") + self.logging.info( + f"Created Storage trigger for {func.name} function.") else: raise RuntimeError("Not supported!") @@ -538,7 +555,8 @@ def _enforce_cold_start(self, function: Function): FunctionName=func.name, Timeout=func.config.timeout, MemorySize=func.config.memory, - Environment={"Variables": {"ForceColdStart": str(self.cold_start_counter)}}, + Environment={"Variables": { + "ForceColdStart": str(self.cold_start_counter)}}, ) def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): diff --git a/sebs/aws/config.py b/sebs/aws/config.py index 44c9a490..6de965d4 100644 --- a/sebs/aws/config.py +++ b/sebs/aws/config.py @@ -85,7 +85,8 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden return ret def update_cache(self, cache: Cache): - cache.update_config(val=self.account_id, keys=["aws", "credentials", "account_id"]) + cache.update_config(val=self.account_id, keys=[ + "aws", "credentials", "account_id"]) def serialize(self) -> dict: out = {"account_id": self._account_id} @@ -145,7 +146,8 @@ def lambda_role(self, boto3_session: boto3.session.Session) -> str: try: out = iam_client.get_role(RoleName=role_name) self._lambda_role = out["Role"]["Arn"] - self.logging.info(f"AWS: Selected {self._lambda_role} IAM role") + self.logging.info( + f"AWS: Selected {self._lambda_role} IAM role") except iam_client.exceptions.NoSuchEntityException: out = iam_client.create_role( RoleName=role_name, @@ -159,7 +161,8 @@ def lambda_role(self, boto3_session: boto3.session.Session) -> str: time.sleep(10) # Attach basic AWS Lambda and S3 policies. 
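An illustrative aside on this role setup, not taken from the patch itself: because the new SQS-based QueueTrigger registers an event source mapping, the Lambda execution role also needs permission to poll and delete queue messages. One option is AWS's managed policy for SQS-driven Lambdas; the role name below is an assumption for illustration, and the policy choice should be checked against the deployment's actual requirements.

```
import boto3

iam_client = boto3.client("iam")

# Grant the benchmark role SQS receive/delete permissions so the event
# source mapping created by QueueTrigger can poll messages.
iam_client.attach_role_policy(
    RoleName="sebs-lambda-role",  # assumed, illustrative role name
    PolicyArn="arn:aws:iam::aws:policy/service-role/AWSLambdaSQSQueueExecutionRole",
)
```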
for policy in attached_policies: - iam_client.attach_role_policy(RoleName=role_name, PolicyArn=policy) + iam_client.attach_role_policy( + RoleName=role_name, PolicyArn=policy) return self._lambda_role def http_api( @@ -221,9 +224,11 @@ def serialize(self) -> dict: def update_cache(self, cache: Cache): super().update_cache(cache) - cache.update_config(val=self._lambda_role, keys=["aws", "resources", "lambda-role"]) + cache.update_config(val=self._lambda_role, keys=[ + "aws", "resources", "lambda-role"]) for name, api in self._http_apis.items(): - cache.update_config(val=api.serialize(), keys=["aws", "resources", "http-apis", name]) + cache.update_config(val=api.serialize(), keys=[ + "aws", "resources", "http-apis", name]) @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: @@ -240,7 +245,8 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour if "resources" in config: AWSResources.initialize(ret, config["resources"]) ret.logging_handlers = handlers - ret.logging.info("No cached resources for AWS found, using user configuration.") + ret.logging.info( + "No cached resources for AWS found, using user configuration.") else: AWSResources.initialize(ret, {}) ret.logging_handlers = handlers @@ -278,8 +284,10 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config cached_config = cache.get_config("aws") # FIXME: use future annotations (see sebs/faas/system) - credentials = cast(AWSCredentials, AWSCredentials.deserialize(config, cache, handlers)) - resources = cast(AWSResources, AWSResources.deserialize(config, cache, handlers)) + credentials = cast( + AWSCredentials, AWSCredentials.deserialize(config, cache, handlers)) + resources = cast(AWSResources, AWSResources.deserialize( + config, cache, handlers)) config_obj = AWSConfig(credentials, resources) config_obj.logging_handlers = handlers # Load cached values diff --git a/sebs/aws/function.py b/sebs/aws/function.py index de4fcc6e..fbdb6d6f 100644 --- a/sebs/aws/function.py +++ b/sebs/aws/function.py @@ -59,12 +59,14 @@ def deserialize(cached_config: dict) -> "LambdaFunction": "HTTP": HTTPTrigger, "Queue": QueueTrigger, "Storage": StorageTrigger - }.get(trigger["type"]), + }.get(trigger["type"]), ) - assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) + assert trigger_type, "Unknown trigger type {}".format( + trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) return ret def code_bucket(self, benchmark: str, storage_client: S3): - self.bucket = storage_client.get_bucket(Resources.StorageBucketType.DEPLOYMENT) + self.bucket = storage_client.get_bucket( + Resources.StorageBucketType.DEPLOYMENT) return self.bucket diff --git a/sebs/aws/s3.py b/sebs/aws/s3.py index 79ca8905..bd550c52 100644 --- a/sebs/aws/s3.py +++ b/sebs/aws/s3.py @@ -54,7 +54,8 @@ def _create_bucket( for bucket_name in buckets: if name in bucket_name: self.logging.info( - "Bucket {} for {} already exists, skipping.".format(bucket_name, name) + "Bucket {} for {} already exists, skipping.".format( + bucket_name, name) ) return bucket_name @@ -70,7 +71,8 @@ def _create_bucket( if self.region != "us-east-1": self.client.create_bucket( Bucket=bucket_name, - CreateBucketConfiguration={"LocationConstraint": self.region}, + CreateBucketConfiguration={ + "LocationConstraint": self.region}, ) else: # This is incredible x2 - boto3 will not throw exception if you recreate @@ -86,7 +88,8 @@ def _create_bucket( self.logging.info("Created bucket 
{}".format(bucket_name)) except self.client.exceptions.BucketAlreadyExists as e: - self.logging.error(f"The bucket {bucket_name} exists already in region {self.region}!") + self.logging.error( + f"The bucket {bucket_name} exists already in region {self.region}!") raise e except self.client.exceptions.ClientError as e: self.logging.error( @@ -110,7 +113,8 @@ def uploader_func(self, path_idx, key, filepath): for f in self.input_prefixes_files[path_idx]: f_name = f if key == f_name: - self.logging.info("Skipping upload of {} to {}".format(filepath, bucket_name)) + self.logging.info( + "Skipping upload of {} to {}".format(filepath, bucket_name)) return self.upload(bucket_name, filepath, key) @@ -120,8 +124,10 @@ def upload(self, bucket_name: str, filepath: str, key: str): self.client.upload_file(Filename=filepath, Bucket=bucket_name, Key=key) def download(self, bucket_name: str, key: str, filepath: str): - self.logging.info("Download {}:{} to {}".format(bucket_name, key, filepath)) - self.client.download_file(Bucket=bucket_name, Key=key, Filename=filepath) + self.logging.info("Download {}:{} to {}".format( + bucket_name, key, filepath)) + self.client.download_file( + Bucket=bucket_name, Key=key, Filename=filepath) def exists_bucket(self, bucket_name: str) -> bool: try: @@ -131,7 +137,8 @@ def exists_bucket(self, bucket_name: str) -> bool: return False def list_bucket(self, bucket_name: str, prefix: str = ""): - objects_list = self.client.list_objects_v2(Bucket=bucket_name, Prefix=prefix) + objects_list = self.client.list_objects_v2( + Bucket=bucket_name, Prefix=prefix) objects: List[str] if "Contents" in objects_list: objects = [obj["Key"] for obj in objects_list["Contents"]] @@ -149,8 +156,10 @@ def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: def clean_bucket(self, bucket: str): objects = self.client.list_objects_v2(Bucket=bucket) if "Contents" in objects: - objects = [{"Key": obj["Key"]} for obj in objects["Contents"]] # type: ignore - self.client.delete_objects(Bucket=bucket, Delete={"Objects": objects}) # type: ignore + objects = [{"Key": obj["Key"]} + for obj in objects["Contents"]] # type: ignore + self.client.delete_objects(Bucket=bucket, Delete={ + "Objects": objects}) # type: ignore def remove_bucket(self, bucket: str): self.client.delete_bucket(Bucket=bucket) diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index a3ed2ff3..9335237e 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -41,19 +41,22 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: serialized_payload = json.dumps(payload).encode("utf-8") client = self.deployment_client.get_lambda_client() begin = datetime.datetime.now() - ret = client.invoke(FunctionName=self.name, Payload=serialized_payload, LogType="Tail") + ret = client.invoke(FunctionName=self.name, + Payload=serialized_payload, LogType="Tail") end = datetime.datetime.now() aws_result = ExecutionResult.from_times(begin, end) aws_result.request_id = ret["ResponseMetadata"]["RequestId"] if ret["StatusCode"] != 200: self.logging.error("Invocation of {} failed!".format(self.name)) - self.logging.error("Input: {}".format(serialized_payload.decode("utf-8"))) + self.logging.error("Input: {}".format( + serialized_payload.decode("utf-8"))) aws_result.stats.failure = True return aws_result if "FunctionError" in ret: self.logging.error("Invocation of {} failed!".format(self.name)) - self.logging.error("Input: {}".format(serialized_payload.decode("utf-8"))) + self.logging.error("Input: {}".format( + 
serialized_payload.decode("utf-8"))) aws_result.stats.failure = True return aws_result self.logging.debug(f"Invoke of function {self.name} was successful") @@ -67,7 +70,8 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: if isinstance(function_output["body"], dict): aws_result.parse_benchmark_output(function_output["body"]) else: - aws_result.parse_benchmark_output(json.loads(function_output["body"])) + aws_result.parse_benchmark_output( + json.loads(function_output["body"])) return aws_result def async_invoke(self, payload: dict): @@ -82,8 +86,10 @@ def async_invoke(self, payload: dict): LogType="Tail", ) if ret["StatusCode"] != 202: - self.logging.error("Async invocation of {} failed!".format(self.name)) - self.logging.error("Input: {}".format(serialized_payload.decode("utf-8"))) + self.logging.error( + "Async invocation of {} failed!".format(self.name)) + self.logging.error("Input: {}".format( + serialized_payload.decode("utf-8"))) raise RuntimeError() return ret @@ -157,7 +163,8 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Init clients lambda_client = self.deployment_client.get_lambda_client() - sqs_client = boto3.client('sqs', region_name=self.deployment_client.config.region) + sqs_client = boto3.client( + 'sqs', region_name=self.deployment_client.config.region) serialized_payload = json.dumps(payload) @@ -166,16 +173,16 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: queue_url = sqs_client.create_queue(QueueName=self.name)["QueueUrl"] queue_arn = sqs_client.get_queue_attributes( - QueueUrl=queue_url, - AttributeNames=["QueueArn"] - )["Attributes"]["QueueArn"] + QueueUrl=queue_url, + AttributeNames=["QueueArn"] + )["Attributes"]["QueueArn"] self.logging.debug("Created queue") # Add queue trigger if (not len(lambda_client.list_event_source_mappings(EventSourceArn=queue_arn, FunctionName=self.name) - ["EventSourceMappings"])): + ["EventSourceMappings"])): lambda_client.create_event_source_mapping( EventSourceArn=queue_arn, FunctionName=self.name, @@ -183,7 +190,8 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: ) # Publish payload to queue - sqs_client.send_message(QueueUrl=queue_url, MessageBody=serialized_payload) + sqs_client.send_message( + QueueUrl=queue_url, MessageBody=serialized_payload) self.logging.info(f"Sent message to queue {self.name}") # TODO(oana): gather metrics @@ -235,9 +243,10 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Prep serialized_payload = json.dumps(payload) - bucket_name = self.name.replace('_', '-') # AWS disallows underscores in bucket names - function_arn = lambda_client.get_function(FunctionName=self.name) \ - ["Configuration"]["FunctionArn"] + # AWS disallows underscores in bucket names + bucket_name = self.name.replace('_', '-') + function_arn = lambda_client.get_function(FunctionName=self.name)[ + "Configuration"]["FunctionArn"] # Create bucket self.logging.info(f"Creating bucket {bucket_name}") @@ -275,7 +284,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: }, ]}) - + # Put object s3.Object(bucket_name, 'payload.json').put(Body=serialized_payload) self.logging.info(f"Uploaded payload to bucket {bucket_name}") diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index 3ac14499..aab84146 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -244,7 +244,7 @@ def package_code( benchmark_stripped, language_name, language_version, - self.config.resources_id, + self.config.resources.resources_id, trigger ) .replace(".", "-") @@ -269,7 +269,7 @@ def package_code( 
json.dump(default_host_json, open(os.path.join(directory, "host.json"), "w"), indent=2) code_size = Benchmark.directory_size(directory) - execute("zip -qu -r9 {}.zip * .".format(benchmark), shell=True, cwd=directory) + execute("zip -qu -r9 {}.zip * .".format(benchmark_stripped), shell=True, cwd=directory) return directory, code_size def publish_function( @@ -581,18 +581,21 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) resource_group = self.config.resources.resource_group(self.cli_instance) storage_account = azure_function.function_storage.account_name - ret = self.cli_instance.execute( + user_principal_name = self.cli_instance.execute('az ad user list') + + storage_account_scope = self.cli_instance.execute( ('az storage account show --resource-group {} --name {} --query id') .format(resource_group, storage_account) ) + self.cli_instance.execute( ('az role assignment create --assignee "{}" \ --role "Storage {} Data Contributor" \ --scope {}') .format( - os.environ["AZURE_USER_PRINCIPAL_NAME"], + json.loads(user_principal_name.decode("utf-8"))[0]["userPrincipalName"], "Queue" if trigger_type == Trigger.TriggerType.QUEUE else "Blob", - ret.decode("utf-8") + storage_account_scope.decode("utf-8") ) ) diff --git a/sebs/benchmark.py b/sebs/benchmark.py index 4f0fda73..f39c4bc4 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -524,9 +524,9 @@ def build( # package already exists if self.is_cached: - self._cache_client.update_code_package(self._deployment_name, self.language_name, self) + self._cache_client.update_code_package(self._deployment_name, self.language_name, self, self._experiment_config.trigger) else: - self._cache_client.add_code_package(self._deployment_name, self.language_name, self) + self._cache_client.add_code_package(self._deployment_name, self.language_name, self, self._experiment_config.trigger) self.query_cache() return True, self._code_location diff --git a/sebs/cache.py b/sebs/cache.py index ed5096e6..3a781b58 100644 --- a/sebs/cache.py +++ b/sebs/cache.py @@ -162,14 +162,21 @@ def update_storage(self, deployment: str, benchmark: str, config: dict): with open(os.path.join(benchmark_dir, "config.json"), "w") as fp: json.dump(cached_config, fp, indent=2) - def add_code_package(self, deployment_name: str, language_name: str, code_package: "Benchmark"): + def add_code_package( + self, deployment_name: str, language_name: str, code_package: "Benchmark", + trigger: Optional[str] + ): with self._lock: language = code_package.language_name language_version = code_package.language_version benchmark_dir = os.path.join(self.cache_dir, code_package.benchmark) os.makedirs(benchmark_dir, exist_ok=True) - # Check if cache directory for this deployment exist - cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version) + + if (deployment_name == "azure"): + cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version, trigger) + else: + # Check if cache directory for this deployment exist + cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version) if not os.path.exists(cached_dir): os.makedirs(cached_dir, exist_ok=True) @@ -231,14 +238,20 @@ def add_code_package(self, deployment_name: str, language_name: str, code_packag ) def update_code_package( - self, deployment_name: str, language_name: str, code_package: "Benchmark" + self, deployment_name: str, language_name: str, code_package: "Benchmark", + trigger: Optional[str] ): with self._lock: language = 
code_package.language_name language_version = code_package.language_version benchmark_dir = os.path.join(self.cache_dir, code_package.benchmark) - # Check if cache directory for this deployment exist - cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version) + + cached_dir = "" + if (deployment_name == "azure"): + cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version, trigger) + else: + # Check if cache directory for this deployment exist + cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version) if os.path.exists(cached_dir): # copy code diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 2ad08637..35b46119 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -151,7 +151,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Prep # GCP is very particular with data encoding... - serialized_payload = base64.b64encode(json.dumps(payload).encode("ascii")) + serialized_payload = base64.b64encode(json.dumps(payload).encode("utf-8")) # Publish payload to queue pub_sub.projects().topics().publish( @@ -208,7 +208,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Upload object gcp_storage.blob._MAX_MULTIPART_SIZE = 5 * 1024 * 1024 - blob = bucket_instance.blob(blob_name=payload, chunk_size=4 * 1024 * 1024) + blob = bucket_instance.blob(blob_name=file_name, chunk_size=4 * 1024 * 1024) blob.upload_from_filename(file_name) self.logging.info(f"Uploaded payload to bucket {bucket_name}") From 107b53f35008d191fddeee997d76894733e92031 Mon Sep 17 00:00:00 2001 From: orosca Date: Wed, 19 Jun 2024 13:36:32 -0400 Subject: [PATCH 07/26] Cache prep --- docs/modularity.md | 1 + sebs.py | 1 - sebs/aws/aws.py | 1 + sebs/azure/azure.py | 7 +++---- sebs/benchmark.py | 11 ++++------- sebs/cache.py | 23 ++++++----------------- sebs/faas/system.py | 1 + sebs/gcp/gcp.py | 1 + sebs/local/local.py | 1 + sebs/openwhisk/openwhisk.py | 1 + 10 files changed, 19 insertions(+), 29 deletions(-) diff --git a/docs/modularity.md b/docs/modularity.md index 7e3c7fcc..f2614655 100644 --- a/docs/modularity.md +++ b/docs/modularity.md @@ -303,6 +303,7 @@ Implement this step in the following function: language_version: str, benchmark: str, is_cached: bool, + trigger: Optional[Trigger.TriggerType], ) -> Tuple[str, int] ``` diff --git a/sebs.py b/sebs.py index 567074ae..9f0bf620 100755 --- a/sebs.py +++ b/sebs.py @@ -225,7 +225,6 @@ def invoke( sebs_client.config.image_tag_prefix = image_tag_prefix # Insert trigger into (experiment) config. Required by Azure when packaging. 
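For illustration only (the benchmark name and the config contents below are assumed, not taken from the patch): the trigger defaults to "http", is recorded in the experiment config, and, for GCP and Azure, also becomes a suffix of the function name.

```
# Assumed inputs for this sketch
trigger = None
function_name = "110.dynamic-html-python-3-9"  # hypothetical default name

trigger = trigger if trigger is not None else "http"

# Equivalent effect of update_nested_dict(config, ["experiments", "trigger"], trigger),
# assuming it sets the nested key:
config = {"experiments": {}}
config["experiments"]["trigger"] = trigger

# GCP/Azure allow one trigger per function, so the name carries the trigger type:
function_name = "{}-{}".format(function_name, trigger)
# -> "110.dynamic-html-python-3-9-http"
```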
- # TODO(oana) is this still needed trigger = trigger if trigger is not None else "http" update_nested_dict(config, ["experiments", "trigger"], trigger) diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index d48b8e17..73c56286 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -132,6 +132,7 @@ def package_code( language_version: str, benchmark: str, is_cached: bool, + trigger: Optional[Trigger.TriggerType], ) -> Tuple[str, int]: CONFIG_FILES = { diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index aab84146..2946ed58 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -218,6 +218,7 @@ def package_code( language_version: str, benchmark: str, is_cached: bool, + trigger: Optional[Trigger.TriggerType], ) -> Tuple[str, int]: # In previous step we ran a Docker container which installed packages @@ -237,11 +238,9 @@ def package_code( source_file = os.path.join(directory, f) shutil.move(source_file, handler_dir) - benchmark_stripped = '-'.join(benchmark.split("-")[:-1]) - trigger = benchmark.split("-")[-1] func_name = ( "{}-{}-{}-{}-{}".format( - benchmark_stripped, + benchmark, language_name, language_version, self.config.resources.resources_id, @@ -269,7 +268,7 @@ def package_code( json.dump(default_host_json, open(os.path.join(directory, "host.json"), "w"), indent=2) code_size = Benchmark.directory_size(directory) - execute("zip -qu -r9 {}.zip * .".format(benchmark_stripped), shell=True, cwd=directory) + execute("zip -qu -r9 {}.zip * .".format(benchmark), shell=True, cwd=directory) return directory, code_size def publish_function( diff --git a/sebs/benchmark.py b/sebs/benchmark.py index f39c4bc4..e18e0e20 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -494,10 +494,6 @@ def build( shutil.rmtree(self._output_dir) os.makedirs(self._output_dir) - benchmark = self.benchmark - if self._deployment_name == "azure": - benchmark = "{}-{}".format(benchmark, self._experiment_config.trigger) - self.copy_code(self._output_dir) self.add_benchmark_data(self._output_dir) self.add_deployment_files(self._output_dir) @@ -507,8 +503,9 @@ def build( os.path.abspath(self._output_dir), self.language_name, self.language_version, - benchmark, + self.benchmark, self.is_cached_valid, + self._experiment_config.trigger ) self.logging.info( ( @@ -524,9 +521,9 @@ def build( # package already exists if self.is_cached: - self._cache_client.update_code_package(self._deployment_name, self.language_name, self, self._experiment_config.trigger) + self._cache_client.update_code_package(self._deployment_name, self.language_name, self) else: - self._cache_client.add_code_package(self._deployment_name, self.language_name, self, self._experiment_config.trigger) + self._cache_client.add_code_package(self._deployment_name, self.language_name, self) self.query_cache() return True, self._code_location diff --git a/sebs/cache.py b/sebs/cache.py index 3a781b58..daf50ef9 100644 --- a/sebs/cache.py +++ b/sebs/cache.py @@ -163,20 +163,15 @@ def update_storage(self, deployment: str, benchmark: str, config: dict): json.dump(cached_config, fp, indent=2) def add_code_package( - self, deployment_name: str, language_name: str, code_package: "Benchmark", - trigger: Optional[str] + self, deployment_name: str, language_name: str, code_package: "Benchmark" ): with self._lock: language = code_package.language_name language_version = code_package.language_version benchmark_dir = os.path.join(self.cache_dir, code_package.benchmark) os.makedirs(benchmark_dir, exist_ok=True) - - if (deployment_name == "azure"): - cached_dir = 
os.path.join(benchmark_dir, deployment_name, language, language_version, trigger) - else: - # Check if cache directory for this deployment exist - cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version) + # Check if cache directory for this deployment exist + cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version) if not os.path.exists(cached_dir): os.makedirs(cached_dir, exist_ok=True) @@ -238,20 +233,14 @@ def add_code_package( ) def update_code_package( - self, deployment_name: str, language_name: str, code_package: "Benchmark", - trigger: Optional[str] + self, deployment_name: str, language_name: str, code_package: "Benchmark" ): with self._lock: language = code_package.language_name language_version = code_package.language_version benchmark_dir = os.path.join(self.cache_dir, code_package.benchmark) - - cached_dir = "" - if (deployment_name == "azure"): - cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version, trigger) - else: - # Check if cache directory for this deployment exist - cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version) + # Check if cache directory for this deployment exist + cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version) if os.path.exists(cached_dir): # copy code diff --git a/sebs/faas/system.py b/sebs/faas/system.py index 17116e69..e126310a 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -167,6 +167,7 @@ def package_code( language_version: str, benchmark: str, is_cached: bool, + trigger: Optional[Trigger.TriggerType], ) -> Tuple[str, int]: pass diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 94b15243..c351d204 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -220,6 +220,7 @@ def package_code( language_version: str, benchmark: str, is_cached: bool, + trigger: Optional[Trigger.TriggerType], ) -> Tuple[str, int]: CONFIG_FILES = { diff --git a/sebs/local/local.py b/sebs/local/local.py index cb1aabe2..1c975461 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -132,6 +132,7 @@ def package_code( language_version: str, benchmark: str, is_cached: bool, + trigger: Optional[Trigger.TriggerType], ) -> Tuple[str, int]: CONFIG_FILES = { diff --git a/sebs/openwhisk/openwhisk.py b/sebs/openwhisk/openwhisk.py index 00660de9..43c9cd54 100644 --- a/sebs/openwhisk/openwhisk.py +++ b/sebs/openwhisk/openwhisk.py @@ -208,6 +208,7 @@ def package_code( language_version: str, benchmark: str, is_cached: bool, + trigger: Optional[Trigger.TriggerType], ) -> Tuple[str, int]: # Regardless of Docker image status, we need to create .zip file From ba67b4a8d4e739364eed7dc2e86cbdaf432e633b Mon Sep 17 00:00:00 2001 From: orosca Date: Fri, 5 Jul 2024 01:31:54 -0400 Subject: [PATCH 08/26] Address comments --- sebs/aws/triggers.py | 210 +++++++++++++++++++++++++---------------- sebs/azure/triggers.py | 133 ++++++++++++++++++-------- sebs/gcp/gcp.py | 88 +++++++++-------- sebs/gcp/triggers.py | 35 ++++--- 4 files changed, 294 insertions(+), 172 deletions(-) diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index 9335237e..2c62ef76 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -135,15 +135,64 @@ def deserialize(obj: dict) -> Trigger: class QueueTrigger(Trigger): - def __init__(self, fname: str, deployment_client: Optional[AWS] = None): + def __init__(self, fname: str, deployment_client: Optional[AWS] = None, queue_arn: Optional[str] = None, queue_url: Optional[str] = None): 
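+        # When no queue ARN/URL is passed in (first deployment), the constructor
+        # below creates the SQS queue and the Lambda event source mapping, and
+        # caches both identifiers so they can be serialized and reused later.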
super().__init__() self.name = fname - self._deployment_client = deployment_client + + self._deployment_client = None + self._queue_arn = None + self._queue_url = None + + if (deployment_client): + self._deployment_client = deployment_client + if (queue_arn): + self._queue_arn = queue_arn + if (queue_url): + self._queue_url = queue_url + + # When creating the trigger for the first time, also create and store + # queue information. + if (not self.queue_arn and not self.queue_url): + # Init clients + lambda_client = self.deployment_client.get_lambda_client() + sqs_client = boto3.client( + 'sqs', region_name=self.deployment_client.config.region) + + # Create queue + self.logging.debug(f"Creating queue {self.name}") + + self._queue_url = sqs_client.create_queue(QueueName=self.name)["QueueUrl"] + self._queue_arn = sqs_client.get_queue_attributes( + QueueUrl=self.queue_url, + AttributeNames=["QueueArn"] + )["Attributes"]["QueueArn"] + + self.logging.debug("Created queue") + + # Add queue trigger + if (not len(lambda_client.list_event_source_mappings(EventSourceArn=self.queue_arn, + FunctionName=self.name) + ["EventSourceMappings"])): + lambda_client.create_event_source_mapping( + EventSourceArn=self.queue_arn, + FunctionName=self.name, + MaximumBatchingWindowInSeconds=1 + ) @staticmethod def typename() -> str: return "AWS.QueueTrigger" + @property + def queue_arn(self) -> str: + assert self._queue_arn + return self._queue_arn + + @property + def queue_url(self) -> str: + assert self._queue_url + return self._queue_url + @property def deployment_client(self) -> AWS: assert self._deployment_client @@ -161,37 +210,13 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.debug(f"Invoke function {self.name}") - # Init clients - lambda_client = self.deployment_client.get_lambda_client() sqs_client = boto3.client( 'sqs', region_name=self.deployment_client.config.region) - serialized_payload = json.dumps(payload) - - # Create queue - self.logging.debug(f"Creating queue {self.name}") - - queue_url = sqs_client.create_queue(QueueName=self.name)["QueueUrl"] - queue_arn = sqs_client.get_queue_attributes( - QueueUrl=queue_url, - AttributeNames=["QueueArn"] - )["Attributes"]["QueueArn"] - - self.logging.debug("Created queue") - - # Add queue trigger - if (not len(lambda_client.list_event_source_mappings(EventSourceArn=queue_arn, - FunctionName=self.name) - ["EventSourceMappings"])): - lambda_client.create_event_source_mapping( - EventSourceArn=queue_arn, - FunctionName=self.name, - MaximumBatchingWindowInSeconds=1 - ) - # Publish payload to queue + serialized_payload = json.dumps(payload) sqs_client.send_message( - QueueUrl=queue_url, MessageBody=serialized_payload) + QueueUrl=self.queue_url, MessageBody=serialized_payload) self.logging.info(f"Sent message to queue {self.name}") # TODO(oana): gather metrics @@ -203,23 +228,89 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "Queue", "name": self.name} + return { + "type": "Queue", + "name": self.name, + "queue_arn": self.queue_arn, + "queue_url": self.queue_url + } @staticmethod def deserialize(obj: dict) -> Trigger: - return QueueTrigger(obj["name"]) + return QueueTrigger(obj["name"], None, obj["queue_arn"], obj["queue_url"]) class StorageTrigger(Trigger): - def __init__(self, fname: str, deployment_client: Optional[AWS] = None): + def __init__(self, fname: str, deployment_client: Optional[AWS] = None, bucket_name: Optional[str] = None): super().__init__() 
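+        # When no bucket name is passed in (first deployment), the constructor
+        # below derives a bucket name from the function name, creates the bucket,
+        # grants S3 permission to invoke the function, and registers the
+        # ObjectCreated notification that drives the trigger.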
self.name = fname - self._deployment_client = deployment_client + + self._deployment_client = None + self._bucket_name = None + + if (deployment_client): + self._deployment_client = deployment_client + if (bucket_name): + self._bucket_name = bucket_name + + # When creating the trigger for the first time, also create and store + # storage bucket information. + if (not self.bucket_name): + # Init clients + s3 = boto3.resource('s3') + lambda_client = self.deployment_client.get_lambda_client() + + # AWS disallows underscores in bucket names + self._bucket_name = self.name.replace('_', '-') + function_arn = lambda_client.get_function(FunctionName=self.name)[ + "Configuration"]["FunctionArn"] + + # Create bucket + self.logging.info(f"Creating bucket {self.bucket_name}") + + region = self.deployment_client.config.region + if (region == "us-east-1"): + s3.create_bucket(Bucket=self.bucket_name) + else: + s3.create_bucket( + Bucket=self.bucket_name, + CreateBucketConfiguration={ + "LocationConstraint": region + } + ) + + self.logging.info("Created bucket") + + lambda_client.add_permission( + FunctionName=self.name, + StatementId=str(uuid.uuid1()), + Action="lambda:InvokeFunction", + Principal="s3.amazonaws.com", + SourceArn=f"arn:aws:s3:::{self.bucket_name}", + ) + + # Add bucket trigger + bucket_notification = s3.BucketNotification(self.bucket_name) + bucket_notification.put( + NotificationConfiguration={'LambdaFunctionConfigurations': [ + { + 'LambdaFunctionArn': function_arn, + 'Events': [ + 's3:ObjectCreated:*' + ], + + }, + ]}) @staticmethod def typename() -> str: return "AWS.StorageTrigger" + @property + def bucket_name(self) -> AWS: + assert self._bucket_name + return self._bucket_name + @property def deployment_client(self) -> AWS: assert self._deployment_client @@ -237,57 +328,12 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.debug(f"Invoke function {self.name}") - # Init clients - lambda_client = self.deployment_client.get_lambda_client() - s3 = boto3.resource('s3') - - # Prep serialized_payload = json.dumps(payload) - # AWS disallows underscores in bucket names - bucket_name = self.name.replace('_', '-') - function_arn = lambda_client.get_function(FunctionName=self.name)[ - "Configuration"]["FunctionArn"] - - # Create bucket - self.logging.info(f"Creating bucket {bucket_name}") - - region = self.deployment_client.config.region - if (region == "us-east-1"): - s3.create_bucket(Bucket=bucket_name) - else: - s3.create_bucket( - Bucket=bucket_name, - CreateBucketConfiguration={ - "LocationConstraint": region - } - ) - - self.logging.info("Created bucket") - - lambda_client.add_permission( - FunctionName=self.name, - StatementId=str(uuid.uuid1()), - Action="lambda:InvokeFunction", - Principal="s3.amazonaws.com", - SourceArn=f"arn:aws:s3:::{bucket_name}", - ) - - # Add bucket trigger - bucket_notification = s3.BucketNotification(bucket_name) - bucket_notification.put( - NotificationConfiguration={'LambdaFunctionConfigurations': [ - { - 'LambdaFunctionArn': function_arn, - 'Events': [ - 's3:ObjectCreated:*' - ], - - }, - ]}) # Put object - s3.Object(bucket_name, 'payload.json').put(Body=serialized_payload) - self.logging.info(f"Uploaded payload to bucket {bucket_name}") + s3 = boto3.resource('s3') + s3.Object(self.bucket_name, 'payload.json').put(Body=serialized_payload) + self.logging.info(f"Uploaded payload to bucket {self.bucket_name}") # TODO(oana): gather metrics @@ -298,8 +344,8 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def 
serialize(self) -> dict: - return {"type": "Storage", "name": self.name} + return {"type": "Storage", "name": self.name, "bucket_name": self.bucket_name} @staticmethod def deserialize(obj: dict) -> Trigger: - return StorageTrigger(obj["name"]) + return StorageTrigger(obj["name"], None, obj["bucket_name"]) diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index ed3c3eb2..e74a3bcd 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -55,10 +55,35 @@ def deserialize(obj: dict) -> Trigger: class QueueTrigger(Trigger): - def __init__(self, fname: str, storage_account: str): + def __init__(self, fname: str, storage_account: str, queue_name: Optional[str] = None): super().__init__() self.name = fname - self.storage_account = storage_account + self._storage_account = storage_account + self._queue_name = None + + if (queue_name): + self._queue_name = queue_name + else: + # Having a queue name field is currently a bit contrived - it is mostly a + # device to indicate that a trigger resource exists and is cached. In the + # future, we may adopt a different convention for naming trigger resources, + # at which point this will become truly useful. + self._queue_name = self.name + + # Init client + default_credential = DefaultAzureCredential() + queue_client = QueueClient(self.account_url, + queue_name=self.queue_name, + credential=default_credential) + + # Create queue + self.logging.info(f"Creating queue {self.queue_name}") + + try: + queue_client.create_queue() + self.logging.info("Created queue") + except ResourceExistsError: + self.logging.info("Queue already exists, reusing...") @staticmethod def typename() -> str: @@ -68,31 +93,34 @@ def typename() -> str: def trigger_type() -> Trigger.TriggerType: return Trigger.TriggerType.QUEUE + @property + def storage_account(self) -> str: + assert self._storage_account + return self._storage_account + + @property + def account_url(self) -> str: + return f"https://{self.storage_account}.queue.core.windows.net" + + @property + def queue_name(self) -> str: + assert self._queue_name + return self._queue_name + def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.info(f"Invoke function {self.name}") - # Init client - account_url = f"https://{self.storage_account}.queue.core.windows.net" + # Prepare queue client default_credential = DefaultAzureCredential() - queue_client = QueueClient(account_url, - queue_name=self.name, + queue_client = QueueClient(self.account_url, + queue_name=self.queue_name, credential=default_credential) - serialized_payload = base64.b64encode(json.dumps(payload).encode('utf-8')).decode('utf-8') - - # Create queue - self.logging.info(f"Creating queue {self.name}") - - try: - queue_client.create_queue() - self.logging.info("Created queue") - except ResourceExistsError: - self.logging.info("Queue already exists, reusing...") - # Publish payload to queue + serialized_payload = base64.b64encode(json.dumps(payload).encode('utf-8')).decode('utf-8') queue_client.send_message(serialized_payload) - self.logging.info(f"Sent message to queue {self.name}") + self.logging.info(f"Sent message to queue {self.queue_name}") # TODO(oana): gather metrics @@ -103,18 +131,39 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "Queue", "name": self.name, "storage_account": self.storage_account} + return {"type": "Queue", "name": self.name, "storage_account": self.storage_account, "queue_name": self.queue_name} @staticmethod def 
deserialize(obj: dict) -> Trigger: - return QueueTrigger(obj["name"], obj["storage_account"]) + return QueueTrigger(obj["name"], obj["storage_account"], obj["queue_name"]) class StorageTrigger(Trigger): - def __init__(self, fname: str, storage_account: str): + def __init__(self, fname: str, storage_account: str, container_name: Optional[str] = None): super().__init__() self.name = fname - self.storage_account = storage_account + self._storage_account = storage_account + + if (container_name): + self._container_name = container_name + else: + # Having a container name field is currently a bit contrived - it is mostly + # a device to indicate that a trigger resource exists and is cached. In the + # future, we may adopt a different convention for naming trigger resources, + # at which point this will become truly useful. + self._container_name = self.name + + # Init client + default_credential = DefaultAzureCredential() + blob_service_client = BlobServiceClient(self.account_url, credential=default_credential) + + # Create container + self.logging.info(f"Creating container {self.container_name}") + try: + blob_service_client.create_container(self.container_name) + self.logging.info("Created container") + except ResourceExistsError: + self.logging.info("Container already exists, reusing...") @staticmethod def typename() -> str: @@ -124,35 +173,39 @@ def typename() -> str: def trigger_type() -> Trigger.TriggerType: return Trigger.TriggerType.STORAGE - def sync_invoke(self, payload: dict) -> ExecutionResult: + @property + def storage_account(self) -> str: + assert self._storage_account + return self._storage_account - self.logging.info(f"Invoke function {self.name}") + @property + def account_url(self) -> str: + return f"https://{self.storage_account}.blob.core.windows.net" - # Init client - account_url = f"https://{self.storage_account}.blob.core.windows.net" - default_credential = DefaultAzureCredential() - blob_service_client = BlobServiceClient(account_url, credential=default_credential) + @property + def container_name(self) -> str: + assert self._container_name + return self._container_name - # Create container - container_name = self.name - self.logging.info(f"Creating container {container_name}") - try: - blob_service_client.create_container(container_name) - self.logging.info("Created container") - except ResourceExistsError: - self.logging.info("Container already exists, reusing...") + def sync_invoke(self, payload: dict) -> ExecutionResult: + + self.logging.info(f"Invoke function {self.name}") # Prepare blob file_name = "payload.json" with open(file_name, 'w') as fp: json.dump(payload, fp) + # Init client + default_credential = DefaultAzureCredential() + blob_service_client = BlobServiceClient(self.account_url, credential=default_credential) + # Upload blob - blob_client = blob_service_client.get_blob_client(container=container_name, + blob_client = blob_service_client.get_blob_client(container=self.container_name, blob=file_name) with open(file=file_name, mode="rb") as payload: blob_client.upload_blob(payload, overwrite=True) - self.logging.info(f"Uploaded payload to container {container_name}") + self.logging.info(f"Uploaded payload to container {self.container_name}") # TODO(oana): gather metrics @@ -163,8 +216,8 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "Storage", "name": self.name, "storage_account": self.storage_account} + return {"type": "Storage", "name": self.name, "storage_account": 
self.storage_account, "container_name": self.container_name} @staticmethod def deserialize(obj: dict) -> Trigger: - return StorageTrigger(obj["name"], obj["storage_account"]) + return StorageTrigger(obj["name"], obj["storage_account"], obj["container_name"]) diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index c351d204..1f093876 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -106,22 +106,21 @@ def get_storage( """ Provide the fully qualified name of a trigger resource (queue or storage). """ + def get_trigger_resource_name(self, func_name: str) -> str: trigger = func_name.split("-")[-1] assert trigger == "queue" or trigger == "storage" - if (trigger == "queue"): - return 'projects/{project_name}/topics/{topic}'.format( - project_name=self.config.project_name, - topic=func_name + if trigger == "queue": + return "projects/{project_name}/topics/{topic}".format( + project_name=self.config.project_name, topic=func_name ) else: - return 'projects/{project_name}/buckets/{bucket}'.format( - project_name=self.config.project_name, - bucket=func_name + return "projects/{project_name}/buckets/{bucket}".format( + project_name=self.config.project_name, bucket=func_name ) - + """ Trigger resources (queue, bucket) must exist on GCP before the corresponding function is first deployed. @@ -133,23 +132,30 @@ def get_trigger_resource_name(self, func_name: str) -> str: :param func_name: the name of the function to be deployed, including its trigger + :param cached: when True, skip the creation of the actual resource + - merely create the configuration required to deploy the function. + This option is used in update_function() only. + :return: JSON/dict with the trigger configuration required by GCP on function creation/update """ - def create_trigger_resource(self, func_name: str) -> Dict: + + def create_trigger_resource(self, func_name: str, cached=False) -> Dict: trigger = func_name.split("-")[-1] - if (trigger == "queue"): - pub_sub = build("pubsub", "v1", cache_discovery=False) + if trigger == "queue": topic_name = self.get_trigger_resource_name(func_name) - - self.logging.info(f"Creating queue '{topic_name}'") - try: - pub_sub.projects().topics().create(name=topic_name).execute() - self.logging.info("Created queue") - except HttpError as http_error: - if (http_error.resp.status == 409): - self.logging.info("Queue already exists, reusing...") + + if not cached: + pub_sub = build("pubsub", "v1", cache_discovery=False) + + self.logging.info(f"Creating queue '{topic_name}'") + try: + pub_sub.projects().topics().create(name=topic_name).execute() + self.logging.info("Created queue") + except HttpError as http_error: + if http_error.resp.status == 409: + self.logging.info("Queue already exists, reusing...") return { "eventTrigger": { @@ -158,21 +164,23 @@ def create_trigger_resource(self, func_name: str) -> Dict: }, "entryPoint": "handler_queue", } - elif (trigger == "storage"): - storage = build("storage", "v1", cache_discovery=False) + elif trigger == "storage": bucket_name = self.get_trigger_resource_name(func_name) - self.logging.info(f"Creating storage bucket '{bucket_name}'") - try: - storage.buckets().insert( - project=self.config.project_name, - body={ "name": func_name }, - ).execute() - self.logging.info("Created storage bucket") - except HttpError as http_error: - if (http_error.resp.status == 409): - self.logging.info("Storage bucket already exists, reusing...") - + if not cached: + storage = build("storage", "v1", cache_discovery=False) + + self.logging.info(f"Creating storage bucket 
'{bucket_name}'") + try: + storage.buckets().insert( + project=self.config.project_name, + body={"name": func_name}, + ).execute() + self.logging.info("Created storage bucket") + except HttpError as http_error: + if http_error.resp.status == 409: + self.logging.info("Storage bucket already exists, reusing...") + return { "eventTrigger": { "eventType": "google.storage.object.finalize", @@ -181,7 +189,7 @@ def create_trigger_resource(self, func_name: str) -> Dict: "entryPoint": "handler_storage", } # HTTP triggers do not require resource creation - return { "httpsTrigger": {}, "entryPoint": "handler_http" } + return {"httpsTrigger": {}, "entryPoint": "handler_http"} @staticmethod def default_function_name(code_package: Benchmark) -> str: @@ -318,7 +326,8 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti "timeout": str(timeout) + "s", "ingressSettings": "ALLOW_ALL", "sourceArchiveUrl": "gs://" + code_bucket + "/" + code_prefix, - } | trigger_info, + } + | trigger_info, ) ) create_req.execute() @@ -390,10 +399,12 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) trigger = HTTPTrigger(invoke_url) self.logging.info(f"Created HTTP trigger for {function.name} function") elif trigger_type == Trigger.TriggerType.QUEUE: - trigger = QueueTrigger(function.name, self) + trigger = QueueTrigger( + function.name, self.get_trigger_resource_name(function.name), self + ) self.logging.info(f"Created Queue trigger for {function.name} function") elif trigger_type == Trigger.TriggerType.STORAGE: - trigger = StorageTrigger(function.name) + trigger = StorageTrigger(function.name, self.get_trigger_resource_name(function.name)) self.logging.info(f"Created Storage trigger for {function.name} function") else: raise RuntimeError("Not supported!") @@ -437,7 +448,7 @@ def update_function(self, function: Function, code_package: Benchmark): # Before creating the function, ensure all trigger resources (queue, # bucket) exist on GCP. - trigger_info = self.create_trigger_resource(function.name) + trigger_info = self.create_trigger_resource(function.name, cached=True) req = ( self.function_client.projects() @@ -451,7 +462,8 @@ def update_function(self, function: Function, code_package: Benchmark): "availableMemoryMb": function.config.memory, "timeout": str(function.config.timeout) + "s", "sourceArchiveUrl": "gs://" + bucket + "/" + code_package_name, - } | trigger_info, + } + | trigger_info, ) ) res = req.execute() diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 35b46119..753113c1 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -120,15 +120,21 @@ def deserialize(obj: dict) -> Trigger: class QueueTrigger(Trigger): - def __init__(self, fname: str, deployment_client: Optional[GCP] = None): + def __init__(self, fname: str, queue_name: str, deployment_client: Optional[GCP] = None): super().__init__() self.name = fname self._deployment_client = deployment_client + self._queue_name = queue_name @staticmethod def typename() -> str: return "GCP.QueueTrigger" + @property + def queue_name(self) -> GCP: + assert self._queue_name + return self._queue_name + @property def deployment_client(self) -> GCP: assert self._deployment_client @@ -149,13 +155,13 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Init client pub_sub = build("pubsub", "v1", cache_discovery=False) - # Prep + # Prepare payload # GCP is very particular with data encoding... 
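An illustrative round trip of the encoding this comment refers to, under the usual Pub/Sub background-function convention that the message `data` field carries base64-encoded text (the example payload below is made up):

```
import base64
import json

payload = {"test": 42}  # assumed example payload

# Publish side: JSON -> UTF-8 bytes -> base64 -> ASCII string for the API body
data = base64.b64encode(json.dumps(payload).encode("utf-8")).decode("utf-8")

# Function side: the event arrives with the same base64 text under "data"
event = {"data": data}
restored = json.loads(base64.b64decode(event["data"]).decode("utf-8"))
assert restored == payload
```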
serialized_payload = base64.b64encode(json.dumps(payload).encode("utf-8")) # Publish payload to queue pub_sub.projects().topics().publish( - topic=self.deployment_client.get_trigger_resource_name(self.name), + topic=self.queue_name, body={ "messages": [{ "data": serialized_payload.decode("utf-8") @@ -172,17 +178,18 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "Queue", "name": self.name} + return {"type": "Queue", "name": self.name, "queue_name": self.queue_name} @staticmethod def deserialize(obj: dict) -> Trigger: - return QueueTrigger(obj["name"]) + return QueueTrigger(obj["name"], obj["queue_name"]) class StorageTrigger(Trigger): - def __init__(self, fname: str): + def __init__(self, fname: str, bucket_name: str): super().__init__() self.name = fname + self._bucket_name = bucket_name @staticmethod def typename() -> str: @@ -192,16 +199,20 @@ def typename() -> str: def trigger_type() -> Trigger.TriggerType: return Trigger.TriggerType.STORAGE + @property + def bucket_name(self) -> GCP: + assert self._bucket_name + return self._bucket_name + def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.info(f"Invoke function {self.name}") # Init clients - bucket_name = self.name client = gcp_storage.Client(); - bucket_instance = client.bucket(bucket_name) + bucket_instance = client.bucket(self.bucket_name) - # Prep + # Prepare payload file_name = "payload.json" with open(file_name, "w") as fp: json.dump(payload, fp) @@ -211,7 +222,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: blob = bucket_instance.blob(blob_name=file_name, chunk_size=4 * 1024 * 1024) blob.upload_from_filename(file_name) - self.logging.info(f"Uploaded payload to bucket {bucket_name}") + self.logging.info(f"Uploaded payload to bucket {self.bucket_name}") # TODO(oana): gather metrics @@ -222,8 +233,8 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "Storage", "name": self.name} + return {"type": "Storage", "name": self.name, "bucket_name": self.bucket_name} @staticmethod def deserialize(obj: dict) -> Trigger: - return StorageTrigger(obj["name"]) + return StorageTrigger(obj["name"], obj["bucket_name"]) From 94a675aa0a4e30fe9888308a69e8289f3843f343 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 11 Jul 2024 01:12:05 +0200 Subject: [PATCH 09/26] [aws] Linting --- sebs/aws/aws.py | 55 ++++++++-------------- sebs/aws/config.py | 24 ++++------ sebs/aws/function.py | 17 ++++--- sebs/aws/s3.py | 27 ++++------- sebs/aws/triggers.py | 107 +++++++++++++++++++++---------------------- 5 files changed, 97 insertions(+), 133 deletions(-) diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 73c56286..92c65dcc 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -150,13 +150,11 @@ def package_code( # FIXME: use zipfile # create zip with hidden directory but without parent directory - execute("zip -qu -r9 {}.zip * .".format(benchmark), - shell=True, cwd=directory) + execute("zip -qu -r9 {}.zip * .".format(benchmark), shell=True, cwd=directory) benchmark_archive = "{}.zip".format(os.path.join(directory, benchmark)) self.logging.info("Created {} archive".format(benchmark_archive)) - bytes_size = os.path.getsize( - os.path.join(directory, benchmark_archive)) + bytes_size = os.path.getsize(os.path.join(directory, benchmark_archive)) mbytes = bytes_size / 1024.0 / 1024.0 self.logging.info("Zip archive size {:2f} MB".format(mbytes)) @@ -189,8 +187,7 @@ def 
create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun try: ret = self.client.get_function(FunctionName=func_name) self.logging.info( - "Function {} exists on AWS, retrieve configuration.".format( - func_name) + "Function {} exists on AWS, retrieve configuration.".format(func_name) ) # Here we assume a single Lambda role lambda_function = LambdaFunction( @@ -206,8 +203,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun lambda_function.updated_code = True # TODO: get configuration of REST API except self.client.exceptions.ResourceNotFoundException: - self.logging.info( - "Creating function {} from {}".format(func_name, package)) + self.logging.info("Creating function {} from {}".format(func_name, package)) # AWS Lambda limit on zip deployment size # Limit to 50 MB @@ -221,19 +217,16 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun else: code_package_name = cast(str, os.path.basename(package)) - code_bucket = storage_client.get_bucket( - Resources.StorageBucketType.DEPLOYMENT) + code_bucket = storage_client.get_bucket(Resources.StorageBucketType.DEPLOYMENT) code_prefix = os.path.join(benchmark, code_package_name) storage_client.upload(code_bucket, package, code_prefix) - self.logging.info( - "Uploading function {} code to {}".format(func_name, code_bucket)) + self.logging.info("Uploading function {} code to {}".format(func_name, code_bucket)) code_config = {"S3Bucket": code_bucket, "S3Key": code_prefix} ret = self.client.create_function( FunctionName=func_name, Runtime="{}{}".format( - language, self._map_language_runtime( - language, language_runtime) + language, self._map_language_runtime(language, language_runtime) ), Handler="handler.handler", Role=self.config.resources.lambda_role(self.session), @@ -301,8 +294,7 @@ def update_function(self, function: Function, code_package: Benchmark): # AWS Lambda limit on zip deployment if code_size < 50 * 1024 * 1024: with open(package, "rb") as code_body: - self.client.update_function_code( - FunctionName=name, ZipFile=code_body.read()) + self.client.update_function_code(FunctionName=name, ZipFile=code_body.read()) # Upload code package to S3, then update else: code_package_name = os.path.basename(package) @@ -331,8 +323,7 @@ def update_function_configuration(self, function: Function, benchmark: Benchmark MemorySize=function.config.memory, ) self.wait_function_updated(function) - self.logging.info( - f"Updated configuration of {function.name} function. ") + self.logging.info(f"Updated configuration of {function.name} function. 
") @staticmethod def default_function_name(code_package: Benchmark) -> str: @@ -401,12 +392,10 @@ def parse_aws_report( return request_id output = requests[request_id] output.request_id = request_id - output.provider_times.execution = int( - float(aws_vals["Duration"]) * 1000) + output.provider_times.execution = int(float(aws_vals["Duration"]) * 1000) output.stats.memory_used = float(aws_vals["Max Memory Used"]) if "Init Duration" in aws_vals: - output.provider_times.initialization = int( - float(aws_vals["Init Duration"]) * 1000) + output.provider_times.initialization = int(float(aws_vals["Init Duration"]) * 1000) output.billing.billed_time = int(aws_vals["Billed Duration"]) output.billing.memory = int(aws_vals["Memory Size"]) output.billing.gb_seconds = output.billing.billed_time * output.billing.memory @@ -440,14 +429,12 @@ def get_invocation_error(self, function_name: str, start_time: int, end_time: in time.sleep(5) response = self.logs_client.get_query_results(queryId=query_id) if len(response["results"]) == 0: - self.logging.info( - "AWS logs are not yet available, repeat after 15s...") + self.logging.info("AWS logs are not yet available, repeat after 15s...") time.sleep(15) response = None else: break - self.logging.error( - f"Invocation error for AWS Lambda function {function_name}") + self.logging.error(f"Invocation error for AWS Lambda function {function_name}") for message in response["results"]: for value in message: if value["field"] == "@message": @@ -494,8 +481,7 @@ def download_metrics( for val in results: for result_part in val: if result_part["field"] == "@message": - request_id = AWS.parse_aws_report( - result_part["value"], requests) + request_id = AWS.parse_aws_report(result_part["value"], requests) if request_id in requests: results_processed += 1 requests_ids.remove(request_id) @@ -509,11 +495,11 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T function = cast(LambdaFunction, func) + trigger: Trigger if trigger_type == Trigger.TriggerType.HTTP: api_name = "{}-http-api".format(function.name) - http_api = self.config.resources.http_api( - api_name, function, self.session) + http_api = self.config.resources.http_api(api_name, function, self.session) # https://aws.amazon.com/blogs/compute/announcing-http-apis-for-amazon-api-gateway/ # but this is wrong - source arn must be {api-arn}/*/* self.get_lambda_client().add_permission( @@ -536,13 +522,11 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T elif trigger_type == Trigger.TriggerType.QUEUE: trigger = QueueTrigger(func.name, self) trigger.logging_handlers = self.logging_handlers - self.logging.info( - f"Created Queue trigger for {func.name} function.") + self.logging.info(f"Created Queue trigger for {func.name} function.") elif trigger_type == Trigger.TriggerType.STORAGE: trigger = StorageTrigger(func.name, self) trigger.logging_handlers = self.logging_handlers - self.logging.info( - f"Created Storage trigger for {func.name} function.") + self.logging.info(f"Created Storage trigger for {func.name} function.") else: raise RuntimeError("Not supported!") @@ -556,8 +540,7 @@ def _enforce_cold_start(self, function: Function): FunctionName=func.name, Timeout=func.config.timeout, MemorySize=func.config.memory, - Environment={"Variables": { - "ForceColdStart": str(self.cold_start_counter)}}, + Environment={"Variables": {"ForceColdStart": str(self.cold_start_counter)}}, ) def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): diff 
--git a/sebs/aws/config.py b/sebs/aws/config.py index 6de965d4..44c9a490 100644 --- a/sebs/aws/config.py +++ b/sebs/aws/config.py @@ -85,8 +85,7 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden return ret def update_cache(self, cache: Cache): - cache.update_config(val=self.account_id, keys=[ - "aws", "credentials", "account_id"]) + cache.update_config(val=self.account_id, keys=["aws", "credentials", "account_id"]) def serialize(self) -> dict: out = {"account_id": self._account_id} @@ -146,8 +145,7 @@ def lambda_role(self, boto3_session: boto3.session.Session) -> str: try: out = iam_client.get_role(RoleName=role_name) self._lambda_role = out["Role"]["Arn"] - self.logging.info( - f"AWS: Selected {self._lambda_role} IAM role") + self.logging.info(f"AWS: Selected {self._lambda_role} IAM role") except iam_client.exceptions.NoSuchEntityException: out = iam_client.create_role( RoleName=role_name, @@ -161,8 +159,7 @@ def lambda_role(self, boto3_session: boto3.session.Session) -> str: time.sleep(10) # Attach basic AWS Lambda and S3 policies. for policy in attached_policies: - iam_client.attach_role_policy( - RoleName=role_name, PolicyArn=policy) + iam_client.attach_role_policy(RoleName=role_name, PolicyArn=policy) return self._lambda_role def http_api( @@ -224,11 +221,9 @@ def serialize(self) -> dict: def update_cache(self, cache: Cache): super().update_cache(cache) - cache.update_config(val=self._lambda_role, keys=[ - "aws", "resources", "lambda-role"]) + cache.update_config(val=self._lambda_role, keys=["aws", "resources", "lambda-role"]) for name, api in self._http_apis.items(): - cache.update_config(val=api.serialize(), keys=[ - "aws", "resources", "http-apis", name]) + cache.update_config(val=api.serialize(), keys=["aws", "resources", "http-apis", name]) @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: @@ -245,8 +240,7 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour if "resources" in config: AWSResources.initialize(ret, config["resources"]) ret.logging_handlers = handlers - ret.logging.info( - "No cached resources for AWS found, using user configuration.") + ret.logging.info("No cached resources for AWS found, using user configuration.") else: AWSResources.initialize(ret, {}) ret.logging_handlers = handlers @@ -284,10 +278,8 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config cached_config = cache.get_config("aws") # FIXME: use future annotations (see sebs/faas/system) - credentials = cast( - AWSCredentials, AWSCredentials.deserialize(config, cache, handlers)) - resources = cast(AWSResources, AWSResources.deserialize( - config, cache, handlers)) + credentials = cast(AWSCredentials, AWSCredentials.deserialize(config, cache, handlers)) + resources = cast(AWSResources, AWSResources.deserialize(config, cache, handlers)) config_obj = AWSConfig(credentials, resources) config_obj.logging_handlers = handlers # Load cached values diff --git a/sebs/aws/function.py b/sebs/aws/function.py index fbdb6d6f..24ce4a8d 100644 --- a/sebs/aws/function.py +++ b/sebs/aws/function.py @@ -55,18 +55,17 @@ def deserialize(cached_config: dict) -> "LambdaFunction": for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": LibraryTrigger, - "HTTP": HTTPTrigger, - "Queue": QueueTrigger, - "Storage": StorageTrigger - }.get(trigger["type"]), + { + "Library": LibraryTrigger, + "HTTP": HTTPTrigger, + "Queue": QueueTrigger, + "Storage": 
StorageTrigger, + }.get(trigger["type"]), ) - assert trigger_type, "Unknown trigger type {}".format( - trigger["type"]) + assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) return ret def code_bucket(self, benchmark: str, storage_client: S3): - self.bucket = storage_client.get_bucket( - Resources.StorageBucketType.DEPLOYMENT) + self.bucket = storage_client.get_bucket(Resources.StorageBucketType.DEPLOYMENT) return self.bucket diff --git a/sebs/aws/s3.py b/sebs/aws/s3.py index bd550c52..79ca8905 100644 --- a/sebs/aws/s3.py +++ b/sebs/aws/s3.py @@ -54,8 +54,7 @@ def _create_bucket( for bucket_name in buckets: if name in bucket_name: self.logging.info( - "Bucket {} for {} already exists, skipping.".format( - bucket_name, name) + "Bucket {} for {} already exists, skipping.".format(bucket_name, name) ) return bucket_name @@ -71,8 +70,7 @@ def _create_bucket( if self.region != "us-east-1": self.client.create_bucket( Bucket=bucket_name, - CreateBucketConfiguration={ - "LocationConstraint": self.region}, + CreateBucketConfiguration={"LocationConstraint": self.region}, ) else: # This is incredible x2 - boto3 will not throw exception if you recreate @@ -88,8 +86,7 @@ def _create_bucket( self.logging.info("Created bucket {}".format(bucket_name)) except self.client.exceptions.BucketAlreadyExists as e: - self.logging.error( - f"The bucket {bucket_name} exists already in region {self.region}!") + self.logging.error(f"The bucket {bucket_name} exists already in region {self.region}!") raise e except self.client.exceptions.ClientError as e: self.logging.error( @@ -113,8 +110,7 @@ def uploader_func(self, path_idx, key, filepath): for f in self.input_prefixes_files[path_idx]: f_name = f if key == f_name: - self.logging.info( - "Skipping upload of {} to {}".format(filepath, bucket_name)) + self.logging.info("Skipping upload of {} to {}".format(filepath, bucket_name)) return self.upload(bucket_name, filepath, key) @@ -124,10 +120,8 @@ def upload(self, bucket_name: str, filepath: str, key: str): self.client.upload_file(Filename=filepath, Bucket=bucket_name, Key=key) def download(self, bucket_name: str, key: str, filepath: str): - self.logging.info("Download {}:{} to {}".format( - bucket_name, key, filepath)) - self.client.download_file( - Bucket=bucket_name, Key=key, Filename=filepath) + self.logging.info("Download {}:{} to {}".format(bucket_name, key, filepath)) + self.client.download_file(Bucket=bucket_name, Key=key, Filename=filepath) def exists_bucket(self, bucket_name: str) -> bool: try: @@ -137,8 +131,7 @@ def exists_bucket(self, bucket_name: str) -> bool: return False def list_bucket(self, bucket_name: str, prefix: str = ""): - objects_list = self.client.list_objects_v2( - Bucket=bucket_name, Prefix=prefix) + objects_list = self.client.list_objects_v2(Bucket=bucket_name, Prefix=prefix) objects: List[str] if "Contents" in objects_list: objects = [obj["Key"] for obj in objects_list["Contents"]] @@ -156,10 +149,8 @@ def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: def clean_bucket(self, bucket: str): objects = self.client.list_objects_v2(Bucket=bucket) if "Contents" in objects: - objects = [{"Key": obj["Key"]} - for obj in objects["Contents"]] # type: ignore - self.client.delete_objects(Bucket=bucket, Delete={ - "Objects": objects}) # type: ignore + objects = [{"Key": obj["Key"]} for obj in objects["Contents"]] # type: ignore + self.client.delete_objects(Bucket=bucket, Delete={"Objects": objects}) # type: ignore def 
remove_bucket(self, bucket: str): self.client.delete_bucket(Bucket=bucket) diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index 2c62ef76..c1a47e4f 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -41,22 +41,19 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: serialized_payload = json.dumps(payload).encode("utf-8") client = self.deployment_client.get_lambda_client() begin = datetime.datetime.now() - ret = client.invoke(FunctionName=self.name, - Payload=serialized_payload, LogType="Tail") + ret = client.invoke(FunctionName=self.name, Payload=serialized_payload, LogType="Tail") end = datetime.datetime.now() aws_result = ExecutionResult.from_times(begin, end) aws_result.request_id = ret["ResponseMetadata"]["RequestId"] if ret["StatusCode"] != 200: self.logging.error("Invocation of {} failed!".format(self.name)) - self.logging.error("Input: {}".format( - serialized_payload.decode("utf-8"))) + self.logging.error("Input: {}".format(serialized_payload.decode("utf-8"))) aws_result.stats.failure = True return aws_result if "FunctionError" in ret: self.logging.error("Invocation of {} failed!".format(self.name)) - self.logging.error("Input: {}".format( - serialized_payload.decode("utf-8"))) + self.logging.error("Input: {}".format(serialized_payload.decode("utf-8"))) aws_result.stats.failure = True return aws_result self.logging.debug(f"Invoke of function {self.name} was successful") @@ -70,8 +67,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: if isinstance(function_output["body"], dict): aws_result.parse_benchmark_output(function_output["body"]) else: - aws_result.parse_benchmark_output( - json.loads(function_output["body"])) + aws_result.parse_benchmark_output(json.loads(function_output["body"])) return aws_result def async_invoke(self, payload: dict): @@ -86,10 +82,8 @@ def async_invoke(self, payload: dict): LogType="Tail", ) if ret["StatusCode"] != 202: - self.logging.error( - "Async invocation of {} failed!".format(self.name)) - self.logging.error("Input: {}".format( - serialized_payload.decode("utf-8"))) + self.logging.error("Async invocation of {} failed!".format(self.name)) + self.logging.error("Input: {}".format(serialized_payload.decode("utf-8"))) raise RuntimeError() return ret @@ -135,7 +129,13 @@ def deserialize(obj: dict) -> Trigger: class QueueTrigger(Trigger): - def __init__(self, fname: str, deployment_client: Optional[AWS] = None, queue_arn: Optional[str] = None, queue_url: Optional[str] = None): + def __init__( + self, + fname: str, + deployment_client: Optional[AWS] = None, + queue_arn: Optional[str] = None, + queue_url: Optional[str] = None, + ): super().__init__() self.name = fname @@ -143,40 +143,40 @@ def __init__(self, fname: str, deployment_client: Optional[AWS] = None, queue_ar self._queue_arn = None self._queue_url = None - if (deployment_client): + if deployment_client: self._deployment_client = deployment_client - if (queue_arn): + if queue_arn: self._queue_arn = queue_arn - if (queue_url): + if queue_url: self._queue_url = queue_url # When creating the trigger for the first time, also create and store # queue information. 
- if (not self.queue_arn and not self.queue_url): + if not self.queue_arn and not self.queue_url: # Init clients lambda_client = self.deployment_client.get_lambda_client() - sqs_client = boto3.client( - 'sqs', region_name=self.deployment_client.config.region) - + sqs_client = boto3.client("sqs", region_name=self.deployment_client.config.region) + # Create queue self.logging.debug(f"Creating queue {self.name}") self._queue_url = sqs_client.create_queue(QueueName=self.name)["QueueUrl"] self._queue_arn = sqs_client.get_queue_attributes( - QueueUrl=self.queue_url, - AttributeNames=["QueueArn"] + QueueUrl=self.queue_url, AttributeNames=["QueueArn"] )["Attributes"]["QueueArn"] self.logging.debug("Created queue") # Add queue trigger - if (not len(lambda_client.list_event_source_mappings(EventSourceArn=self.queue_arn, - FunctionName=self.name) - ["EventSourceMappings"])): + if not len( + lambda_client.list_event_source_mappings( + EventSourceArn=self.queue_arn, FunctionName=self.name + )["EventSourceMappings"] + ): lambda_client.create_event_source_mapping( EventSourceArn=self.queue_arn, FunctionName=self.name, - MaximumBatchingWindowInSeconds=1 + MaximumBatchingWindowInSeconds=1, ) @staticmethod @@ -210,13 +210,11 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.debug(f"Invoke function {self.name}") - sqs_client = boto3.client( - 'sqs', region_name=self.deployment_client.config.region) + sqs_client = boto3.client("sqs", region_name=self.deployment_client.config.region) # Publish payload to queue serialized_payload = json.dumps(payload) - sqs_client.send_message( - QueueUrl=self.queue_url, MessageBody=serialized_payload) + sqs_client.send_message(QueueUrl=self.queue_url, MessageBody=serialized_payload) self.logging.info(f"Sent message to queue {self.name}") # TODO(oana): gather metrics @@ -232,7 +230,7 @@ def serialize(self) -> dict: "type": "Queue", "name": self.name, "queue_arn": self.queue_arn, - "queue_url": self.queue_url + "queue_url": self.queue_url, } @staticmethod @@ -241,42 +239,43 @@ def deserialize(obj: dict) -> Trigger: class StorageTrigger(Trigger): - def __init__(self, fname: str, deployment_client: Optional[AWS] = None, bucket_name: Optional[str] = None): + def __init__( + self, fname: str, deployment_client: Optional[AWS] = None, bucket_name: Optional[str] = None + ): super().__init__() self.name = fname self._deployment_client = None self._bucket_name = None - if (deployment_client): + if deployment_client: self._deployment_client = deployment_client - if (bucket_name): + if bucket_name: self._bucket_name = bucket_name # When creating the trigger for the first time, also create and store # storage bucket information. 
- if (not self.bucket_name): + if not self.bucket_name: # Init clients - s3 = boto3.resource('s3') + s3 = boto3.resource("s3") lambda_client = self.deployment_client.get_lambda_client() # AWS disallows underscores in bucket names - self._bucket_name = self.name.replace('_', '-') - function_arn = lambda_client.get_function(FunctionName=self.name)[ - "Configuration"]["FunctionArn"] + self._bucket_name = self.name.replace("_", "-") + function_arn = lambda_client.get_function(FunctionName=self.name)["Configuration"][ + "FunctionArn" + ] # Create bucket self.logging.info(f"Creating bucket {self.bucket_name}") region = self.deployment_client.config.region - if (region == "us-east-1"): + if region == "us-east-1": s3.create_bucket(Bucket=self.bucket_name) else: s3.create_bucket( Bucket=self.bucket_name, - CreateBucketConfiguration={ - "LocationConstraint": region - } + CreateBucketConfiguration={"LocationConstraint": region}, ) self.logging.info("Created bucket") @@ -292,15 +291,15 @@ def __init__(self, fname: str, deployment_client: Optional[AWS] = None, bucket_n # Add bucket trigger bucket_notification = s3.BucketNotification(self.bucket_name) bucket_notification.put( - NotificationConfiguration={'LambdaFunctionConfigurations': [ - { - 'LambdaFunctionArn': function_arn, - 'Events': [ - 's3:ObjectCreated:*' - ], - - }, - ]}) + NotificationConfiguration={ + "LambdaFunctionConfigurations": [ + { + "LambdaFunctionArn": function_arn, + "Events": ["s3:ObjectCreated:*"], + }, + ] + } + ) @staticmethod def typename() -> str: @@ -331,8 +330,8 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: serialized_payload = json.dumps(payload) # Put object - s3 = boto3.resource('s3') - s3.Object(self.bucket_name, 'payload.json').put(Body=serialized_payload) + s3 = boto3.resource("s3") + s3.Object(self.bucket_name, "payload.json").put(Body=serialized_payload) self.logging.info(f"Uploaded payload to bucket {self.bucket_name}") # TODO(oana): gather metrics From bb0ade599792da87e5c9777868db4480c9f83082 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 11 Jul 2024 01:13:31 +0200 Subject: [PATCH 10/26] [azure] Linting --- sebs/azure/azure.py | 43 +++++++++++++++++++++++----------------- sebs/azure/function.py | 7 +++---- sebs/azure/triggers.py | 45 ++++++++++++++++++++++++++---------------- 3 files changed, 56 insertions(+), 39 deletions(-) diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index 2946ed58..03945274 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -157,10 +157,11 @@ def get_storage(self, replace_existing: bool = False) -> PersistentStorage: :param exec_files: the files which define and implement the function to be executed :return: JSON dictionary containing the function configuration """ + def create_function_json(self, benchmark, exec_files) -> Dict: trigger = benchmark.split("-")[-1] - if (trigger == "queue"): + if trigger == "queue": return { "scriptFile": exec_files, "entryPoint": "handler_queue", @@ -170,11 +171,11 @@ def create_function_json(self, benchmark, exec_files) -> Dict: "type": "queueTrigger", "direction": "in", "queueName": benchmark, - "connection": "AzureWebJobsStorage" + "connection": "AzureWebJobsStorage", } - ] + ], } - elif (trigger == "storage"): + elif trigger == "storage": return { "scriptFile": exec_files, "entryPoint": "handler_storage", @@ -184,9 +185,9 @@ def create_function_json(self, benchmark, exec_files) -> Dict: "type": "blobTrigger", "direction": "in", "path": benchmark, - "connection": "AzureWebJobsStorage" + "connection": 
"AzureWebJobsStorage", } - ] + ], } return { # HTTP "scriptFile": exec_files, @@ -202,7 +203,7 @@ def create_function_json(self, benchmark, exec_files) -> Dict: {"type": "http", "direction": "out", "name": "$return"}, ], } - + # Directory structure # handler # - source files @@ -244,7 +245,7 @@ def package_code( language_name, language_version, self.config.resources.resources_id, - trigger + trigger, ) .replace(".", "-") .replace("_", "-") @@ -254,7 +255,8 @@ def package_code( json_out = os.path.join(directory, "handler", "function.json") json.dump( self.create_function_json(func_name, EXEC_FILES[language_name]), - open(json_out, "w"), indent=2 + open(json_out, "w"), + indent=2, ) # generate host.json @@ -350,8 +352,10 @@ def update_function(self, function: Function, code_package: Benchmark): url = self.publish_function(function, code_package, True) # TODO(oana): this might need refactoring - if (function.name.endswith("http")): - trigger = HTTPTrigger(url, self.config.resources.data_storage_account(self.cli_instance)) + if function.name.endswith("http"): + trigger = HTTPTrigger( + url, self.config.resources.data_storage_account(self.cli_instance) + ) trigger.logging_handlers = self.logging_handlers function.add_trigger(trigger) @@ -580,21 +584,23 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) resource_group = self.config.resources.resource_group(self.cli_instance) storage_account = azure_function.function_storage.account_name - user_principal_name = self.cli_instance.execute('az ad user list') + user_principal_name = self.cli_instance.execute("az ad user list") storage_account_scope = self.cli_instance.execute( - ('az storage account show --resource-group {} --name {} --query id') - .format(resource_group, storage_account) + ("az storage account show --resource-group {} --name {} --query id").format( + resource_group, storage_account + ) ) self.cli_instance.execute( - ('az role assignment create --assignee "{}" \ + ( + 'az role assignment create --assignee "{}" \ --role "Storage {} Data Contributor" \ - --scope {}') - .format( + --scope {}' + ).format( json.loads(user_principal_name.decode("utf-8"))[0]["userPrincipalName"], "Queue" if trigger_type == Trigger.TriggerType.QUEUE else "Blob", - storage_account_scope.decode("utf-8") + storage_account_scope.decode("utf-8"), ) ) @@ -612,6 +618,7 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) self.cache_client.update_function(function) return trigger + # # def create_azure_function(self, fname, config): # diff --git a/sebs/azure/function.py b/sebs/azure/function.py index 8970d90d..375c0b79 100644 --- a/sebs/azure/function.py +++ b/sebs/azure/function.py @@ -42,10 +42,9 @@ def deserialize(cached_config: dict) -> Function: for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"HTTP": HTTPTrigger, - "Queue": QueueTrigger, - "Storage": StorageTrigger - }.get(trigger["type"]) + {"HTTP": HTTPTrigger, "Queue": QueueTrigger, "Storage": StorageTrigger}.get( + trigger["type"] + ), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index e74a3bcd..213215aa 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -59,9 +59,9 @@ def __init__(self, fname: str, storage_account: str, queue_name: Optional[str] = super().__init__() self.name = fname self._storage_account = storage_account - self._queue_name = None + 
self._queue_name = None - if (queue_name): + if queue_name: self._queue_name = queue_name else: # Having a queue name field is currently a bit contrived - it is mostly a @@ -69,12 +69,12 @@ def __init__(self, fname: str, storage_account: str, queue_name: Optional[str] = # future, we may adopt a different convention for naming trigger resources, # at which point this will become truly useful. self._queue_name = self.name - + # Init client default_credential = DefaultAzureCredential() - queue_client = QueueClient(self.account_url, - queue_name=self.queue_name, - credential=default_credential) + queue_client = QueueClient( + self.account_url, queue_name=self.queue_name, credential=default_credential + ) # Create queue self.logging.info(f"Creating queue {self.queue_name}") @@ -97,7 +97,7 @@ def trigger_type() -> Trigger.TriggerType: def storage_account(self) -> str: assert self._storage_account return self._storage_account - + @property def account_url(self) -> str: return f"https://{self.storage_account}.queue.core.windows.net" @@ -113,12 +113,12 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Prepare queue client default_credential = DefaultAzureCredential() - queue_client = QueueClient(self.account_url, - queue_name=self.queue_name, - credential=default_credential) + queue_client = QueueClient( + self.account_url, queue_name=self.queue_name, credential=default_credential + ) # Publish payload to queue - serialized_payload = base64.b64encode(json.dumps(payload).encode('utf-8')).decode('utf-8') + serialized_payload = base64.b64encode(json.dumps(payload).encode("utf-8")).decode("utf-8") queue_client.send_message(serialized_payload) self.logging.info(f"Sent message to queue {self.queue_name}") @@ -131,7 +131,12 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "Queue", "name": self.name, "storage_account": self.storage_account, "queue_name": self.queue_name} + return { + "type": "Queue", + "name": self.name, + "storage_account": self.storage_account, + "queue_name": self.queue_name, + } @staticmethod def deserialize(obj: dict) -> Trigger: @@ -144,7 +149,7 @@ def __init__(self, fname: str, storage_account: str, container_name: Optional[st self.name = fname self._storage_account = storage_account - if (container_name): + if container_name: self._container_name = container_name else: # Having a container name field is currently a bit contrived - it is mostly @@ -193,7 +198,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Prepare blob file_name = "payload.json" - with open(file_name, 'w') as fp: + with open(file_name, "w") as fp: json.dump(payload, fp) # Init client @@ -201,8 +206,9 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: blob_service_client = BlobServiceClient(self.account_url, credential=default_credential) # Upload blob - blob_client = blob_service_client.get_blob_client(container=self.container_name, - blob=file_name) + blob_client = blob_service_client.get_blob_client( + container=self.container_name, blob=file_name + ) with open(file=file_name, mode="rb") as payload: blob_client.upload_blob(payload, overwrite=True) self.logging.info(f"Uploaded payload to container {self.container_name}") @@ -216,7 +222,12 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "Storage", "name": self.name, "storage_account": self.storage_account, "container_name": self.container_name} + return { + "type": 
"Storage", + "name": self.name, + "storage_account": self.storage_account, + "container_name": self.container_name, + } @staticmethod def deserialize(obj: dict) -> Trigger: From 97d63450026c533a3cd50aa1abd5668ebae22f35 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 11 Jul 2024 01:13:56 +0200 Subject: [PATCH 11/26] [gcp] Linting --- sebs/benchmark.py | 2 +- sebs/gcp/function.py | 12 ++++++------ sebs/gcp/triggers.py | 14 ++++++-------- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/sebs/benchmark.py b/sebs/benchmark.py index e18e0e20..1114a296 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -505,7 +505,7 @@ def build( self.language_version, self.benchmark, self.is_cached_valid, - self._experiment_config.trigger + self._experiment_config.trigger, ) self.logging.info( ( diff --git a/sebs/gcp/function.py b/sebs/gcp/function.py index f2fb5ca4..09cab242 100644 --- a/sebs/gcp/function.py +++ b/sebs/gcp/function.py @@ -30,8 +30,7 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> "GCPFunction": from sebs.faas.function import Trigger - from sebs.gcp.triggers import LibraryTrigger, HTTPTrigger, \ - QueueTrigger, StorageTrigger + from sebs.gcp.triggers import LibraryTrigger, HTTPTrigger, QueueTrigger, StorageTrigger cfg = FunctionConfig.deserialize(cached_config["config"]) ret = GCPFunction( @@ -44,10 +43,11 @@ def deserialize(cached_config: dict) -> "GCPFunction": for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": LibraryTrigger, - "HTTP": HTTPTrigger, - "Queue": QueueTrigger, - "Storage": StorageTrigger + { + "Library": LibraryTrigger, + "HTTP": HTTPTrigger, + "Queue": QueueTrigger, + "Storage": StorageTrigger, }.get(trigger["type"]), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 753113c1..b20708fb 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -161,13 +161,11 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Publish payload to queue pub_sub.projects().topics().publish( - topic=self.queue_name, - body={ - "messages": [{ - "data": serialized_payload.decode("utf-8") - }], - } - ).execute() + topic=self.queue_name, + body={ + "messages": [{"data": serialized_payload.decode("utf-8")}], + }, + ).execute() # TODO(oana): gather metrics @@ -209,7 +207,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.info(f"Invoke function {self.name}") # Init clients - client = gcp_storage.Client(); + client = gcp_storage.Client() bucket_instance = client.bucket(self.bucket_name) # Prepare payload From be4e4f9b7f97500f5371dd4310397fe129d6b38d Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 11 Jul 2024 01:18:46 +0200 Subject: [PATCH 12/26] [system] Fix incorrect callback type --- sebs/benchmark.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sebs/benchmark.py b/sebs/benchmark.py index 1114a296..8e2a5a86 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -5,7 +5,7 @@ import shutil import subprocess from abc import abstractmethod -from typing import Any, Callable, Dict, List, Tuple +from typing import Any, Callable, Dict, List, Optional, Tuple import docker @@ -13,6 +13,7 @@ from sebs.cache import Cache from sebs.faas.config import Resources from sebs.utils import find_benchmark, project_absolute_path, LoggingBase +from sebs.faas.function import Trigger from sebs.faas.storage import PersistentStorage from typing import TYPE_CHECKING @@ 
-470,7 +471,10 @@ def recalculate_code_size(self): return self._code_size def build( - self, deployment_build_step: Callable[[str, str, str, str, bool], Tuple[str, int]] + self, + deployment_build_step: Callable[ + [str, str, str, str, bool, Optional[Trigger.TriggerType]], Tuple[str, int] + ], ) -> Tuple[bool, str]: # Skip build if files are up to date and user didn't enforce rebuild From debbda0e5ac4d6c6ba8fc755a018b4cbbaf1467f Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 11 Jul 2024 01:39:00 +0200 Subject: [PATCH 13/26] [gcp] Help mypy determine the type --- sebs/gcp/gcp.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 1f093876..9f031887 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -394,6 +394,7 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) time.sleep(3) self.logging.info(f"Function {function.name} - deployed!") + trigger: Trigger if trigger_type == Trigger.TriggerType.HTTP: invoke_url = status_res["httpsTrigger"]["url"] trigger = HTTPTrigger(invoke_url) From 3e52f3ab905cd8fa5834fb96bb8421813e206e46 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 11 Jul 2024 01:40:51 +0200 Subject: [PATCH 14/26] [aws] Fix return type --- sebs/aws/triggers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index c1a47e4f..39f022ee 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -306,7 +306,7 @@ def typename() -> str: return "AWS.StorageTrigger" @property - def bucket_name(self) -> AWS: + def bucket_name(self) -> str: assert self._bucket_name return self._bucket_name From 4bd7a20e8a5e0ff13fb1a762a3d490da2530de9b Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 11 Jul 2024 01:43:34 +0200 Subject: [PATCH 15/26] [azure] fix var name confusing mypy --- sebs/azure/triggers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index 213215aa..fc5c1e36 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -209,8 +209,8 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: blob_client = blob_service_client.get_blob_client( container=self.container_name, blob=file_name ) - with open(file=file_name, mode="rb") as payload: - blob_client.upload_blob(payload, overwrite=True) + with open(file=file_name, mode="rb") as payload_data: + blob_client.upload_blob(payload_data, overwrite=True) self.logging.info(f"Uploaded payload to container {self.container_name}") # TODO(oana): gather metrics From a466aab1d84206a28661308420bc2fdbca0a64a6 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 11 Jul 2024 01:49:52 +0200 Subject: [PATCH 16/26] [gcp] [azure] Fix linting issues --- sebs/azure/azure.py | 1 + sebs/gcp/gcp.py | 2 +- sebs/gcp/triggers.py | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index 03945274..bae91f38 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -604,6 +604,7 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) ) ) + trigger: Trigger if trigger_type == Trigger.TriggerType.QUEUE: trigger = QueueTrigger(function.name, storage_account) self.logging.info(f"Created Queue trigger for {function.name} function") diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 9f031887..6691f1b5 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -420,6 +420,7 @@ def cached_function(self, function: Function): from sebs.faas.function import 
Trigger from sebs.gcp.triggers import LibraryTrigger, QueueTrigger, StorageTrigger + gcp_trigger: Trigger for trigger in function.triggers(Trigger.TriggerType.LIBRARY): gcp_trigger = cast(LibraryTrigger, trigger) gcp_trigger.logging_handlers = self.logging_handlers @@ -431,7 +432,6 @@ def cached_function(self, function: Function): for trigger in function.triggers(Trigger.TriggerType.STORAGE): gcp_trigger = cast(StorageTrigger, trigger) gcp_trigger.logging_handlers = self.logging_handlers - gcp_trigger.deployment_client = self def update_function(self, function: Function, code_package: Benchmark): diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index b20708fb..556b46a9 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -131,7 +131,7 @@ def typename() -> str: return "GCP.QueueTrigger" @property - def queue_name(self) -> GCP: + def queue_name(self) -> str: assert self._queue_name return self._queue_name @@ -198,7 +198,7 @@ def trigger_type() -> Trigger.TriggerType: return Trigger.TriggerType.STORAGE @property - def bucket_name(self) -> GCP: + def bucket_name(self) -> str: assert self._bucket_name return self._bucket_name From 0b310c195519c6359ad3132aff687d07c0019974 Mon Sep 17 00:00:00 2001 From: orosca Date: Mon, 9 Sep 2024 00:24:47 +0200 Subject: [PATCH 17/26] Measurements infrastructure with queues --- benchmarks/wrappers/aws/python/handler.py | 47 ++++-- benchmarks/wrappers/aws/python/queue.py | 14 ++ benchmarks/wrappers/azure/python/handler.py | 81 +++++++--- benchmarks/wrappers/azure/python/queue.py | 15 ++ benchmarks/wrappers/gcp/python/handler.py | 89 +++++++---- benchmarks/wrappers/gcp/python/queue.py | 14 ++ config/systems.json | 13 +- docs/modularity.md | 3 +- requirements.gcp.txt | 1 + scripts/run_experiments.py | 1 + sebs/aws/aws.py | 1 + sebs/aws/triggers.py | 150 ++++++++++++------ sebs/azure/azure.py | 40 ++++- sebs/azure/triggers.py | 165 +++++++++++++++----- sebs/gcp/gcp.py | 13 +- sebs/gcp/triggers.py | 122 +++++++++++++-- tests/aws/create_function.py | 4 +- 17 files changed, 582 insertions(+), 191 deletions(-) create mode 100644 benchmarks/wrappers/aws/python/queue.py create mode 100644 benchmarks/wrappers/azure/python/queue.py create mode 100644 benchmarks/wrappers/gcp/python/queue.py diff --git a/benchmarks/wrappers/aws/python/handler.py b/benchmarks/wrappers/aws/python/handler.py index 2601dddf..a100393a 100644 --- a/benchmarks/wrappers/aws/python/handler.py +++ b/benchmarks/wrappers/aws/python/handler.py @@ -7,10 +7,16 @@ def handler(event, context): income_timestamp = datetime.datetime.now().timestamp() + # Flag to indicate whether the measurements should be returned as an HTTP + # response or via a result queue. 
+ return_http = True + # Queue trigger if ("Records" in event and event["Records"][0]["eventSource"] == 'aws:sqs'): event = json.loads(event["Records"][0]["body"]) + return_http = False + # Storage trigger if ("Records" in event and "s3" in event["Records"][0]): bucket_name = event["Records"][0]["s3"]["bucket"]["name"] @@ -22,6 +28,8 @@ def handler(event, context): obj = storage_inst.get_object(bucket_name, file_name) event = json.loads(obj['Body'].read()) + return_http = False + # HTTP trigger with API Gateaway if 'body' in event: event = json.loads(event['body']) @@ -68,17 +76,30 @@ def handler(event, context): if "cold_start" in os.environ: cold_start_var = os.environ["cold_start"] - return { - 'statusCode': 200, - 'body': json.dumps({ - 'begin': begin.strftime('%s.%f'), - 'end': end.strftime('%s.%f'), - 'results_time': results_time, - 'is_cold': is_cold, - 'result': log_data, - 'request_id': context.aws_request_id, - 'cold_start_var': cold_start_var, - 'container_id': container_id, - }) - } + stats = json.dumps({ + 'begin': begin.strftime('%s.%f'), + 'end': end.strftime('%s.%f'), + 'results_time': results_time, + 'is_cold': is_cold, + 'result': log_data, + 'request_id': context.aws_request_id, + 'cold_start_var': cold_start_var, + 'container_id': container_id, + }) + + # HTTP or library trigger: return an HTTP response. + if (return_http): + return { + 'statusCode': 200, + 'body': stats + } + + # Queue or storage trigger: return via a result queue. + arn = context.invoked_function_arn.split(":") + region = arn[3] + account_id = arn[4] + queue_name = f"{arn[6]}-result" + from function import queue + queue_client = queue.queue(queue_name, account_id, region) + queue_client.send_message(stats) diff --git a/benchmarks/wrappers/aws/python/queue.py b/benchmarks/wrappers/aws/python/queue.py new file mode 100644 index 00000000..95cde8a7 --- /dev/null +++ b/benchmarks/wrappers/aws/python/queue.py @@ -0,0 +1,14 @@ +import boto3 + +class queue: + client = None + + def __init__(self, queue_name: str, account_id: str, region: str): + self.client = boto3.client('sqs', region_name=region) + self.queue_url = f"https://sqs.{region}.amazonaws.com/{account_id}/{queue_name}" + + def send_message(self, message: str): + self.client.send_message( + QueueUrl=self.queue_url, + MessageBody=message, + ) diff --git a/benchmarks/wrappers/azure/python/handler.py b/benchmarks/wrappers/azure/python/handler.py index 6375de39..e64b17c1 100644 --- a/benchmarks/wrappers/azure/python/handler.py +++ b/benchmarks/wrappers/azure/python/handler.py @@ -2,16 +2,69 @@ import base64 import datetime, io, json, logging, os, uuid +from azure.identity import ManagedIdentityCredential +from azure.storage.queue import QueueClient + import azure.functions as func def handler_http(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: income_timestamp = datetime.datetime.now().timestamp() + req_json = req.get_json() if 'connection_string' in req_json: os.environ['STORAGE_CONNECTION_STRING'] = req_json['connection_string'] + req_json['request-id'] = context.invocation_id req_json['income-timestamp'] = income_timestamp + + return func.HttpResponse(measure(req_json), mimetype="application/json") + +def handler_queue(msg: func.QueueMessage, context: func.Context): + income_timestamp = datetime.datetime.now().timestamp() + + logging.info('Python queue trigger function processed a queue item.') + payload = msg.get_json() + + payload['request-id'] = context.invocation_id + payload['income-timestamp'] = income_timestamp + + stats = 
measure(payload) + + queue_name = f"{os.getenv('WEBSITE_SITE_NAME')}-result" + storage_account = os.getenv('STORAGE_ACCOUNT') + logging.info(queue_name) + logging.info(storage_account) + + from . import queue + queue_client = queue.queue(queue_name, storage_account) + queue_client.send_message(stats) + +def handler_storage(blob: func.InputStream, context: func.Context): + income_timestamp = datetime.datetime.now().timestamp() + + logging.info('Python Blob trigger function processed %s', blob.name) + payload = json.loads(blob.readline().decode('utf-8')) + + payload['request-id'] = context.invocation_id + payload['income-timestamp'] = income_timestamp + + stats = measure(payload) + + queue_name = f"{os.getenv('WEBSITE_SITE_NAME')}-result" + storage_account = os.getenv('STORAGE_ACCOUNT') + logging.info(queue_name) + logging.info(storage_account) + + from . import queue + queue_client = queue.queue(queue_name, storage_account) + queue_client.send_message(stats) + +def measure(req_json) -> str: + # logging.info("TIPU") TODO(oana) remove + # logging.info(type(req_json)) + req_id = req_json['request-id'] + begin = datetime.datetime.now() # We are deployed in the same directory from . import function @@ -29,7 +82,6 @@ def handler_http(req: func.HttpRequest, context: func.Context) -> func.HttpRespo from . import storage storage_inst = storage.storage.get_instance() b = req_json.get('logs').get('bucket') - req_id = context.invocation_id storage_inst.upload_stream(b, '{}.json'.format(req_id), io.BytesIO(json.dumps(log_data).encode('utf-8'))) results_end = datetime.datetime.now() @@ -57,8 +109,7 @@ def handler_http(req: func.HttpRequest, context: func.Context) -> func.HttpRespo cold_marker = True is_cold_worker = True - return func.HttpResponse( - json.dumps({ + return json.dumps({ 'begin': begin.strftime('%s.%f'), 'end': end.strftime('%s.%f'), 'results_time': results_time, @@ -67,25 +118,5 @@ def handler_http(req: func.HttpRequest, context: func.Context) -> func.HttpRespo 'is_cold_worker': is_cold_worker, 'container_id': container_id, 'environ_container_id': os.environ['CONTAINER_NAME'], - 'request_id': context.invocation_id - }), - mimetype="application/json" - ) - -def handler_queue(msg: func.QueueMessage): - logging.info('Python queue trigger function processed a queue item.') - payload = msg.get_body().decode('utf-8') - - from . import function - ret = function.handler(payload) - - # TODO(oana) - -def handler_storage(blob: func.InputStream): - logging.info('Python Blob trigger function processed %s', blob.name) - payload = blob.readline().decode('utf-8') # TODO(oana) - - from . 
import function - ret = function.handler(payload) - - # TODO(oana) + 'request_id': req_id + }) \ No newline at end of file diff --git a/benchmarks/wrappers/azure/python/queue.py b/benchmarks/wrappers/azure/python/queue.py new file mode 100644 index 00000000..93824181 --- /dev/null +++ b/benchmarks/wrappers/azure/python/queue.py @@ -0,0 +1,15 @@ +from azure.identity import ManagedIdentityCredential +from azure.storage.queue import QueueClient + +class queue: + client = None + + def __init__(self, queue_name: str, storage_account: str): + account_url = f"https://{storage_account}.queue.core.windows.net" + managed_credential = ManagedIdentityCredential() + self.client = QueueClient(account_url, + queue_name=queue_name, + credential=managed_credential) + + def send_message(self, message: str): + self.client.send_message(message) diff --git a/benchmarks/wrappers/gcp/python/handler.py b/benchmarks/wrappers/gcp/python/handler.py index 6a1284e5..c547c52c 100644 --- a/benchmarks/wrappers/gcp/python/handler.py +++ b/benchmarks/wrappers/gcp/python/handler.py @@ -8,10 +8,67 @@ def handler_http(req): income_timestamp = datetime.datetime.now().timestamp() req_id = req.headers.get('Function-Execution-Id') - req_json = req.get_json() req_json['request-id'] = req_id req_json['income-timestamp'] = income_timestamp + + return measure(req_json), 200, {'ContentType': 'application/json'} + +def handler_queue(data, context): + income_timestamp = datetime.datetime.now().timestamp() + + serialized_payload = data.get('data') + payload = json.loads(base64.b64decode(serialized_payload).decode("utf-8")) + + payload['request-id'] = context.event_id + payload['income-timestamp'] = income_timestamp + + stats = measure(payload) + + # Retrieve the project id and construct the result queue name. + project_id = context.resource.split("/")[1] + topic_name = f"{context.resource.split('/')[3]}-result" + + from function import queue + queue_client = queue.queue(topic_name, project_id) + queue_client.send_message(stats) + +def handler_storage(data, context): + income_timestamp = datetime.datetime.now().timestamp() + + bucket_name = data.get('bucket') + name = data.get('name') + filepath = '/tmp/bucket_contents' + + from function import storage + storage_inst = storage.storage.get_instance() + storage_inst.download(bucket_name, name, filepath) + + payload = {} + + with open(filepath, 'r') as fp: + payload = json.load(fp) + + payload['request-id'] = context.event_id + payload['income-timestamp'] = income_timestamp + + stats = measure(payload) + + # Retrieve the project id and construct the result queue name. + from google.auth import default + # Used to be an env var, now we need an additional request to the metadata + # server to retrieve it. 
+    _, project_id = default()
+    topic_name = f"{context.resource['name'].split('/')[3]}-result"
+
+    from function import queue
+    queue_client = queue.queue(topic_name, project_id)
+    queue_client.send_message(stats)
+
+# TODO(oana) comment
+def measure(req_json) -> str:
+    req_id = req_json['request-id']
+
     begin = datetime.datetime.now()
     # We are deployed in the same directorygit status
     from function import function
@@ -62,32 +119,4 @@ def handler_http(req):
         'request_id': req_id,
         'cold_start_var': cold_start_var,
         'container_id': container_id,
-    }), 200, {'ContentType': 'application/json'}
-
-def handler_queue(data, context):
-    serialized_payload = data.get('data')
-    payload = json.loads(base64.b64decode(serialized_payload).decode("utf-8"))
-
-    from function import function
-    ret = function.handler(payload)
-
-    # TODO(oana)
-
-def handler_storage(data, context):
-    bucket_name = data.get('bucket')
-    name = data.get('name')
-    filepath = '/tmp/bucket_contents'
-
-    from function import storage
-    storage_inst = storage.storage.get_instance()
-    storage_inst.download(bucket_name, name, filepath)
-
-    payload = {}
-
-    with open(filepath, 'r') as fp:
-        payload = json.load(fp)
-
-    from function import function
-    ret = function.handler(payload)
-
-    # TODO(oana)
+    })
diff --git a/benchmarks/wrappers/gcp/python/queue.py b/benchmarks/wrappers/gcp/python/queue.py
new file mode 100644
index 00000000..b6e009e7
--- /dev/null
+++ b/benchmarks/wrappers/gcp/python/queue.py
@@ -0,0 +1,14 @@
+from google.cloud import pubsub_v1
+
+class queue:
+    client = None
+
+    def __init__(self, topic_name: str, project_id: str):
+        self.client = pubsub_v1.PublisherClient()
+        self.topic_name = 'projects/{project_id}/topics/{topic}'.format(
+            project_id=project_id,
+            topic=topic_name,
+        )
+
+    def send_message(self, message: str):
+        self.client.publish(self.topic_name, message.encode("utf-8"))
diff --git a/config/systems.json b/config/systems.json
index 6e9ddd0c..88358b60 100644
--- a/config/systems.json
+++ b/config/systems.json
@@ -71,7 +71,8 @@
                 "deployment": {
                     "files": [
                         "handler.py",
-                        "storage.py"
+                        "storage.py",
+                        "queue.py"
                     ],
                     "packages": []
                 }
@@ -114,10 +115,13 @@
                 "deployment": {
                     "files": [
                         "handler.py",
-                        "storage.py"
+                        "storage.py",
+                        "queue.py"
                     ],
                     "packages": [
-                        "azure-storage-blob"
+                        "azure-storage-blob",
+                        "\nazure-storage-queue",
+                        "\nazure-identity"
                     ]
                 }
            },
@@ -162,7 +166,8 @@
                 "deployment": {
                     "files": [
                         "handler.py",
-                        "storage.py"
+                        "storage.py",
+                        "queue.py"
                     ],
                     "packages": [
                         "google-cloud-storage"
diff --git a/docs/modularity.md b/docs/modularity.md
index f2614655..f6015b8e 100644
--- a/docs/modularity.md
+++ b/docs/modularity.md
@@ -267,7 +267,8 @@ Check other platforms to see how configuration is defined, for example, for AWS:
       "deployment": {
         "files": [
           "handler.py",
-          "storage.py"
+          "storage.py",
+          "queue.py"
         ],
         "packages": []
       }
diff --git a/requirements.gcp.txt b/requirements.gcp.txt
index 9cb90916..3d1aea35 100644
--- a/requirements.gcp.txt
+++ b/requirements.gcp.txt
@@ -4,3 +4,4 @@ google-api-python-client==1.12.5
 google-cloud-monitoring==2.0.0
 google-api-python-client-stubs
 google-cloud-logging==2.0.0
+google-cloud-pubsub==2.23.0
\ No newline at end of file
diff --git a/scripts/run_experiments.py b/scripts/run_experiments.py
index c18b96c0..c9167553 100755
--- a/scripts/run_experiments.py
+++ b/scripts/run_experiments.py
@@ -445,6 +445,7 @@ def __init__(self, cache_client, config, docker_client, language):
            function
            - function.py
            - storage.py
+           - queue.py
            - resources
         handler.py

diff --git a/sebs/aws/aws.py
b/sebs/aws/aws.py index 92c65dcc..9bcb52e6 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -119,6 +119,7 @@ def get_storage(self, replace_existing: bool = False) -> PersistentStorage: function - function.py - storage.py + - queue.py - resources handler.py diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index 39f022ee..5c296c90 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -8,7 +8,9 @@ import boto3 from sebs.aws.aws import AWS +from sebs.aws.queue import SQS from sebs.faas.function import ExecutionResult, Trigger +from sebs.faas.queue import QueueType class LibraryTrigger(Trigger): @@ -133,65 +135,59 @@ def __init__( self, fname: str, deployment_client: Optional[AWS] = None, - queue_arn: Optional[str] = None, - queue_url: Optional[str] = None, + queue: Optional[SQS] = None, + result_queue: Optional[SQS] = None ): super().__init__() self.name = fname + self._queue = queue + self._result_queue = result_queue + self._deployment_client = deployment_client - self._deployment_client = None - self._queue_arn = None - self._queue_url = None - - if deployment_client: - self._deployment_client = deployment_client - if queue_arn: - self._queue_arn = queue_arn - if queue_url: - self._queue_url = queue_url - - # When creating the trigger for the first time, also create and store - # queue information. - if not self.queue_arn and not self.queue_url: - # Init clients - lambda_client = self.deployment_client.get_lambda_client() - sqs_client = boto3.client("sqs", region_name=self.deployment_client.config.region) - - # Create queue - self.logging.debug(f"Creating queue {self.name}") - - self._queue_url = sqs_client.create_queue(QueueName=self.name)["QueueUrl"] - self._queue_arn = sqs_client.get_queue_attributes( - QueueUrl=self.queue_url, AttributeNames=["QueueArn"] - )["Attributes"]["QueueArn"] - - self.logging.debug("Created queue") + if (not self._queue): + self._queue = SQS( + self.name, + QueueType.TRIGGER, + self.deployment_client.config.region + ) + self.queue.create_queue() # Add queue trigger + lambda_client = self.deployment_client.get_lambda_client() if not len( lambda_client.list_event_source_mappings( - EventSourceArn=self.queue_arn, FunctionName=self.name + EventSourceArn=self.queue.queue_arn, FunctionName=self.name )["EventSourceMappings"] ): lambda_client.create_event_source_mapping( - EventSourceArn=self.queue_arn, + EventSourceArn=self.queue.queue_arn, FunctionName=self.name, MaximumBatchingWindowInSeconds=1, ) + # Create result queue for communicating benchmark results back to the + # client. 
+ if (not self._result_queue): + self._result_queue = SQS( + fname, + QueueType.RESULT, + self.deployment_client.config.region + ) + self._result_queue.create_queue() + @staticmethod def typename() -> str: return "AWS.QueueTrigger" @property - def queue_arn(self) -> str: - assert self._queue_arn - return self._queue_arn + def queue(self) -> SQS: + assert self._queue + return self._queue @property - def queue_url(self) -> str: - assert self._queue_url - return self._queue_url + def result_queue(self) -> SQS: + assert self._result_queue + return self._result_queue @property def deployment_client(self) -> AWS: @@ -210,14 +206,21 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.debug(f"Invoke function {self.name}") - sqs_client = boto3.client("sqs", region_name=self.deployment_client.config.region) - # Publish payload to queue serialized_payload = json.dumps(payload) - sqs_client.send_message(QueueUrl=self.queue_url, MessageBody=serialized_payload) - self.logging.info(f"Sent message to queue {self.name}") + begin = datetime.datetime.now() + self.queue.send_message(serialized_payload) - # TODO(oana): gather metrics + response = "" + while (response == ""): + response = self.result_queue.receive_message() + + end = datetime.datetime.now() + + # TODO(oana) error handling + result = ExecutionResult.from_times(begin, end) + result.parse_benchmark_output(json.loads(response)) + return result def async_invoke(self, payload: dict) -> concurrent.futures.Future: @@ -229,33 +232,45 @@ def serialize(self) -> dict: return { "type": "Queue", "name": self.name, - "queue_arn": self.queue_arn, - "queue_url": self.queue_url, + "queue": self.queue.serialize(), + "result_queue": self.result_queue.serialize() } @staticmethod def deserialize(obj: dict) -> Trigger: - return QueueTrigger(obj["name"], None, obj["queue_arn"], obj["queue_url"]) + return QueueTrigger( + obj["name"], + None, + SQS.deserialize(obj["queue"]), + SQS.deserialize(obj["result_queue"]) + ) class StorageTrigger(Trigger): def __init__( - self, fname: str, deployment_client: Optional[AWS] = None, bucket_name: Optional[str] = None + self, + fname: str, + deployment_client: Optional[AWS] = None, + bucket_name: Optional[str] = None, + result_queue: Optional[SQS] = None ): super().__init__() self.name = fname self._deployment_client = None self._bucket_name = None + self._result_queue = None if deployment_client: self._deployment_client = deployment_client if bucket_name: self._bucket_name = bucket_name + if result_queue: + self._result_queue = result_queue # When creating the trigger for the first time, also create and store # storage bucket information. - if not self.bucket_name: + if not self._bucket_name: # Init clients s3 = boto3.resource("s3") lambda_client = self.deployment_client.get_lambda_client() @@ -301,6 +316,16 @@ def __init__( } ) + # Create result queue for communicating benchmark results back to the + # client. 
+ if (not self._result_queue): + self._result_queue = SQS( + fname, + QueueType.RESULT, + self.deployment_client.config.region + ) + self._result_queue.create_queue() + @staticmethod def typename() -> str: return "AWS.StorageTrigger" @@ -315,6 +340,11 @@ def deployment_client(self) -> AWS: assert self._deployment_client return self._deployment_client + @property + def result_queue(self) -> SQS: + assert self._result_queue + return self._result_queue + @deployment_client.setter def deployment_client(self, deployment_client: AWS): self._deployment_client = deployment_client @@ -331,10 +361,20 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Put object s3 = boto3.resource("s3") + begin = datetime.datetime.now() s3.Object(self.bucket_name, "payload.json").put(Body=serialized_payload) self.logging.info(f"Uploaded payload to bucket {self.bucket_name}") - # TODO(oana): gather metrics + response = "" + while (response == ""): + response = self.result_queue.receive_message() + + end = datetime.datetime.now() + + # TODO(oana) error handling + result = ExecutionResult.from_times(begin, end) + result.parse_benchmark_output(json.loads(response)) + return result def async_invoke(self, payload: dict) -> concurrent.futures.Future: @@ -343,8 +383,18 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "Storage", "name": self.name, "bucket_name": self.bucket_name} + return { + "type": "Storage", + "name": self.name, + "bucket_name": self.bucket_name, + "result_queue": self.result_queue.serialize() + } @staticmethod def deserialize(obj: dict) -> Trigger: - return StorageTrigger(obj["name"], None, obj["bucket_name"]) + return StorageTrigger( + obj["name"], + None, + obj["bucket_name"], + SQS.deserialize(obj["result_queue"]) + ) diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index bae91f38..638b3b25 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -573,8 +573,8 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) time.sleep(20) """ - The only implemented trigger at the moment is HTTPTrigger. - It is automatically created for each function. + Supports HTTP, queue and storage triggers, as specified by + the user when SeBS is run. """ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: @@ -605,12 +605,36 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) ) trigger: Trigger - if trigger_type == Trigger.TriggerType.QUEUE: - trigger = QueueTrigger(function.name, storage_account) - self.logging.info(f"Created Queue trigger for {function.name} function") - elif trigger_type == Trigger.TriggerType.STORAGE: - trigger = StorageTrigger(function.name, storage_account) - self.logging.info(f"Created Storage trigger for {function.name} function") + if trigger_type == Trigger.TriggerType.QUEUE or trigger_type == Trigger.TriggerType.STORAGE: + resource_group = self.config.resources.resource_group(self.cli_instance) + + # Set the storage account as an env var on the function. + ret = self.cli_instance.execute( + f"az functionapp config appsettings set --name {function.name} " + f" --resource-group {resource_group} " + f" --settings STORAGE_ACCOUNT={storage_account}" + ) + print(ret.decode()) + + # Connect the function app to the result queue via Service + # Connector. 
+ ret = self.cli_instance.execute( + f"az webapp connection create storage-queue " + f" --resource-group {resource_group} " + f" --target-resource-group {resource_group} " + f" --account {storage_account} " + f" --name {function.name} " + f" --client-type python " # TODO(oana) does this work for nodejs + f" --system-identity " + ) + print(ret.decode()) + + if trigger_type == Trigger.TriggerType.QUEUE: + trigger = QueueTrigger(function.name, storage_account, self.config.region) + self.logging.info(f"Created Queue trigger for {function.name} function") + elif trigger_type == Trigger.TriggerType.STORAGE: + trigger = StorageTrigger(function.name, storage_account, self.config.region) + self.logging.info(f"Created Storage trigger for {function.name} function") else: raise RuntimeError("Not supported!") diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index fc5c1e36..96edff9a 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -1,6 +1,8 @@ import base64 import concurrent.futures +import datetime import json +import time from typing import Any, Dict, Optional # noqa from azure.core.exceptions import ResourceExistsError @@ -9,7 +11,9 @@ from azure.storage.queue import QueueClient from sebs.azure.config import AzureResources +from sebs.azure.queue import AzureQueue from sebs.faas.function import ExecutionResult, Trigger +from sebs.faas.queue import QueueType class AzureTrigger(Trigger): @@ -55,35 +59,38 @@ def deserialize(obj: dict) -> Trigger: class QueueTrigger(Trigger): - def __init__(self, fname: str, storage_account: str, queue_name: Optional[str] = None): + def __init__( + self, + fname: str, + storage_account: str, + region: str, + queue: Optional[AzureQueue] = None, + result_queue: Optional[AzureQueue] = None + ): super().__init__() self.name = fname self._storage_account = storage_account - self._queue_name = None - - if queue_name: - self._queue_name = queue_name - else: - # Having a queue name field is currently a bit contrived - it is mostly a - # device to indicate that a trigger resource exists and is cached. In the - # future, we may adopt a different convention for naming trigger resources, - # at which point this will become truly useful. 
- self._queue_name = self.name - - # Init client - default_credential = DefaultAzureCredential() - queue_client = QueueClient( - self.account_url, queue_name=self.queue_name, credential=default_credential + self._region = region + self._queue = queue + self._result_queue = result_queue + + if (not self._queue): + self._queue = AzureQueue( + self.name, + QueueType.TRIGGER, + self.storage_account, + self.region ) - - # Create queue - self.logging.info(f"Creating queue {self.queue_name}") - - try: - queue_client.create_queue() - self.logging.info("Created queue") - except ResourceExistsError: - self.logging.info("Queue already exists, reusing...") + self.queue.create_queue() + + if (not self._result_queue): + self._result_queue = AzureQueue( + fname, + QueueType.RESULT, + storage_account, + self.region + ) + self._result_queue.create_queue() @staticmethod def typename() -> str: @@ -98,6 +105,21 @@ def storage_account(self) -> str: assert self._storage_account return self._storage_account + @property + def region(self) -> str: + assert self._region + return self._region + + @property + def queue(self) -> AzureQueue: + assert self._queue + return self._queue + + @property + def result_queue(self) -> AzureQueue: + assert self._result_queue + return self._result_queue + @property def account_url(self) -> str: return f"https://{self.storage_account}.queue.core.windows.net" @@ -111,18 +133,23 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.info(f"Invoke function {self.name}") - # Prepare queue client - default_credential = DefaultAzureCredential() - queue_client = QueueClient( - self.account_url, queue_name=self.queue_name, credential=default_credential - ) - # Publish payload to queue serialized_payload = base64.b64encode(json.dumps(payload).encode("utf-8")).decode("utf-8") - queue_client.send_message(serialized_payload) - self.logging.info(f"Sent message to queue {self.queue_name}") + begin = datetime.datetime.now() + self.queue.send_message(serialized_payload) + + response = "" + while (response == ""): + response = self.result_queue.receive_message() + if (response == ""): + time.sleep(5) - # TODO(oana): gather metrics + end = datetime.datetime.now() + + # TODO(oana) error handling + result = ExecutionResult.from_times(begin, end) + result.parse_benchmark_output(json.loads(response)) + return result def async_invoke(self, payload: dict) -> concurrent.futures.Future: @@ -135,19 +162,37 @@ def serialize(self) -> dict: "type": "Queue", "name": self.name, "storage_account": self.storage_account, - "queue_name": self.queue_name, + "region": self.region, + "queue": self.queue.serialize(), + "result_queue": self.result_queue.serialize() } @staticmethod def deserialize(obj: dict) -> Trigger: - return QueueTrigger(obj["name"], obj["storage_account"], obj["queue_name"]) + return QueueTrigger( + obj["name"], + obj["storage_account"], + obj["region"], + AzureQueue.deserialize(obj["queue"]), + AzureQueue.deserialize(obj["result_queue"]) + ) class StorageTrigger(Trigger): - def __init__(self, fname: str, storage_account: str, container_name: Optional[str] = None): + def __init__( + self, + fname: str, + storage_account: str, + region: str, + result_queue: Optional[AzureQueue] = None, + container_name: Optional[str] = None + ): super().__init__() self.name = fname self._storage_account = storage_account + self._region = region + self._result_queue = result_queue + self._container_name = None if container_name: self._container_name = container_name @@ -170,6 +215,15 @@ def 
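This refactor replaces the inline QueueClient handling shown in the removed lines with the shared AzureQueue wrapper. A minimal sketch of the SDK usage that wrapper encapsulates (account URL, DefaultAzureCredential, base64-encoded JSON body, matching how the trigger serializes its payload); the account and queue names are placeholders:

    import base64
    import json

    from azure.identity import DefaultAzureCredential
    from azure.storage.queue import QueueClient

    account_url = "https://mystorageacct.queue.core.windows.net"  # illustrative account
    client = QueueClient(account_url, queue_name="my-trigger-queue",
                         credential=DefaultAzureCredential())
    client.create_queue()  # raises ResourceExistsError if the queue already exists

    # The trigger sends its payload base64-encoded, so the queue-triggered
    # function receives the same JSON document the client prepared.
    payload = base64.b64encode(json.dumps({"request-id": "abc"}).encode("utf-8")).decode("utf-8")
    client.send_message(payload)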
__init__(self, fname: str, storage_account: str, container_name: Optional[st except ResourceExistsError: self.logging.info("Container already exists, reusing...") + if (not self._result_queue): + self._result_queue = AzureQueue( + fname, + QueueType.RESULT, + storage_account, + self.region + ) + self._result_queue.create_queue() + @staticmethod def typename() -> str: return "Azure.StorageTrigger" @@ -183,6 +237,16 @@ def storage_account(self) -> str: assert self._storage_account return self._storage_account + @property + def region(self) -> str: + assert self._region + return self._region + + @property + def result_queue(self) -> AzureQueue: + assert self._result_queue + return self._result_queue + @property def account_url(self) -> str: return f"https://{self.storage_account}.blob.core.windows.net" @@ -209,11 +273,26 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: blob_client = blob_service_client.get_blob_client( container=self.container_name, blob=file_name ) + begin = datetime.datetime.now() with open(file=file_name, mode="rb") as payload_data: blob_client.upload_blob(payload_data, overwrite=True) self.logging.info(f"Uploaded payload to container {self.container_name}") - # TODO(oana): gather metrics + response = "" # TODO(oana) cleanup + # while (response == ""): + # response = self.result_queue.receive_message() + # if (response == ""): + # time.sleep(5) + while (response == ""): + time.sleep(5) + response = self.result_queue.receive_message() + + end = datetime.datetime.now() + + # TODO(oana) error handling + result = ExecutionResult.from_times(begin, end) + result.parse_benchmark_output(json.loads(response)) + return result def async_invoke(self, payload: dict) -> concurrent.futures.Future: @@ -226,9 +305,17 @@ def serialize(self) -> dict: "type": "Storage", "name": self.name, "storage_account": self.storage_account, + "region": self.region, + "result_queue": self.result_queue.serialize(), "container_name": self.container_name, } @staticmethod def deserialize(obj: dict) -> Trigger: - return StorageTrigger(obj["name"], obj["storage_account"], obj["container_name"]) + return StorageTrigger( + obj["name"], + obj["storage_account"], + obj["region"], + AzureQueue.deserialize(obj["result_queue"]), + obj["container_name"] + ) diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 6691f1b5..e8614cdc 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -248,7 +248,8 @@ def package_code( shutil.move(file, function_dir) requirements = open(os.path.join(directory, "requirements.txt"), "w") - requirements.write("google-cloud-storage") + requirements.write("google-cloud-storage\n") + requirements.write("google-cloud-pubsub") requirements.close() # rename handler function.py since in gcp it has to be caled main.py @@ -401,11 +402,17 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) self.logging.info(f"Created HTTP trigger for {function.name} function") elif trigger_type == Trigger.TriggerType.QUEUE: trigger = QueueTrigger( - function.name, self.get_trigger_resource_name(function.name), self + function.name, + self.get_trigger_resource_name(function.name), + self.config.region ) self.logging.info(f"Created Queue trigger for {function.name} function") elif trigger_type == Trigger.TriggerType.STORAGE: - trigger = StorageTrigger(function.name, self.get_trigger_resource_name(function.name)) + trigger = StorageTrigger( + function.name, + self.get_trigger_resource_name(function.name), + self.config.region + ) self.logging.info(f"Created Storage trigger for 
{function.name} function") else: raise RuntimeError("Not supported!") diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 556b46a9..072b9ba9 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -11,7 +11,9 @@ from google.cloud import storage as gcp_storage from sebs.gcp.gcp import GCP +from sebs.gcp.queue import GCPQueue from sebs.faas.function import ExecutionResult, Trigger +from sebs.faas.queue import QueueType class LibraryTrigger(Trigger): @@ -120,11 +122,28 @@ def deserialize(obj: dict) -> Trigger: class QueueTrigger(Trigger): - def __init__(self, fname: str, queue_name: str, deployment_client: Optional[GCP] = None): + def __init__( + self, + fname: str, + queue_name: str, + region: str, + result_queue: Optional[GCPQueue] = None + ): super().__init__() self.name = fname - self._deployment_client = deployment_client self._queue_name = queue_name + self._region = region + self._result_queue = result_queue + + # Create result queue for communicating benchmark results back to the + # client. + if (not self._result_queue): + self._result_queue = GCPQueue( + fname, + QueueType.RESULT, + self.region + ) + self._result_queue.create_queue() @staticmethod def typename() -> str: @@ -136,13 +155,14 @@ def queue_name(self) -> str: return self._queue_name @property - def deployment_client(self) -> GCP: - assert self._deployment_client - return self._deployment_client + def region(self) -> str: + assert self._region + return self._region - @deployment_client.setter - def deployment_client(self, deployment_client: GCP): - self._deployment_client = deployment_client + @property + def result_queue(self) -> GCPQueue: + assert self._result_queue + return self._result_queue @staticmethod def trigger_type() -> Trigger.TriggerType: @@ -160,6 +180,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: serialized_payload = base64.b64encode(json.dumps(payload).encode("utf-8")) # Publish payload to queue + begin = datetime.datetime.now() pub_sub.projects().topics().publish( topic=self.queue_name, body={ @@ -167,7 +188,16 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: }, ).execute() - # TODO(oana): gather metrics + response = "" + while (response == ""): + response = self.result_queue.receive_message() + + end = datetime.datetime.now() + + # TODO(oana) error handling + result = ExecutionResult.from_times(begin, end) + result.parse_benchmark_output(json.loads(response)) + return result def async_invoke(self, payload: dict) -> concurrent.futures.Future: @@ -176,18 +206,47 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "Queue", "name": self.name, "queue_name": self.queue_name} + return { + "type": "Queue", + "name": self.name, + "queue_name": self.queue_name, + "region": self.region, + "result_queue": self.result_queue.serialize() + } @staticmethod def deserialize(obj: dict) -> Trigger: - return QueueTrigger(obj["name"], obj["queue_name"]) + return QueueTrigger( + obj["name"], + obj["queue_name"], + obj["region"], + GCPQueue.deserialize(obj["result_queue"]) + ) class StorageTrigger(Trigger): - def __init__(self, fname: str, bucket_name: str): + def __init__( + self, + fname: str, + bucket_name: str, + region: str, + result_queue: Optional[GCPQueue] = None + ): super().__init__() self.name = fname self._bucket_name = bucket_name + self._region = region + self._result_queue = result_queue + + # Create result queue for communicating benchmark results back to the + # client. 
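The GCP queue trigger publishes through the discovery-based REST client (projects().topics().publish with a base64-encoded body). With google-cloud-pubsub now added to the deployed requirements, an equivalent publish using that package might look as follows; the project and topic names are placeholders:

    import json

    from google.cloud import pubsub_v1

    publisher = pubsub_v1.PublisherClient()
    topic_path = publisher.topic_path("my-project", "my-function-trigger")  # illustrative names

    # The client library handles the wire encoding; data must be raw bytes.
    future = publisher.publish(topic_path, data=json.dumps({"request-id": "abc"}).encode("utf-8"))
    future.result()  # block until Pub/Sub has accepted the message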
+ if (not self._result_queue): + self._result_queue = GCPQueue( + fname, + QueueType.RESULT, + self.region + ) + self._result_queue.create_queue() @staticmethod def typename() -> str: @@ -202,13 +261,23 @@ def bucket_name(self) -> str: assert self._bucket_name return self._bucket_name + @property + def region(self) -> str: + assert self._region + return self._region + + @property + def result_queue(self) -> GCPQueue: + assert self._result_queue + return self._result_queue + def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.info(f"Invoke function {self.name}") # Init clients client = gcp_storage.Client() - bucket_instance = client.bucket(self.bucket_name) + bucket_instance = client.bucket(self.name) # Prepare payload file_name = "payload.json" @@ -218,11 +287,21 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Upload object gcp_storage.blob._MAX_MULTIPART_SIZE = 5 * 1024 * 1024 blob = bucket_instance.blob(blob_name=file_name, chunk_size=4 * 1024 * 1024) + begin = datetime.datetime.now() blob.upload_from_filename(file_name) self.logging.info(f"Uploaded payload to bucket {self.bucket_name}") - # TODO(oana): gather metrics + response = "" + while (response == ""): + response = self.result_queue.receive_message() + + end = datetime.datetime.now() + + # TODO(oana) error handling + result = ExecutionResult.from_times(begin, end) + result.parse_benchmark_output(json.loads(response)) + return result def async_invoke(self, payload: dict) -> concurrent.futures.Future: @@ -231,8 +310,19 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "Storage", "name": self.name, "bucket_name": self.bucket_name} + return { + "type": "Storage", + "name": self.name, + "bucket_name": self.bucket_name, + "region": self.region, + "result_queue": self.result_queue.serialize() + } @staticmethod def deserialize(obj: dict) -> Trigger: - return StorageTrigger(obj["name"], obj["bucket_name"]) + return StorageTrigger( + obj["name"], + obj["bucket_name"], + obj["region"], + GCPQueue.deserialize(obj["result_queue"]) + ) diff --git a/tests/aws/create_function.py b/tests/aws/create_function.py index e672cc89..bb22cfb0 100644 --- a/tests/aws/create_function.py +++ b/tests/aws/create_function.py @@ -35,8 +35,8 @@ class AWSCreateFunction(unittest.TestCase): } } package_files = { - "python": ["handler.py", "function/storage.py", "requirements.txt", '.python_packages/'], - "nodejs": ["handler.js", "function/storage.js", "package.json", "node_modules/"] + "python": ["handler.py", "function/storage.py", "function/queue.py", "requirements.txt", '.python_packages/'], + "nodejs": ["handler.js", "function/storage.js", "function/queue.js", "package.json", "node_modules/"] } benchmark = "110.dynamic-html" function_name_suffixes = [] From f8f316210b9c8d67369471cdf452731d119d8d1f Mon Sep 17 00:00:00 2001 From: orosca Date: Mon, 9 Sep 2024 00:27:56 +0200 Subject: [PATCH 18/26] Fix --- sebs/benchmark.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sebs/benchmark.py b/sebs/benchmark.py index 8e2a5a86..f0911708 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -13,7 +13,7 @@ from sebs.cache import Cache from sebs.faas.config import Resources from sebs.utils import find_benchmark, project_absolute_path, LoggingBase -from sebs.faas.function import Trigger +# from sebs.faas.function import Trigger from sebs.faas.storage import PersistentStorage from typing import TYPE_CHECKING @@ -471,10 +471,12 @@ def 
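For the GCP storage trigger, writing payload.json into the watched bucket is what fires the function. A minimal sketch of that upload with google-cloud-storage, writing the payload directly from memory instead of a temporary file; the bucket name is a placeholder, and the deployed handler is assumed to fetch the object and treat it as its input:

    import json

    from google.cloud import storage

    client = storage.Client()
    bucket = client.bucket("my-benchmark-bucket")  # illustrative bucket name

    # The upload itself triggers the function; the object body carries the
    # benchmark input that the handler is expected to read back.
    blob = bucket.blob("payload.json")
    blob.upload_from_string(json.dumps({"request-id": "abc"}))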
recalculate_code_size(self): return self._code_size def build( - self, - deployment_build_step: Callable[ - [str, str, str, str, bool, Optional[Trigger.TriggerType]], Tuple[str, int] - ], + self, deployment_build_step: Callable[[str, str, str, str, bool], Tuple[str, int]] + # TODO(oana) fix? + # self, + # deployment_build_step: Callable[ + # [str, str, str, str, bool, Optional[Trigger.TriggerType]], Tuple[str, int] + # ], ) -> Tuple[bool, str]: # Skip build if files are up to date and user didn't enforce rebuild From dac2840e0d85d894e9122ac33630847c4a1731c1 Mon Sep 17 00:00:00 2001 From: orosca Date: Mon, 9 Sep 2024 10:20:47 +0200 Subject: [PATCH 19/26] Wrap up and clean up --- benchmarks/wrappers/azure/python/handler.py | 4 ++-- benchmarks/wrappers/gcp/python/handler.py | 3 ++- sebs/aws/triggers.py | 2 -- sebs/azure/azure.py | 2 -- sebs/azure/triggers.py | 8 +------- sebs/gcp/triggers.py | 2 -- 6 files changed, 5 insertions(+), 16 deletions(-) diff --git a/benchmarks/wrappers/azure/python/handler.py b/benchmarks/wrappers/azure/python/handler.py index e64b17c1..70843b6e 100644 --- a/benchmarks/wrappers/azure/python/handler.py +++ b/benchmarks/wrappers/azure/python/handler.py @@ -60,9 +60,9 @@ def handler_storage(blob: func.InputStream, context: func.Context): queue_client = queue.queue(queue_name, storage_account) queue_client.send_message(stats) +# Contains generic logic for gathering measurements for the function at hand, +# given a request JSON. Used by all handlers, regardless of the trigger. def measure(req_json) -> str: - # logging.info("TIPU") TODO(oana) remove - # logging.info(type(req_json)) req_id = req_json['request-id'] begin = datetime.datetime.now() diff --git a/benchmarks/wrappers/gcp/python/handler.py b/benchmarks/wrappers/gcp/python/handler.py index c547c52c..51a9d604 100644 --- a/benchmarks/wrappers/gcp/python/handler.py +++ b/benchmarks/wrappers/gcp/python/handler.py @@ -65,7 +65,8 @@ def handler_storage(data, context): queue_client = queue.queue(topic_name, project_id) queue_client.send_message(stats) -# TODO(oana) comment +# Contains generic logic for gathering measurements for the function at hand, +# given a request JSON. Used by all handlers, regardless of the trigger. 
def measure(req_json) -> str: req_id = req_json['request-id'] diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index 5c296c90..96b9bc20 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -217,7 +217,6 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: end = datetime.datetime.now() - # TODO(oana) error handling result = ExecutionResult.from_times(begin, end) result.parse_benchmark_output(json.loads(response)) return result @@ -371,7 +370,6 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: end = datetime.datetime.now() - # TODO(oana) error handling result = ExecutionResult.from_times(begin, end) result.parse_benchmark_output(json.loads(response)) return result diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index 638b3b25..f3257d3a 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -351,7 +351,6 @@ def update_function(self, function: Function, code_package: Benchmark): self._mount_function_code(code_package) url = self.publish_function(function, code_package, True) - # TODO(oana): this might need refactoring if function.name.endswith("http"): trigger = HTTPTrigger( url, self.config.resources.data_storage_account(self.cli_instance) @@ -624,7 +623,6 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) f" --target-resource-group {resource_group} " f" --account {storage_account} " f" --name {function.name} " - f" --client-type python " # TODO(oana) does this work for nodejs f" --system-identity " ) print(ret.decode()) diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index 96edff9a..2a2e96bc 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -146,7 +146,6 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: end = datetime.datetime.now() - # TODO(oana) error handling result = ExecutionResult.from_times(begin, end) result.parse_benchmark_output(json.loads(response)) return result @@ -278,18 +277,13 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: blob_client.upload_blob(payload_data, overwrite=True) self.logging.info(f"Uploaded payload to container {self.container_name}") - response = "" # TODO(oana) cleanup - # while (response == ""): - # response = self.result_queue.receive_message() - # if (response == ""): - # time.sleep(5) + response = "" while (response == ""): time.sleep(5) response = self.result_queue.receive_message() end = datetime.datetime.now() - # TODO(oana) error handling result = ExecutionResult.from_times(begin, end) result.parse_benchmark_output(json.loads(response)) return result diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 072b9ba9..41fbe18c 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -194,7 +194,6 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: end = datetime.datetime.now() - # TODO(oana) error handling result = ExecutionResult.from_times(begin, end) result.parse_benchmark_output(json.loads(response)) return result @@ -298,7 +297,6 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: end = datetime.datetime.now() - # TODO(oana) error handling result = ExecutionResult.from_times(begin, end) result.parse_benchmark_output(json.loads(response)) return result From 86f00308fa9afdcfc7fe6f6ef2f3c81a2acde15f Mon Sep 17 00:00:00 2001 From: orosca Date: Thu, 24 Oct 2024 22:53:34 +0200 Subject: [PATCH 20/26] Applications client-side infrastructure --- requirements.gcp.txt | 2 +- sebs.py | 157 +++++++++++++++++++++++--------- sebs/aws/aws.py | 51 +++++++++-- sebs/aws/function.py | 4 +- sebs/aws/queue.py 
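The measure() helper above is the shared per-invocation harness: it takes the request JSON and returns the gathered statistics as a string that the handlers push to the result queue. Its body is not part of this hunk; the following is a hypothetical sketch of a wrapper with that contract, with the field names and the explicit handler argument being illustrative simplifications rather than the actual implementation:

    import datetime
    import json

    def measure(req_json, benchmark_handler) -> str:
        # Time one benchmark invocation and serialize the measurements so they
        # can be sent back to the client over the result queue.
        req_id = req_json["request-id"]
        begin = datetime.datetime.now()
        ret = benchmark_handler(req_json)
        end = datetime.datetime.now()
        return json.dumps({
            "request_id": req_id,
            "begin": begin.timestamp(),
            "end": end.timestamp(),
            "compute_time_us": (end - begin) / datetime.timedelta(microseconds=1),
            "result": ret,
        })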
| 125 ++++++++++++++++++++++++++ sebs/aws/triggers.py | 165 +++++++++++++++++++++++++--------- sebs/azure/azure.py | 197 ++++++++++++++++++++++++++++++----------- sebs/azure/function.py | 6 +- sebs/azure/queue.py | 100 +++++++++++++++++++++ sebs/azure/triggers.py | 124 ++++++++++++++++++-------- sebs/benchmark.py | 82 +++++++++++++---- sebs/faas/function.py | 63 ++++++++++++- sebs/faas/queue.py | 62 +++++++++++++ sebs/faas/storage.py | 6 +- sebs/faas/system.py | 11 ++- sebs/gcp/function.py | 4 +- sebs/gcp/gcp.py | 50 +++++++++-- sebs/gcp/queue.py | 124 ++++++++++++++++++++++++++ sebs/gcp/triggers.py | 145 +++++++++++++++++++++++------- sebs/sebs.py | 2 + sebs/utils.py | 8 +- 21 files changed, 1243 insertions(+), 245 deletions(-) create mode 100644 sebs/aws/queue.py create mode 100644 sebs/azure/queue.py create mode 100644 sebs/faas/queue.py create mode 100644 sebs/gcp/queue.py diff --git a/requirements.gcp.txt b/requirements.gcp.txt index 3d1aea35..4550ac88 100644 --- a/requirements.gcp.txt +++ b/requirements.gcp.txt @@ -4,4 +4,4 @@ google-api-python-client==1.12.5 google-cloud-monitoring==2.0.0 google-api-python-client-stubs google-cloud-logging==2.0.0 -google-cloud-pubsub=2.23.0 \ No newline at end of file +google-cloud-pubsub diff --git a/sebs.py b/sebs.py index 9f0bf620..9e7a076e 100755 --- a/sebs.py +++ b/sebs.py @@ -14,7 +14,7 @@ from sebs import SeBS from sebs.types import Storage as StorageTypes from sebs.regression import regression_suite -from sebs.utils import update_nested_dict, catch_interrupt +from sebs.utils import update_nested_dict, catch_interrupt, find_benchmark from sebs.faas import System as FaaSSystem from sebs.faas.function import Trigger @@ -230,51 +230,128 @@ def invoke( experiment_config = sebs_client.get_experiment_config(config["experiments"]) update_nested_dict(config, ["experiments", "benchmark"], benchmark) - benchmark_obj = sebs_client.get_benchmark( - benchmark, - deployment_client, - experiment_config, - logging_filename=logging_filename, - ) - if memory is not None: - benchmark_obj.benchmark_config.memory = memory - if timeout is not None: - benchmark_obj.benchmark_config.timeout = timeout - - function_name = function_name if function_name else deployment_client.default_function_name(benchmark_obj) - - # GCP and Azure only allow one trigger per function, so augment function name with - # trigger type: _http, _queue etc. - # - # Additionally, Azure requires for the trigger to be defined at deployment time. - if deployment_client.name() == "gcp" or deployment_client.name() == "azure": - function_name = "{}-{}".format(function_name, trigger) - - func = deployment_client.get_function( - benchmark_obj, - function_name, - ) - storage = deployment_client.get_storage(replace_existing=experiment_config.update_storage) - input_config = benchmark_obj.prepare_input(storage=storage, size=benchmark_input_size) - result = sebs.experiments.ExperimentResult(experiment_config, deployment_client.config) - result.begin() + root_benchmark_path = find_benchmark(benchmark, "benchmarks") + if not root_benchmark_path: + raise RuntimeError("Benchmark {benchmark} not found!".format(benchmark=benchmark)) + with open(os.path.join(root_benchmark_path, "config.json")) as json_file: + root_benchmark_config = json.load(json_file) + + # Application handling. 
+ benchmark_objs = {} + if ("type" in root_benchmark_config and root_benchmark_config["type"] == "app"): + list_subfolders = [f.name for f in os.scandir(root_benchmark_path) if f.is_dir()] + + for function in list_subfolders: + benchmark_obj = sebs_client.get_benchmark( + benchmark, + deployment_client, + experiment_config, + app_function_name=function, + logging_filename=logging_filename + ) + + application_name = deployment_client.default_application_name(benchmark_obj) + function_name = deployment_client.default_function_name(benchmark_obj) + + benchmark_obj.application_name = application_name + + # All functions within an application need to be connected to the + # result queue. + benchmark_obj.benchmark_config.result_queue = f"{application_name}-result" + + trigger = benchmark_obj.benchmark_config.trigger + if deployment_client.name() == "gcp" or deployment_client.name() == "azure": + function_name = "{}-{}".format(function_name, trigger) + + func = deployment_client.get_function(benchmark_obj, function_name) + + storage = deployment_client.get_storage(replace_existing=experiment_config.update_storage) + input_config = benchmark_obj.prepare_input(storage=storage, size=benchmark_input_size) - trigger_type = Trigger.TriggerType.get(trigger) - triggers = func.triggers(trigger_type) - if len(triggers) == 0: - trigger = deployment_client.create_trigger(func, trigger_type) + benchmark_objs[benchmark_obj] = func + + # Start timing from just before triggers are deployed. + result = sebs.experiments.ExperimentResult(experiment_config, deployment_client.config) + result.begin() + + for benchmark_obj, func in benchmark_objs.items(): + trigger = benchmark_obj.benchmark_config.trigger + + trigger_type = Trigger.TriggerType.get(trigger) + triggers = func.triggers(trigger_type) + + if len(triggers) == 0: + if (benchmark_obj.benchmark_config.entrypoint): + trigger = deployment_client.create_trigger(func, trigger_type, with_result_queue=True) + else: + trigger = deployment_client.create_trigger(func, trigger_type) + else: + trigger = triggers[0] + + if (benchmark_obj.benchmark_config.entrypoint): + main_func = func + main_trigger = trigger + + func = main_func + trigger = main_trigger + + # Standalone function handling. else: - trigger = triggers[0] + benchmark_obj = sebs_client.get_benchmark( + benchmark, + deployment_client, + experiment_config, + logging_filename=logging_filename, + ) + if memory is not None: + benchmark_obj.benchmark_config.memory = memory + if timeout is not None: + benchmark_obj.benchmark_config.timeout = timeout + + function_name = function_name if function_name else deployment_client.default_function_name(benchmark_obj) + + # GCP and Azure only allow one trigger per function, so augment function name with + # trigger type: _http, _queue etc. + # + # Additionally, Azure requires for the trigger to be defined at deployment time. 
+ if deployment_client.name() == "gcp" or deployment_client.name() == "azure": + function_name = "{}-{}".format(function_name, trigger) + + if trigger == "queue" or trigger == "storage": + benchmark_obj.benchmark_config.result_queue = "{}-result".format(function_name) + + func = deployment_client.get_function( + benchmark_obj, + function_name, + ) + storage = deployment_client.get_storage(replace_existing=experiment_config.update_storage) + input_config = benchmark_obj.prepare_input(storage=storage, size=benchmark_input_size) + + result = sebs.experiments.ExperimentResult(experiment_config, deployment_client.config) + result.begin() + + trigger_type = Trigger.TriggerType.get(trigger) + triggers = func.triggers(trigger_type) + if len(triggers) == 0: + if (trigger_type == Trigger.TriggerType.QUEUE or trigger_type == Trigger.TriggerType.STORAGE): + trigger = deployment_client.create_trigger(func, trigger_type, with_result_queue=True) + else: + trigger = deployment_client.create_trigger(func, trigger_type) + else: + trigger = triggers[0] + + # This part is common for both apps and functions. for i in range(repetitions): sebs_client.logging.info(f"Beginning repetition {i+1}/{repetitions}") ret = trigger.sync_invoke(input_config) - if ret.stats.failure: - sebs_client.logging.info(f"Failure on repetition {i+1}/{repetitions}") - # deployment_client.get_invocation_error( - # function_name=func.name, start_time=start_time, end_time=end_time - # ) - result.add_invocation(func, ret) + for experiment in ret: + if experiment.stats.failure: + sebs_client.logging.info(f"Failure on repetition {i+1}/{repetitions}") + # deployment_client.get_invocation_error( + # function_name=func.name, start_time=start_time, end_time=end_time + # ) + result.add_invocation(func, experiment) result.end() result_file = os.path.join(output_dir, "experiments.json") diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 9bcb52e6..b147c996 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -199,6 +199,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun language_runtime, self.config.resources.lambda_role(self.session), function_cfg, + code_package.application_name, ) self.update_function(lambda_function, code_package) lambda_function.updated_code = True @@ -224,6 +225,12 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun self.logging.info("Uploading function {} code to {}".format(func_name, code_bucket)) code_config = {"S3Bucket": code_bucket, "S3Key": code_prefix} + + # Result queue added as an env variable. 
+ result_queue_env = {} + if (code_package.benchmark_config.result_queue): + result_queue_env["RESULT_QUEUE"] = code_package.benchmark_config.result_queue + ret = self.client.create_function( FunctionName=func_name, Runtime="{}{}".format( @@ -234,6 +241,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun MemorySize=memory, Timeout=timeout, Code=code_config, + Environment={"Variables": result_queue_env} ) lambda_function = LambdaFunction( @@ -245,6 +253,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun self.config.resources.lambda_role(self.session), function_cfg, code_bucket, + code_package.application_name ) self.wait_function_active(lambda_function) @@ -252,7 +261,11 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun # Add LibraryTrigger to a new function from sebs.aws.triggers import LibraryTrigger - trigger = LibraryTrigger(func_name, self) + trigger = LibraryTrigger( + func_name, + self, + application_name=code_package.application_name + ) trigger.logging_handlers = self.logging_handlers lambda_function.add_trigger(trigger) @@ -326,6 +339,13 @@ def update_function_configuration(self, function: Function, benchmark: Benchmark self.wait_function_updated(function) self.logging.info(f"Updated configuration of {function.name} function. ") + @staticmethod + def default_application_name(code_package: Benchmark) -> str: + app_name = "{}-{}-{}".format( + code_package.application_name, code_package.language_name, code_package.language_version + ) + return AWS.format_function_name(app_name) + @staticmethod def default_function_name(code_package: Benchmark) -> str: # Create function name @@ -491,7 +511,12 @@ def download_metrics( f"out of {results_count} invocations" ) - def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: + def create_trigger( + self, + func: Function, + trigger_type: Trigger.TriggerType, + with_result_queue: Optional[bool] = False + ) -> Trigger: from sebs.aws.triggers import HTTPTrigger, QueueTrigger, StorageTrigger function = cast(LambdaFunction, func) @@ -510,7 +535,13 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T Principal="apigateway.amazonaws.com", SourceArn=f"{http_api.arn}/*/*", ) - trigger = HTTPTrigger(http_api.endpoint, api_name) + trigger = HTTPTrigger( + func.name, + url=http_api.endpoint, + api_id=api_name, + application_name=func.application_name, + with_result_queue=with_result_queue + ) self.logging.info( f"Created HTTP trigger for {func.name} function. " "Sleep 5 seconds to avoid cloud errors." 
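The RESULT_QUEUE entry added to Environment above is how the deployed function later locates the queue it should report to. A minimal sketch of that reporting step from inside the Lambda wrapper, assuming the queue lives in the function's own region; the stats payload and helper name are illustrative:

    import json
    import os

    import boto3

    def report_result(stats: dict):
        # Resolve the queue name injected at deployment time and push the
        # serialized measurements back to the waiting client.
        queue_name = os.environ["RESULT_QUEUE"]
        sqs = boto3.client("sqs")
        queue_url = sqs.get_queue_url(QueueName=queue_name)["QueueUrl"]
        sqs.send_message(QueueUrl=queue_url, MessageBody=json.dumps(stats))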
@@ -521,11 +552,21 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T # should already exist return func.triggers(Trigger.TriggerType.LIBRARY)[0] elif trigger_type == Trigger.TriggerType.QUEUE: - trigger = QueueTrigger(func.name, self) + trigger = QueueTrigger( + func.name, + application_name=func.application_name, + deployment_client=self, + with_result_queue=with_result_queue + ) trigger.logging_handlers = self.logging_handlers self.logging.info(f"Created Queue trigger for {func.name} function.") elif trigger_type == Trigger.TriggerType.STORAGE: - trigger = StorageTrigger(func.name, self) + trigger = StorageTrigger( + func.name, + application_name=func.application_name, + deployment_client=self, + with_result_queue=with_result_queue + ) trigger.logging_handlers = self.logging_handlers self.logging.info(f"Created Storage trigger for {func.name} function.") else: diff --git a/sebs/aws/function.py b/sebs/aws/function.py index 24ce4a8d..e36bc0a3 100644 --- a/sebs/aws/function.py +++ b/sebs/aws/function.py @@ -16,8 +16,9 @@ def __init__( role: str, cfg: FunctionConfig, bucket: Optional[str] = None, + application_name: Optional[str] = None ): - super().__init__(benchmark, name, code_package_hash, cfg) + super().__init__(benchmark, name, code_package_hash, cfg, application_name) self.arn = arn self.role = role self.runtime = runtime @@ -51,6 +52,7 @@ def deserialize(cached_config: dict) -> "LambdaFunction": cached_config["role"], cfg, cached_config["bucket"], + cached_config["application_name"], ) for trigger in cached_config["triggers"]: trigger_type = cast( diff --git a/sebs/aws/queue.py b/sebs/aws/queue.py new file mode 100644 index 00000000..d2d7f3a3 --- /dev/null +++ b/sebs/aws/queue.py @@ -0,0 +1,125 @@ +from typing import Optional, cast +from sebs.aws.aws import AWS +from sebs.cache import Cache +from sebs.faas.config import Resources +from sebs.faas.queue import Queue, QueueType + +import boto3 + + +class SQS(Queue): + @staticmethod + def typename() -> str: + return "AWS.SQS" + + @staticmethod + def deployment_name(): + return "aws" + + @property + def queue_url(self): + return self._queue_url + + @property + def queue_arn(self): + return self._queue_arn + + def __init__( + self, + benchmark: str, + queue_type: QueueType, + region: str, + queue_url: Optional[str] = None, + queue_arn: Optional[str] = None + ): + super().__init__( + benchmark, + queue_type, + region + ) + self._queue_url = queue_url + self._queue_arn = queue_arn + + self.client = boto3.session.Session().client( + 'sqs', + region_name=self.region + ) + + def create_queue(self) -> str: + self.logging.debug(f"Creating queue {self.name}") + + if (self._queue_url and self._queue_arn): + self.logging.debug("Queue already exists, reusing...") + return + + self._queue_url = self.client.create_queue( + QueueName=self.name, + Attributes={ + "VisibilityTimeout": "3600" + } + )["QueueUrl"] + self._queue_arn = self.client.get_queue_attributes( + QueueUrl=self.queue_url, + AttributeNames=["QueueArn"], + )["Attributes"]["QueueArn"] + + self.logging.debug("Created queue") + + def remove_queue(self): + self.logging.info(f"Deleting queue {self.name}") + + self.client.delete_queue(QueueUrl=self.queue_url) + + self.logging.info("Deleted queue") + + def send_message(self, serialized_message: str): + self.client.send_message( + QueueUrl=self.queue_url, + MessageBody=serialized_message, + ) + self.logging.info(f"Sent message to queue {self.name}") + + def receive_message(self) -> str: + 
self.logging.info(f"Pulling a message from {self.name}") + + response = self.client.receive_message( + QueueUrl=self.queue_url, + AttributeNames=["SentTimestamp"], + MaxNumberOfMessages=1, + MessageAttributeNames=["All"], + WaitTimeSeconds=5, + ) + + if ("Messages" not in response): + self.logging.info("No messages to be received") + return "" + + self.logging.info(f"Received a message from {self.name}") + + # Delete the message from the queue - serves as an acknowledgement + # that it was received. + self.client.delete_message( + QueueUrl=self.queue_url, + ReceiptHandle=response["Messages"][0]["ReceiptHandle"], + ) + + return response["Messages"][0]["Body"] + + def serialize(self) -> dict: + return { + "name": self.name, + "type": self.queue_type, + "region": self.region, + "queue_url": self.queue_url, + "queue_arn": self.queue_arn, + } + + @staticmethod + def deserialize(obj: dict) -> "SQS": + return SQS( + obj["name"], + obj["type"], + obj["region"], + obj["queue_url"], + obj["queue_arn"] + ) diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index 96b9bc20..f4717137 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -14,15 +14,39 @@ class LibraryTrigger(Trigger): - def __init__(self, fname: str, deployment_client: Optional[AWS] = None): + def __init__( + self, + fname: str, + deployment_client: Optional[AWS] = None, + application_name: Optional[str] = None, + result_queue: Optional[SQS] = None, + with_result_queue: Optional[bool] = False + ): super().__init__() self.name = fname self._deployment_client = deployment_client + self._result_queue = result_queue + self.with_result_queue = with_result_queue + + # Create result queue for communicating benchmark results back to the + # client. + if (self.with_result_queue and not self._result_queue): + self._result_queue = SQS( + f'{application_name}-result', + QueueType.RESULT, + self.deployment_client.config.region + ) + self._result_queue.create_queue() @staticmethod def typename() -> str: return "AWS.LibraryTrigger" + @property + def result_queue(self) -> SQS: + assert self._result_queue + return self._result_queue + @property def deployment_client(self) -> AWS: assert self._deployment_client @@ -90,18 +114,49 @@ def async_invoke(self, payload: dict): return ret def serialize(self) -> dict: - return {"type": "Library", "name": self.name} + return { + "type": "Library", + "name": self.name, + "result_queue": self._result_queue.serialize() if self._result_queue else "", + "with_result_queue": self.with_result_queue + } @staticmethod def deserialize(obj: dict) -> Trigger: - return LibraryTrigger(obj["name"]) + return LibraryTrigger( + obj["name"], + None, + SQS.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + obj["with_result_queue"] + ) class HTTPTrigger(Trigger): - def __init__(self, url: str, api_id: str): + def __init__( + self, + fname: str, + url: str, + api_id: str, + application_name: Optional[str] = None, + result_queue: Optional[SQS] = None, + with_result_queue: Optional[bool] = False + ): super().__init__() + self.name = fname self.url = url self.api_id = api_id + self._result_queue = result_queue + self.with_result_queue = with_result_queue + + # Create result queue for communicating benchmark results back to the + # client. 
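The SQS wrapper above keeps the queue URL and ARN next to the create, send and receive logic, so triggers can cache queues and rebuild them from serialized state. A short usage sketch of its lifecycle as the triggers exercise it; the queue name and region are placeholders:

    from sebs.aws.queue import SQS
    from sebs.faas.queue import QueueType

    queue = SQS("my-benchmark-result", QueueType.RESULT, "us-east-1")  # illustrative name and region
    queue.create_queue()                 # creates the queue and records its URL and ARN

    queue.send_message('{"request-id": "abc"}')
    body = queue.receive_message()       # "" if nothing arrived within the 5-second wait window
    # receive_message() deletes the message it returns, which acts as the acknowledgement.

    queue.remove_queue()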
+ if (self.with_result_queue and not self._result_queue): + self._result_queue = SQS( + f'{application_name}-result', + QueueType.RESULT, + self.deployment_client.config.region + ) + self._result_queue.create_queue() @staticmethod def typename() -> str: @@ -111,6 +166,11 @@ def typename() -> str: def trigger_type() -> Trigger.TriggerType: return Trigger.TriggerType.HTTP + @property + def result_queue(self) -> SQS: + assert self._result_queue + return self._result_queue + def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.debug(f"Invoke function {self.url}") @@ -123,11 +183,24 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "HTTP", "url": self.url, "api-id": self.api_id} + return { + "type": "HTTP", + "name": self.name, + "url": self.url, + "api-id": self.api_id, + "result_queue": self._result_queue.serialize() if self._result_queue else "", + "with_result_queue": self.with_result_queue + } @staticmethod def deserialize(obj: dict) -> Trigger: - return HTTPTrigger(obj["url"], obj["api-id"]) + return HTTPTrigger( + obj["name"], + obj["url"], + obj["api-id"], + SQS.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + obj["with_result_queue"] + ) class QueueTrigger(Trigger): @@ -136,13 +209,16 @@ def __init__( fname: str, deployment_client: Optional[AWS] = None, queue: Optional[SQS] = None, - result_queue: Optional[SQS] = None + application_name: Optional[str] = None, + result_queue: Optional[SQS] = None, + with_result_queue: Optional[bool] = False ): super().__init__() self.name = fname self._queue = queue self._result_queue = result_queue self._deployment_client = deployment_client + self.with_result_queue = with_result_queue if (not self._queue): self._queue = SQS( @@ -162,14 +238,16 @@ def __init__( lambda_client.create_event_source_mapping( EventSourceArn=self.queue.queue_arn, FunctionName=self.name, + Enabled=True, + BatchSize=1, MaximumBatchingWindowInSeconds=1, ) # Create result queue for communicating benchmark results back to the # client. 
- if (not self._result_queue): + if (self.with_result_queue and not self._result_queue): self._result_queue = SQS( - fname, + f'{application_name}-result', QueueType.RESULT, self.deployment_client.config.region ) @@ -211,15 +289,15 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: begin = datetime.datetime.now() self.queue.send_message(serialized_payload) - response = "" - while (response == ""): - response = self.result_queue.receive_message() - - end = datetime.datetime.now() + results = self.collect_async_results(self.result_queue) + + ret = [] + for recv_ts, result_data in results.items(): + result = ExecutionResult.from_times(begin, recv_ts) + result.parse_benchmark_output(result_data) + ret.append(result) - result = ExecutionResult.from_times(begin, end) - result.parse_benchmark_output(json.loads(response)) - return result + return ret def async_invoke(self, payload: dict) -> concurrent.futures.Future: @@ -232,7 +310,8 @@ def serialize(self) -> dict: "type": "Queue", "name": self.name, "queue": self.queue.serialize(), - "result_queue": self.result_queue.serialize() + "result_queue": self._result_queue.serialize() if self._result_queue else "", + "with_result_queue": self.with_result_queue } @staticmethod @@ -241,7 +320,8 @@ def deserialize(obj: dict) -> Trigger: obj["name"], None, SQS.deserialize(obj["queue"]), - SQS.deserialize(obj["result_queue"]) + SQS.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + obj["with_result_queue"] ) @@ -251,21 +331,17 @@ def __init__( fname: str, deployment_client: Optional[AWS] = None, bucket_name: Optional[str] = None, - result_queue: Optional[SQS] = None + application_name: Optional[str] = None, + result_queue: Optional[SQS] = None, + with_result_queue: Optional[bool] = False ): super().__init__() self.name = fname - self._deployment_client = None - self._bucket_name = None - self._result_queue = None - - if deployment_client: - self._deployment_client = deployment_client - if bucket_name: - self._bucket_name = bucket_name - if result_queue: - self._result_queue = result_queue + self._deployment_client = deployment_client + self._bucket_name = bucket_name + self._result_queue = result_queue + self.with_result_queue = with_result_queue # When creating the trigger for the first time, also create and store # storage bucket information. @@ -317,9 +393,9 @@ def __init__( # Create result queue for communicating benchmark results back to the # client. 
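Connecting the trigger queue to the Lambda function is done with an event source mapping, as in the QueueTrigger constructor above; BatchSize=1 keeps one queue message per invocation, so each benchmark request maps to exactly one execution. The equivalent standalone boto3 call, with a placeholder ARN, function name and region:

    import boto3

    lambda_client = boto3.client("lambda", region_name="us-east-1")  # illustrative region
    lambda_client.create_event_source_mapping(
        EventSourceArn="arn:aws:sqs:us-east-1:123456789012:my-trigger-queue",  # placeholder ARN
        FunctionName="my-benchmark-function",
        Enabled=True,
        BatchSize=1,                       # one message per invocation
        MaximumBatchingWindowInSeconds=1,  # do not wait to accumulate a batch
    )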
- if (not self._result_queue): + if (self.with_result_queue and not self._result_queue): self._result_queue = SQS( - fname, + f'{application_name}-result', QueueType.RESULT, self.deployment_client.config.region ) @@ -364,15 +440,15 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: s3.Object(self.bucket_name, "payload.json").put(Body=serialized_payload) self.logging.info(f"Uploaded payload to bucket {self.bucket_name}") - response = "" - while (response == ""): - response = self.result_queue.receive_message() + results = self.collect_async_results(self.result_queue) - end = datetime.datetime.now() + ret = [] + for recv_ts, result_data in results.items(): + result = ExecutionResult.from_times(begin, recv_ts) + result.parse_benchmark_output(result_data) + ret.append(result) - result = ExecutionResult.from_times(begin, end) - result.parse_benchmark_output(json.loads(response)) - return result + return ret def async_invoke(self, payload: dict) -> concurrent.futures.Future: @@ -385,14 +461,15 @@ def serialize(self) -> dict: "type": "Storage", "name": self.name, "bucket_name": self.bucket_name, - "result_queue": self.result_queue.serialize() + "result_queue": self._result_queue.serialize() if self._result_queue else "", + "with_result_queue": self.with_result_queue } @staticmethod def deserialize(obj: dict) -> Trigger: return StorageTrigger( - obj["name"], - None, - obj["bucket_name"], - SQS.deserialize(obj["result_queue"]) + fname=obj["name"], + bucket_name=obj["bucket_name"], + result_queue=SQS.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + with_result_queue=obj["with_result_queue"] ) diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index f3257d3a..f96ba3f3 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -12,7 +12,7 @@ from sebs.azure.cli import AzureCLI from sebs.azure.function import AzureFunction from sebs.azure.config import AzureConfig, AzureResources -from sebs.azure.triggers import AzureTrigger, HTTPTrigger +from sebs.azure.triggers import AzureTrigger, HTTPTrigger, QueueTrigger, StorageTrigger from sebs.faas.function import Trigger from sebs.benchmark import Benchmark from sebs.cache import Cache @@ -171,7 +171,7 @@ def create_function_json(self, benchmark, exec_files) -> Dict: "type": "queueTrigger", "direction": "in", "queueName": benchmark, - "connection": "AzureWebJobsStorage", + "connection": "DATA_STORAGE_ACCOUNT_CONN_STR", } ], } @@ -185,7 +185,7 @@ def create_function_json(self, benchmark, exec_files) -> Dict: "type": "blobTrigger", "direction": "in", "path": benchmark, - "connection": "AzureWebJobsStorage", + "connection": "DATA_STORAGE_ACCOUNT_CONN_STR", } ], } @@ -349,14 +349,7 @@ def update_function(self, function: Function, code_package: Benchmark): # Mount code package in Docker instance self._mount_function_code(code_package) - url = self.publish_function(function, code_package, True) - - if function.name.endswith("http"): - trigger = HTTPTrigger( - url, self.config.resources.data_storage_account(self.cli_instance) - ) - trigger.logging_handlers = self.logging_handlers - function.add_trigger(trigger) + url = self.publish_function(function, code_package, True) def update_function_configuration(self, function: Function, code_package: Benchmark): # FIXME: this does nothing currently - we don't specify timeout @@ -367,6 +360,19 @@ def update_function_configuration(self, function: Function, code_package: Benchm def _mount_function_code(self, code_package: Benchmark): 
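Both sync_invoke implementations above now delegate to collect_async_results(), which can return several results for one upload; this is what lets an application with multiple functions report back through a single result queue. The helper itself is not shown in this hunk; a rough, hypothetical sketch of the contract the callers rely on, a mapping from receive timestamp to parsed benchmark output, with the stopping rule here being an assumption:

    import datetime
    import json
    import time

    def collect_async_results(result_queue, quiet_period=10):
        # Drain the result queue until no new message has arrived for
        # `quiet_period` seconds; key each parsed result by the local time at
        # which it was received. The real helper's stopping condition may differ.
        results = {}
        last_seen = time.monotonic()
        while time.monotonic() - last_seen < quiet_period:
            message = result_queue.receive_message()
            if message != "":
                results[datetime.datetime.now()] = json.loads(message)
                last_seen = time.monotonic()
        return results

The callers then turn each (begin, receive timestamp) pair into an ExecutionResult and feed the stored output to parse_benchmark_output().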
self.cli_instance.upload_package(code_package.code_location, "/mnt/function/") + def default_application_name(self, code_package: Benchmark) -> str: + func_name = ( + "{}-{}-{}-{}".format( + code_package.application_name, + code_package.language_name, + code_package.language_version, + self.config.resources.resources_id, + ) + .replace(".", "-") + .replace("_", "-") + ) + return func_name + def default_function_name(self, code_package: Benchmark) -> str: """ Functionapp names must be globally unique in Azure. @@ -433,6 +439,31 @@ def create_function(self, code_package: Benchmark, func_name: str) -> AzureFunct " --name {func_name} --storage-account {storage_account}" ).format(**config) ) + + # Add result queue env var. + result_queue_env = f"RESULT_QUEUE={code_package.benchmark_config.result_queue}" + self.cli_instance.execute( + f"az functionapp config appsettings set --name {func_name} " + f" --resource-group {resource_group} " + f" --settings {result_queue_env}" + ) + + # Set the data storage account as env vars in the function. + resource_group = self.config.resources.resource_group(self.cli_instance) + data_storage_account = self.config.resources.data_storage_account(self.cli_instance) + + self.cli_instance.execute( + f"az functionapp config appsettings set --name {func_name} " + f" --resource-group {resource_group} " + f" --settings DATA_STORAGE_ACCOUNT={data_storage_account.account_name}" + ) + + self.cli_instance.execute( + f"az functionapp config appsettings set --name {func_name} " + f" --resource-group {resource_group} " + f" --settings DATA_STORAGE_ACCOUNT_CONN_STR={data_storage_account.connection_string}" + ) + self.logging.info("Azure: Created function app {}".format(func_name)) break except RuntimeError as e: @@ -450,6 +481,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> AzureFunct code_hash=code_package.hash, function_storage=function_storage_account, cfg=function_cfg, + application_name=code_package.application_name, ) # update existing function app @@ -576,63 +608,124 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) the user when SeBS is run. """ - def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: - from sebs.azure.triggers import QueueTrigger, StorageTrigger + def create_trigger( + self, + function: Function, + trigger_type: Trigger.TriggerType, + with_result_queue: Optional[bool] = False + ) -> Trigger: azure_function = cast(AzureFunction, function) resource_group = self.config.resources.resource_group(self.cli_instance) - storage_account = azure_function.function_storage.account_name - - user_principal_name = self.cli_instance.execute("az ad user list") + data_storage_account = self.config.resources.data_storage_account(self.cli_instance).account_name storage_account_scope = self.cli_instance.execute( ("az storage account show --resource-group {} --name {} --query id").format( - resource_group, storage_account + resource_group, data_storage_account ) - ) + ).decode('utf-8') - self.cli_instance.execute( - ( - 'az role assignment create --assignee "{}" \ - --role "Storage {} Data Contributor" \ - --scope {}' - ).format( - json.loads(user_principal_name.decode("utf-8"))[0]["userPrincipalName"], - "Queue" if trigger_type == Trigger.TriggerType.QUEUE else "Blob", - storage_account_scope.decode("utf-8"), + user_principal_name = self.cli_instance.execute("az ad user list").decode('utf-8') + + # All functions in an application need permission to write to the + # result queue. 
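The DATA_STORAGE_ACCOUNT and RESULT_QUEUE app settings configured here, together with the role assignments and Service Connector binding set up in create_trigger, give the function app what it needs to reach the result queue through its managed identity. A minimal sketch of the reporting step from inside the function, assuming managed-identity authentication; the helper name is illustrative:

    import os

    from azure.identity import DefaultAzureCredential
    from azure.storage.queue import QueueClient

    def report_result(stats_json: str):
        # Resolve the storage account and queue injected as app settings at
        # deployment time, then push the serialized measurements to the client.
        account = os.environ["DATA_STORAGE_ACCOUNT"]
        queue_name = os.environ["RESULT_QUEUE"]
        client = QueueClient(
            f"https://{account}.queue.core.windows.net",
            queue_name=queue_name,
            credential=DefaultAzureCredential(),  # uses the function app's managed identity
        )
        client.send_message(stats_json)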
+ if (function.application_name is not None): + function_principal = self.cli_instance.execute( + ( + 'az functionapp identity assign \ + --name {} \ + --resource-group {}' + ).format( + function.name, + self.config.resources.resource_group(self.cli_instance) + ) + ).decode('utf-8') + + self.cli_instance.execute( + ( + 'az role assignment create --assignee "{}" \ + --role "Storage Queue Data Contributor" \ + --scope {}' + ).format( + json.loads(function_principal)['principalId'], + storage_account_scope + ) ) - ) - trigger: Trigger - if trigger_type == Trigger.TriggerType.QUEUE or trigger_type == Trigger.TriggerType.STORAGE: - resource_group = self.config.resources.resource_group(self.cli_instance) + # Storage-triggered functions require Blob Storage access. + if (trigger_type == Trigger.TriggerType.STORAGE): + self.cli_instance.execute( + ( + 'az role assignment create --assignee "{}" \ + --role "Storage Blob Data Owner" \ + --scope {}' + ).format( + json.loads(user_principal_name)[0]["userPrincipalName"], + storage_account_scope, + ) + ) - # Set the storage account as an env var on the function. - ret = self.cli_instance.execute( - f"az functionapp config appsettings set --name {function.name} " - f" --resource-group {resource_group} " - f" --settings STORAGE_ACCOUNT={storage_account}" + # Everything async needs queue access attached to the SeBS client. + if (function.application_name is not None + or trigger_type == Trigger.TriggerType.QUEUE + or trigger_type == Trigger.TriggerType.STORAGE + ): + self.cli_instance.execute( + ( + 'az role assignment create --assignee "{}" \ + --role "Storage Queue Data Contributor" \ + --scope {}' + ).format( + json.loads(user_principal_name)[0]["userPrincipalName"], + storage_account_scope, + ) ) - print(ret.decode()) # Connect the function app to the result queue via Service # Connector. 
- ret = self.cli_instance.execute( - f"az webapp connection create storage-queue " - f" --resource-group {resource_group} " - f" --target-resource-group {resource_group} " - f" --account {storage_account} " - f" --name {function.name} " - f" --system-identity " + self.cli_instance.execute( + ( + 'az webapp connection create storage-queue \ + --resource-group {} \ + --target-resource-group {} \ + --account {} \ + --name {} \ + --system-identity' + ).format( + resource_group, + resource_group, + data_storage_account, + function.name + ) + ) + + trigger: Trigger + if trigger_type == Trigger.TriggerType.HTTP: + trigger = HTTPTrigger( + function.name, + url=url, + storage_account=data_storage_account, + application_name=function.application_name, + ) + self.logging.info(f"Created HTTP trigger for {function.name} function") + elif trigger_type == Trigger.TriggerType.QUEUE: + trigger = QueueTrigger( + function.name, + storage_account=data_storage_account, + region=self.config.region, + application_name=function.application_name, + with_result_queue=with_result_queue, + ) + self.logging.info(f"Created Queue trigger for {function.name} function") + elif trigger_type == Trigger.TriggerType.STORAGE: + trigger = StorageTrigger( + function.name, + storage_account=data_storage_account, + region=self.config.region, + application_name=function.application_name, + with_result_queue=with_result_queue, ) - print(ret.decode()) - - if trigger_type == Trigger.TriggerType.QUEUE: - trigger = QueueTrigger(function.name, storage_account, self.config.region) - self.logging.info(f"Created Queue trigger for {function.name} function") - elif trigger_type == Trigger.TriggerType.STORAGE: - trigger = StorageTrigger(function.name, storage_account, self.config.region) - self.logging.info(f"Created Storage trigger for {function.name} function") + self.logging.info(f"Created Storage trigger for {function.name} function") else: raise RuntimeError("Not supported!") diff --git a/sebs/azure/function.py b/sebs/azure/function.py index 375c0b79..abff5f92 100644 --- a/sebs/azure/function.py +++ b/sebs/azure/function.py @@ -1,4 +1,4 @@ -from typing import cast +from typing import cast, Optional from sebs.azure.config import AzureResources from sebs.faas.function import Function, FunctionConfig @@ -12,8 +12,9 @@ def __init__( code_hash: str, function_storage: AzureResources.Storage, cfg: FunctionConfig, + application_name: Optional[str] = None ): - super().__init__(benchmark, name, code_hash, cfg) + super().__init__(benchmark, name, code_hash, cfg, application_name) self.function_storage = function_storage @staticmethod @@ -38,6 +39,7 @@ def deserialize(cached_config: dict) -> Function: cached_config["hash"], AzureResources.Storage.deserialize(cached_config["function_storage"]), cfg, + cached_config["application_name"], ) for trigger in cached_config["triggers"]: trigger_type = cast( diff --git a/sebs/azure/queue.py b/sebs/azure/queue.py new file mode 100644 index 00000000..a9698254 --- /dev/null +++ b/sebs/azure/queue.py @@ -0,0 +1,100 @@ +import time + +from sebs.faas.queue import Queue, QueueType + +from azure.core.exceptions import ResourceExistsError +from azure.identity import DefaultAzureCredential +from azure.storage.queue import QueueClient + + +class AzureQueue(Queue): + @staticmethod + def typename() -> str: + return "Azure.Queue" + + @staticmethod + def deployment_name(): + return "azure" + + @property + def storage_account(self) -> str: + assert self._storage_account + return self._storage_account + + @property + def 
account_url(self) -> str: + return f"https://{self.storage_account}.queue.core.windows.net" + + def __init__( + self, + benchmark: str, + queue_type: QueueType, + storage_account: str, + region: str + ): + default_credential = DefaultAzureCredential() + + super().__init__( + benchmark, + queue_type, + region + ) + self._storage_account = storage_account + self.client = QueueClient(self.account_url, + queue_name=self.name, + credential=default_credential) + + def create_queue(self): + self.logging.info(f"Creating queue {self.name}") + + try: + self.client.create_queue() + self.logging.info("Created queue") + except ResourceExistsError: + self.logging.info("Queue already exists, reusing...") + + def remove_queue(self): + self.logging.info(f"Deleting queue {self.name}") + + self.client.delete_queue() + + self.logging.info("Deleted queue") + + def send_message(self, serialized_message: str): + self.client.send_message(serialized_message) + self.logging.info(f"Sent message to queue {self.name}") + + def receive_message(self) -> str: + self.logging.info(f"Pulling a message from {self.name}") + + response = self.client.receive_messages( + max_messages=1, + timeout=5, + ) + + for msg in response: + self.logging.info(f"Received a message from {self.name}") + self.client.delete_message(msg) + return msg.content + + self.logging.info("No messages to be received") + + time.sleep(5) + return "" + + def serialize(self) -> dict: + return { + "name": self.name, + "type": self.queue_type, + "storage_account": self.storage_account, + "region": self.region + } + + @staticmethod + def deserialize(obj: dict) -> "AzureQueue": + return AzureQueue( + obj["name"], + obj["type"], + obj["storage_account"], + obj["region"] + ) diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index 2a2e96bc..95d06a2f 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -32,14 +32,40 @@ def data_storage_account(self, data_storage_account: AzureResources.Storage): class HTTPTrigger(AzureTrigger): - def __init__(self, url: str, data_storage_account: Optional[AzureResources.Storage] = None): + def __init__( + self, + fname: str, + url: str, + data_storage_account: Optional[AzureResources.Storage] = None, + result_queue: Optional[AzureQueue] = None, + with_result_queue: Optional[bool] = False + ): super().__init__(data_storage_account) + self.name = fname self.url = url + self._result_queue = result_queue + self.with_result_queue = with_result_queue + + # Create result queue for communicating benchmark results back to the + # client. 
+ if (self.with_result_queue and not self._result_queue): + self._result_queue = AzureQueue( + self.name, + QueueType.RESULT, + data_storage_account, + self.region + ) + self._result_queue.create_queue() @staticmethod def trigger_type() -> Trigger.TriggerType: return Trigger.TriggerType.HTTP + @property + def result_queue(self) -> AzureQueue: + assert self._result_queue + return self._result_queue + def sync_invoke(self, payload: dict) -> ExecutionResult: payload["connection_string"] = self.data_storage_account.connection_string @@ -51,11 +77,22 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "HTTP", "url": self.url} + return { + "type": "HTTP", + "name": self.name, + "url": self.url, + "result_queue": self._result_queue.serialize() if self._result_queue else "", + "with_result_queue": self.with_result_queue, + } @staticmethod def deserialize(obj: dict) -> Trigger: - return HTTPTrigger(obj["url"]) + return HTTPTrigger( + obj["name"], + obj["url"], + AzureQueue.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + obj["with_result_queue"], + ) class QueueTrigger(Trigger): @@ -65,7 +102,9 @@ def __init__( storage_account: str, region: str, queue: Optional[AzureQueue] = None, - result_queue: Optional[AzureQueue] = None + application_name: Optional[str] = None, + result_queue: Optional[AzureQueue] = None, + with_result_queue: Optional[bool] = False ): super().__init__() self.name = fname @@ -73,6 +112,7 @@ def __init__( self._region = region self._queue = queue self._result_queue = result_queue + self.with_result_queue = with_result_queue if (not self._queue): self._queue = AzureQueue( @@ -83,11 +123,13 @@ def __init__( ) self.queue.create_queue() - if (not self._result_queue): + # Create result queue for communicating benchmark results back to the + # client. 
+ if (self.with_result_queue and not self._result_queue): self._result_queue = AzureQueue( - fname, + f"{application_name}-result", QueueType.RESULT, - storage_account, + self.storage_account, self.region ) self._result_queue.create_queue() @@ -138,17 +180,15 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: begin = datetime.datetime.now() self.queue.send_message(serialized_payload) - response = "" - while (response == ""): - response = self.result_queue.receive_message() - if (response == ""): - time.sleep(5) + results = self.collect_async_results(self.result_queue) - end = datetime.datetime.now() + ret = [] + for recv_ts, result_data in results.items(): + result = ExecutionResult.from_times(begin, recv_ts) + result.parse_benchmark_output(result_data) + ret.append(result) - result = ExecutionResult.from_times(begin, end) - result.parse_benchmark_output(json.loads(response)) - return result + return ret def async_invoke(self, payload: dict) -> concurrent.futures.Future: @@ -163,7 +203,8 @@ def serialize(self) -> dict: "storage_account": self.storage_account, "region": self.region, "queue": self.queue.serialize(), - "result_queue": self.result_queue.serialize() + "result_queue": self._result_queue.serialize() if self._result_queue else "", + "with_result_queue": self.with_result_queue, } @staticmethod @@ -173,7 +214,8 @@ def deserialize(obj: dict) -> Trigger: obj["storage_account"], obj["region"], AzureQueue.deserialize(obj["queue"]), - AzureQueue.deserialize(obj["result_queue"]) + AzureQueue.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + obj["with_result_queue"], ) @@ -183,14 +225,17 @@ def __init__( fname: str, storage_account: str, region: str, + application_name: Optional[str] = None, result_queue: Optional[AzureQueue] = None, - container_name: Optional[str] = None + with_result_queue: Optional[bool] = False, + container_name: Optional[str] = None, ): super().__init__() self.name = fname self._storage_account = storage_account self._region = region self._result_queue = result_queue + self.with_result_queue = with_result_queue self._container_name = None if container_name: @@ -214,11 +259,13 @@ def __init__( except ResourceExistsError: self.logging.info("Container already exists, reusing...") - if (not self._result_queue): + # Create result queue for communicating benchmark results back to the + # client. 
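# --- Illustrative aside (not part of the patch): the result-collection protocol
# assumed by collect_async_results, added to sebs/faas/function.py in this patch.
# Each invoked function reports a JSON message of roughly this shape; the ids
# below are hypothetical:
example_message = {
    "request_id": "A",
    "result": {
        "fns_triggered": 2,             # set by a function that invoked 2 follow-up functions
        # "parent_execution_id": "A",   # set instead on messages from child functions
    },
}
# Bookkeeping trace for one root invocation "A" that triggers children "B" and "C":
#   after A's message: executions == {"A": 2}
#   after B's message (parent_execution_id == "A"): executions == {"A": 1}
#   after C's message: executions == {"A": 0} -> entry removed -> map empty -> collection stops
# --- end of aside ---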
+ if (self.with_result_queue and not self._result_queue): self._result_queue = AzureQueue( - fname, + f"{application_name}-result", QueueType.RESULT, - storage_account, + self.storage_account, self.region ) self._result_queue.create_queue() @@ -255,7 +302,7 @@ def container_name(self) -> str: assert self._container_name return self._container_name - def sync_invoke(self, payload: dict) -> ExecutionResult: + def sync_invoke(self, payload: dict) -> list[ExecutionResult]: self.logging.info(f"Invoke function {self.name}") @@ -277,16 +324,15 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: blob_client.upload_blob(payload_data, overwrite=True) self.logging.info(f"Uploaded payload to container {self.container_name}") - response = "" - while (response == ""): - time.sleep(5) - response = self.result_queue.receive_message() - - end = datetime.datetime.now() + results = self.collect_async_results(self.result_queue) - result = ExecutionResult.from_times(begin, end) - result.parse_benchmark_output(json.loads(response)) - return result + ret = [] + for recv_ts, result_data in results.items(): + result = ExecutionResult.from_times(begin, recv_ts) + result.parse_benchmark_output(result_data) + ret.append(result) + + return ret def async_invoke(self, payload: dict) -> concurrent.futures.Future: @@ -300,16 +346,18 @@ def serialize(self) -> dict: "name": self.name, "storage_account": self.storage_account, "region": self.region, - "result_queue": self.result_queue.serialize(), + "result_queue": self._result_queue.serialize() if self._result_queue else "", + "with_result_queue": self.with_result_queue, "container_name": self.container_name, } @staticmethod def deserialize(obj: dict) -> Trigger: return StorageTrigger( - obj["name"], - obj["storage_account"], - obj["region"], - AzureQueue.deserialize(obj["result_queue"]), - obj["container_name"] + fname=obj["name"], + storage_account=obj["storage_account"], + region=obj["region"], + result_queue=AzureQueue.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + with_result_queue=obj["with_result_queue"], + container_name=obj["container_name"], ) diff --git a/sebs/benchmark.py b/sebs/benchmark.py index f0911708..b1e71935 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -23,10 +23,21 @@ class BenchmarkConfig: - def __init__(self, timeout: int, memory: int, languages: List["Language"]): + def __init__( + self, + timeout: int, + memory: int, + languages: List["Language"], + trigger: Optional[str] = None, + entrypoint: Optional[bool] = False, + result_queue: Optional[str] = None + ): self._timeout = timeout self._memory = memory self._languages = languages + self._trigger = trigger + self._entrypoint = entrypoint + self._result_queue = result_queue @property def timeout(self) -> int: @@ -48,6 +59,26 @@ def memory(self, val: int): def languages(self) -> List["Language"]: return self._languages + @property + def trigger(self) -> str: + return self._trigger + + @trigger.setter + def trigger(self, val: str): + self._trigger = val + + @property + def entrypoint(self) -> bool: + return self._entrypoint + + @property + def result_queue(self) -> str: + return self._result_queue + + @result_queue.setter + def result_queue(self, val: str): + self._result_queue = val + # FIXME: 3.7+ python with future annotations @staticmethod def deserialize(json_object: dict) -> "BenchmarkConfig": @@ -57,6 +88,9 @@ def deserialize(json_object: dict) -> "BenchmarkConfig": json_object["timeout"], json_object["memory"], [Language.deserialize(x) for x in 
json_object["languages"]], + json_object["trigger"] if "trigger" in json_object else None, + json_object["entrypoint"] if "entrypoint" in json_object else None, + json_object["result_queue"] if "result_queue" in json_object else None ) @@ -137,6 +171,14 @@ def language_name(self) -> str: def language_version(self): return self._language_version + @property + def application_name(self) -> str: + return self._application_name + + @application_name.setter + def application_name(self, val: str): + self._application_name = val + @property # noqa: A003 def hash(self): path = os.path.join(self.benchmark_path, self.language_name) @@ -159,6 +201,7 @@ def __init__( output_dir: str, cache_client: Cache, docker_client: docker.client, + app_function_name: Optional[str] = None ): super().__init__() self._benchmark = benchmark @@ -166,9 +209,11 @@ def __init__( self._experiment_config = config self._language = config.runtime.language self._language_version = config.runtime.version - self._benchmark_path = find_benchmark(self.benchmark, "benchmarks") + self._application_name = benchmark if app_function_name is not None else None + self._benchmark_path = find_benchmark(self.benchmark, "benchmarks", app_function_name) if not self._benchmark_path: - raise RuntimeError("Benchmark {benchmark} not found!".format(benchmark=self._benchmark)) + benchmark = f"{self._benchmark}-{app_function_name}" if app_function_name is not None else self._benchmark + raise RuntimeError("Benchmark {benchmark} not found!".format(benchmark=benchmark)) with open(os.path.join(self.benchmark_path, "config.json")) as json_file: self._benchmark_config: BenchmarkConfig = BenchmarkConfig.deserialize( json.load(json_file) @@ -181,9 +226,15 @@ def __init__( self._docker_client = docker_client self._system_config = system_config self._hash_value = None - self._output_dir = os.path.join( - output_dir, f"{benchmark}_code", self._language.value, self._language_version - ) + if (self.application_name): + self._output_dir = os.path.join( + output_dir, f"{benchmark}_code", app_function_name, self._language.value, self._language_version + ) + self._benchmark = '{}.{}'.format(self._benchmark, app_function_name) + else: + self._output_dir = os.path.join( + output_dir, f"{benchmark}_code", self._language.value, self._language_version + ) # verify existence of function in cache self.query_cache() @@ -471,12 +522,7 @@ def recalculate_code_size(self): return self._code_size def build( - self, deployment_build_step: Callable[[str, str, str, str, bool], Tuple[str, int]] - # TODO(oana) fix? - # self, - # deployment_build_step: Callable[ - # [str, str, str, str, bool, Optional[Trigger.TriggerType]], Tuple[str, int] - # ], + self, deployment_build_step: Callable[[str, str, str, str, bool, Optional[str]], Tuple[str, int]] ) -> Tuple[bool, str]: # Skip build if files are up to date and user didn't enforce rebuild @@ -511,7 +557,7 @@ def build( self.language_version, self.benchmark, self.is_cached_valid, - self._experiment_config.trigger, + self.benchmark_config.trigger, ) self.logging.info( ( @@ -545,8 +591,12 @@ def build( """ def prepare_input(self, storage: PersistentStorage, size: str): - benchmark_data_path = find_benchmark(self._benchmark, "benchmarks-data") - mod = load_benchmark_input(self._benchmark_path) + # The root benchmark name, i.e. xxx.airline-booking. 
+ root_benchmark = '{}.{}'.format(self.benchmark.split('.')[0], self.benchmark.split('.')[1]) + benchmark_data_path = find_benchmark(root_benchmark, "benchmarks-data") + + temp_path = find_benchmark(root_benchmark, "benchmarks") + mod = load_benchmark_input(temp_path) buckets = mod.buckets_count() input, output = storage.benchmark_data(self.benchmark, buckets) @@ -565,7 +615,7 @@ def prepare_input(self, storage: PersistentStorage, size: str): self._cache_client.update_storage( storage.deployment_name(), - self._benchmark, + self.benchmark, { "buckets": { "input": storage.input_prefixes, diff --git a/sebs/faas/function.py b/sebs/faas/function.py index df732360..55265a9d 100644 --- a/sebs/faas/function.py +++ b/sebs/faas/function.py @@ -10,6 +10,7 @@ from typing import Callable, Dict, List, Optional, Type, TypeVar # noqa from sebs.benchmark import Benchmark +from sebs.faas.queue import Queue from sebs.utils import LoggingBase """ @@ -237,6 +238,53 @@ def _http_invoke(self, payload: dict, url: str, verify_ssl: bool = True) -> Exec self.logging.error("No output provided!") raise RuntimeError(f"Failed invocation of function! Output: {data.getvalue().decode()}") + # Common method to collect the measurement results of applications or + # queue/storage-triggered functions. + # + # :param result_queue: The result queue to read from. + # :return: dictionary from end timestamps to the actual measurement data + def collect_async_results(self, result_queue: Queue) -> dict: + # Executions map from function invocation id to the # of new functions + # invoked by that id. + executions = {} + ret = {} + message = "" + + while (True): + message = result_queue.receive_message() + if (message != ""): + end = datetime.now() + + message = json.loads(message) + ret[end] = message + + if ('fns_triggered' in message['result']): + fns_triggered = message['result']['fns_triggered'] + execution_id = message['request_id'] + + if (execution_id not in executions): + executions[execution_id] = fns_triggered + else: + executions[execution_id] += fns_triggered + if (executions[execution_id] == 0): + executions.pop(execution_id) + + if ('parent_execution_id' in message['result']): + parent_execution_id = message['result']['parent_execution_id'] + + if (parent_execution_id in executions): + executions[parent_execution_id] -= 1 + else: + executions[parent_execution_id] = -1 + if (executions[parent_execution_id] == 0): + executions.pop(parent_execution_id) + + if (not executions): + break + + message = "" + return ret + # FIXME: 3.7+, future annotations @staticmethod @abstractmethod @@ -348,7 +396,14 @@ def serialize(self) -> dict: class Function(LoggingBase): - def __init__(self, benchmark: str, name: str, code_hash: str, cfg: FunctionConfig): + def __init__( + self, + benchmark: str, + name: str, + code_hash: str, + cfg: FunctionConfig, + application_name: Optional[str] = None + ): super().__init__() self._benchmark = benchmark self._name = name @@ -356,6 +411,7 @@ def __init__(self, benchmark: str, name: str, code_hash: str, cfg: FunctionConfi self._updated_code = False self._triggers: Dict[Trigger.TriggerType, List[Trigger]] = {} self._cfg = cfg + self._application_name = application_name @property def config(self) -> FunctionConfig: @@ -369,6 +425,10 @@ def name(self): def benchmark(self): return self._benchmark + @property + def application_name(self): + return self._application_name + @property def code_package_hash(self): return self._code_package_hash @@ -409,6 +469,7 @@ def serialize(self) -> dict: "triggers": [ 
obj.serialize() for t_type, triggers in self._triggers.items() for obj in triggers ], + "application_name": self._application_name, } @staticmethod diff --git a/sebs/faas/queue.py b/sebs/faas/queue.py new file mode 100644 index 00000000..b0b5b2ca --- /dev/null +++ b/sebs/faas/queue.py @@ -0,0 +1,62 @@ +from abc import ABC +from abc import abstractmethod +from enum import Enum + +from sebs.utils import LoggingBase + +class QueueType(str, Enum): + TRIGGER = "trigger" + RESULT = "result" + + +class Queue(ABC, LoggingBase): + + @staticmethod + @abstractmethod + def deployment_name() -> str: + pass + + @property + def region(self): + return self._region + + @property + def queue_type(self): + return self._queue_type + + @property + def name(self): + return self._name + + def __init__( + self, + benchmark: str, + queue_type: QueueType, + region: str + ): + super().__init__() + self._name = benchmark + + # Convention: the trigger queue carries the name of the function. The + # result queue carries the name of the function + "-result". + if (queue_type == QueueType.RESULT and not benchmark.endswith("-result")): + self._name = "{}-{}".format(benchmark, queue_type) + + self._queue_type = queue_type + self._region = region + + @abstractmethod + def create_queue(self): + pass + + @abstractmethod + def remove_queue(self): + pass + + @abstractmethod + def send_message(self, serialized_message: str): + pass + + @abstractmethod + def receive_message(self) -> str: + pass diff --git a/sebs/faas/storage.py b/sebs/faas/storage.py index 5b93c053..890c68cc 100644 --- a/sebs/faas/storage.py +++ b/sebs/faas/storage.py @@ -143,20 +143,22 @@ def remove_bucket(self, bucket: str): def benchmark_data( self, benchmark: str, requested_buckets: Tuple[int, int] ) -> Tuple[List[str], List[str]]: + # The root benchmark name, i.e. xxx.map-reduce. + root_benchmark = '{}.{}'.format(benchmark.split('.')[0], benchmark.split('.')[1]) """ Add an input path inside benchmarks bucket. Bucket name format: name-idx-input """ for i in range(0, requested_buckets[0]): - self.input_prefixes.append("{}-{}-input".format(benchmark, i)) + self.input_prefixes.append("{}-{}-input".format(root_benchmark, i)) """ Add an input path inside benchmarks bucket. 
Bucket name format: name-idx-output """ for i in range(0, requested_buckets[1]): - self.output_prefixes.append("{}-{}-output".format(benchmark, i)) + self.output_prefixes.append("{}-{}-output".format(root_benchmark, i)) cached_storage = self.cache_client.get_storage_config(self.deployment_name(), benchmark) self.cached = True diff --git a/sebs/faas/system.py b/sebs/faas/system.py index e126310a..8cdd1fb7 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -318,6 +318,10 @@ def is_configuration_changed(self, cached_function: Function, benchmark: Benchma return changed + @abstractmethod + def default_application_name(self, code_package: Benchmark) -> str: + pass + @abstractmethod def default_function_name(self, code_package: Benchmark) -> str: pass @@ -338,7 +342,12 @@ def download_metrics( pass @abstractmethod - def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: + def create_trigger( + self, + function: Function, + trigger_type: Trigger.TriggerType, + with_result_queue: Optional[bool] = False + ) -> Trigger: pass # @abstractmethod diff --git a/sebs/gcp/function.py b/sebs/gcp/function.py index 09cab242..21f19c9d 100644 --- a/sebs/gcp/function.py +++ b/sebs/gcp/function.py @@ -13,8 +13,9 @@ def __init__( code_package_hash: str, cfg: FunctionConfig, bucket: Optional[str] = None, + application_name: Optional[str] = None ): - super().__init__(benchmark, name, code_package_hash, cfg) + super().__init__(benchmark, name, code_package_hash, cfg, application_name) self.bucket = bucket @staticmethod @@ -39,6 +40,7 @@ def deserialize(cached_config: dict) -> "GCPFunction": cached_config["hash"], cfg, cached_config["bucket"], + cached_config["application_name"], ) for trigger in cached_config["triggers"]: trigger_type = cast( diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index e8614cdc..6412c3c0 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -191,6 +191,14 @@ def create_trigger_resource(self, func_name: str, cached=False) -> Dict: # HTTP triggers do not require resource creation return {"httpsTrigger": {}, "entryPoint": "handler_http"} + @staticmethod + def default_application_name(code_package: Benchmark) -> str: + # Create function name + func_name = "{}-{}-{}".format( + code_package.application_name, code_package.language_name, code_package.language_version + ) + return GCP.format_function_name(func_name) + @staticmethod def default_function_name(code_package: Benchmark) -> str: # Create function name @@ -305,6 +313,9 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti full_func_name = GCP.get_full_function_name(project_name, location, func_name) get_req = self.function_client.projects().locations().functions().get(name=full_func_name) + # Add result queue env var. 
+ result_queue_env = {"RESULT_QUEUE": code_package.benchmark_config.result_queue} + try: get_req.execute() except HttpError: @@ -327,6 +338,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti "timeout": str(timeout) + "s", "ingressSettings": "ALLOW_ALL", "sourceArchiveUrl": "gs://" + code_bucket + "/" + code_prefix, + "environmentVariables": result_queue_env, } | trigger_info, ) @@ -352,7 +364,12 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti self.logging.info(f"Function {func_name} accepts now unauthenticated invocations!") function = GCPFunction( - func_name, benchmark, code_package.hash, function_cfg, code_bucket + name=func_name, + benchmark=benchmark, + code_package_hash=code_package.hash, + cfg=function_cfg, + bucket=code_bucket, + application_name=code_package.application_name ) else: # if result is not empty, then function does exists @@ -364,6 +381,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti code_package_hash=code_package.hash, cfg=function_cfg, bucket=code_bucket, + application_name=code_package.application_name ) self.update_function(function, code_package) @@ -376,7 +394,12 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti return function - def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: + def create_trigger( + self, + function: Function, + trigger_type: Trigger.TriggerType, + with_result_queue: Optional[bool] = False + ) -> Trigger: from sebs.gcp.triggers import HTTPTrigger, QueueTrigger, StorageTrigger location = self.config.region @@ -398,20 +421,29 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) trigger: Trigger if trigger_type == Trigger.TriggerType.HTTP: invoke_url = status_res["httpsTrigger"]["url"] - trigger = HTTPTrigger(invoke_url) + trigger = HTTPTrigger( + function.name, + url=invoke_url, + application_name=function.application_name, + with_result_queue=with_result_queue + ) self.logging.info(f"Created HTTP trigger for {function.name} function") elif trigger_type == Trigger.TriggerType.QUEUE: trigger = QueueTrigger( function.name, - self.get_trigger_resource_name(function.name), - self.config.region + queue_name=self.get_trigger_resource_name(function.name), + region=self.config.region, + application_name=function.application_name, + with_result_queue=with_result_queue ) self.logging.info(f"Created Queue trigger for {function.name} function") elif trigger_type == Trigger.TriggerType.STORAGE: trigger = StorageTrigger( function.name, - self.get_trigger_resource_name(function.name), - self.config.region + bucket_name=self.get_trigger_resource_name(function.name), + region=self.config.region, + application_name=function.application_name, + with_result_queue=with_result_queue ) self.logging.info(f"Created Storage trigger for {function.name} function") else: @@ -458,6 +490,9 @@ def update_function(self, function: Function, code_package: Benchmark): # bucket) exist on GCP. trigger_info = self.create_trigger_resource(function.name, cached=True) + # Add result queue env var. 
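# --- Illustrative aside (not part of the patch): at run time the wrapper handler
# reads this variable (result_queue = os.getenv('RESULT_QUEUE')) and, when it is
# set, pushes the serialized measurements to that queue instead of returning them
# to the HTTP caller; the corresponding handler.py changes appear later in this
# series. --- end of aside ---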
+ result_queue_env = {"RESULT_QUEUE": code_package.benchmark_config.result_queue} + req = ( self.function_client.projects() .locations() @@ -470,6 +505,7 @@ def update_function(self, function: Function, code_package: Benchmark): "availableMemoryMb": function.config.memory, "timeout": str(function.config.timeout) + "s", "sourceArchiveUrl": "gs://" + bucket + "/" + code_package_name, + "environmentVariables": result_queue_env, } | trigger_info, ) diff --git a/sebs/gcp/queue.py b/sebs/gcp/queue.py new file mode 100644 index 00000000..189fafef --- /dev/null +++ b/sebs/gcp/queue.py @@ -0,0 +1,124 @@ +from sebs.faas.queue import Queue, QueueType + +from google.api_core import retry +from google.api_core.exceptions import AlreadyExists +from google.cloud import pubsub_v1 + +import os + + +class GCPQueue(Queue): + @staticmethod + def typename() -> str: + return "GCP.Queue" + + @staticmethod + def deployment_name(): + return "gcp" + + @property + def topic_name(self): + return self._topic_name + + @property + def subscription_name(self): + return self._subscription_name + + @property + def subscription_client(self): + return self._subscription_client + + def __init__( + self, + benchmark: str, + queue_type: QueueType, + region: str + ): + super().__init__( + benchmark, + queue_type, + region + ) + self.client = pubsub_v1.PublisherClient() + self._subscription_client = pubsub_v1.SubscriberClient() + + self._topic_name = 'projects/{project_id}/topics/{topic}'.format( + project_id=os.getenv('GOOGLE_CLOUD_PROJECT'), + topic=self.name, + ) + self._subscription_name = 'projects/{project_id}/subscriptions/{sub}'.format( + project_id=os.getenv('GOOGLE_CLOUD_PROJECT'), + sub=self.name, + ) + + def create_queue(self): + self.logging.info(f"Creating queue {self.name}") + try: + self.client.create_topic(name=self.topic_name) + self.logging.info("Created queue") + except AlreadyExists: + self.logging.info("Queue already exists, reusing...") + + # GCP additionally needs a 'subscription' resource which is the + # actual receiver of the messages. It is constructed and destructed + # alongside the topic at all times. + self.logging.info(f"Creating queue subscription") + try: + self.subscription_client.create_subscription( + name=self.subscription_name, + topic=self.topic_name + ) + self.logging.info("Created queue subscription") + except AlreadyExists: + self.logging.info("Subscription already exists, reusing...") + + def remove_queue(self): + self.logging.info(f"Deleting queue and associated subscription{self.name}") + + self.client.delete_topic(topic=self.topic_name) + self.subscription_client.delete_subscription(subscription=self.subscription_name) + + self.logging.info("Deleted queue and associated subscription") + + def send_message(self, serialized_message: str): + self.client.publish(self.topic_name, serialized_message.decode("utf-8")) + self.logging.info(f"Sent message to queue {self.name}") + + # Receive messages through the 'pull' (sync) method. + def receive_message(self) -> str: + self.logging.info(f"Pulling a message from {self.name}") + + response = self.subscription_client.pull( + subscription=self.subscription_name, + max_messages=1, + retry=retry.Retry(deadline=5), + ) + + if (len(response.received_messages) == 0): + self.logging.info("No messages to be received") + return "" + + # Acknowledge the received message so it is not sent again. 
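# (Aside, not part of the patch.) Pub/Sub delivers messages at least once: a
# pulled message that is not acknowledged before its ack deadline expires is
# redelivered on a later pull. receive_message() therefore pairs the synchronous
# pull above with an explicit acknowledge of the returned ack_id before handing
# the payload back to the caller.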
+ received_message = response.received_messages[0] + self.subscription_client.acknowledge( + subscription=self.subscription_name, + ack_ids=[received_message.ack_id], + ) + self.logging.info(f"Received a message from {self.name}") + + return received_message.message.data + + def serialize(self) -> dict: + return { + "name": self.name, + "type": self.queue_type, + "region": self.region, + } + + @staticmethod + def deserialize(obj: dict) -> "GCPQueue": + return GCPQueue( + obj["name"], + obj["type"], + obj["region"], + ) diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 41fbe18c..80942806 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -17,10 +17,29 @@ class LibraryTrigger(Trigger): - def __init__(self, fname: str, deployment_client: Optional[GCP] = None): + def __init__( + self, + fname: str, + deployment_client: Optional[GCP] = None, + application_name: Optional[str] = None, + result_queue: Optional[GCPQueue] = None, + with_result_queue: Optional[bool] = False + ): super().__init__() self.name = fname self._deployment_client = deployment_client + self._result_queue = result_queue + self.with_result_queue = with_result_queue + + # Create result queue for communicating benchmark results back to the + # client. + if (self.with_result_queue and not self._result_queue): + self._result_queue = GCPQueue( + f"{application_name}-result", + QueueType.RESULT, + self.region + ) + self._result_queue.create_queue() @staticmethod def typename() -> str: @@ -39,6 +58,11 @@ def deployment_client(self, deployment_client: GCP): def trigger_type() -> Trigger.TriggerType: return Trigger.TriggerType.LIBRARY + @property + def result_queue(self) -> GCPQueue: + assert self._result_queue + return self._result_queue + def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.info(f"Invoke function {self.name}") @@ -83,17 +107,46 @@ def async_invoke(self, payload: dict): raise NotImplementedError() def serialize(self) -> dict: - return {"type": "Library", "name": self.name} + return { + "type": "Library", + "name": self.name, + "result_queue": self._result_queue.serialize() if self._result_queue else "", + "with_result_queue": self.with_result_queue, + } @staticmethod def deserialize(obj: dict) -> Trigger: - return LibraryTrigger(obj["name"]) + return LibraryTrigger( + obj["name"], + GCPQueue.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + obj["with_result_queue"], + ) class HTTPTrigger(Trigger): - def __init__(self, url: str): + def __init__( + self, + fname: str, + url: str, + application_name: Optional[str] = None, + result_queue: Optional[GCPQueue] = None, + with_result_queue: Optional[bool] = False + ): super().__init__() + self.name = fname self.url = url + self._result_queue = result_queue + self.with_result_queue = with_result_queue + + # Create result queue for communicating benchmark results back to the + # client. 
+ if (self.with_result_queue and not self._result_queue): + self._result_queue = GCPQueue( + f"{application_name}-result", + QueueType.RESULT, + self.region + ) + self._result_queue.create_queue() @staticmethod def typename() -> str: @@ -103,6 +156,11 @@ def typename() -> str: def trigger_type() -> Trigger.TriggerType: return Trigger.TriggerType.HTTP + @property + def result_queue(self) -> GCPQueue: + assert self._result_queue + return self._result_queue + def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.debug(f"Invoke function {self.url}") @@ -114,11 +172,22 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "HTTP", "url": self.url} + return { + "type": "HTTP", + "name": self.name, + "url": self.url, + "result_queue": self._result_queue.serialize() if self._result_queue else "", + "with_result_queue": self.with_result_queue, + } @staticmethod def deserialize(obj: dict) -> Trigger: - return HTTPTrigger(obj["url"]) + return HTTPTrigger( + obj["name"], + obj["url"], + GCPQueue.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + obj["with_result_queue"], + ) class QueueTrigger(Trigger): @@ -127,19 +196,22 @@ def __init__( fname: str, queue_name: str, region: str, - result_queue: Optional[GCPQueue] = None + application_name: Optional[str] = None, + result_queue: Optional[GCPQueue] = None, + with_result_queue: Optional[bool] = False ): super().__init__() self.name = fname self._queue_name = queue_name self._region = region self._result_queue = result_queue + self.with_result_queue = with_result_queue # Create result queue for communicating benchmark results back to the # client. - if (not self._result_queue): + if (self.with_result_queue and not self._result_queue): self._result_queue = GCPQueue( - fname, + f"{application_name}-result", QueueType.RESULT, self.region ) @@ -188,15 +260,15 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: }, ).execute() - response = "" - while (response == ""): - response = self.result_queue.receive_message() + results = self.collect_async_results(self.result_queue) - end = datetime.datetime.now() + ret = [] + for recv_ts, result_data in results.items(): + result = ExecutionResult.from_times(begin, recv_ts) + result.parse_benchmark_output(result_data) + ret.append(result) - result = ExecutionResult.from_times(begin, end) - result.parse_benchmark_output(json.loads(response)) - return result + return ret def async_invoke(self, payload: dict) -> concurrent.futures.Future: @@ -210,7 +282,8 @@ def serialize(self) -> dict: "name": self.name, "queue_name": self.queue_name, "region": self.region, - "result_queue": self.result_queue.serialize() + "result_queue": self._result_queue.serialize() if self._result_queue else "", + "with_result_queue": self.with_result_queue, } @staticmethod @@ -219,7 +292,8 @@ def deserialize(obj: dict) -> Trigger: obj["name"], obj["queue_name"], obj["region"], - GCPQueue.deserialize(obj["result_queue"]) + GCPQueue.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + obj["with_result_queue"], ) @@ -229,19 +303,22 @@ def __init__( fname: str, bucket_name: str, region: str, - result_queue: Optional[GCPQueue] = None + application_name: Optional[str] = None, + result_queue: Optional[GCPQueue] = None, + with_result_queue: Optional[bool] = False ): super().__init__() self.name = fname self._bucket_name = bucket_name self._region = region self._result_queue = result_queue + self.with_result_queue = 
with_result_queue # Create result queue for communicating benchmark results back to the # client. - if (not self._result_queue): + if (self.with_result_queue and not self._result_queue): self._result_queue = GCPQueue( - fname, + f"{application_name}-result", QueueType.RESULT, self.region ) @@ -291,15 +368,15 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.info(f"Uploaded payload to bucket {self.bucket_name}") - response = "" - while (response == ""): - response = self.result_queue.receive_message() + results = self.collect_async_results(self.result_queue) - end = datetime.datetime.now() + ret = [] + for recv_ts, result_data in results.items(): + result = ExecutionResult.from_times(begin, recv_ts) + result.parse_benchmark_output(result_data) + ret.append(result) - result = ExecutionResult.from_times(begin, end) - result.parse_benchmark_output(json.loads(response)) - return result + return ret def async_invoke(self, payload: dict) -> concurrent.futures.Future: @@ -313,14 +390,16 @@ def serialize(self) -> dict: "name": self.name, "bucket_name": self.bucket_name, "region": self.region, - "result_queue": self.result_queue.serialize() + "result_queue": self._result_queue.serialize() if self._result_queue else "", + "with_result_queue": self.with_result_queue, } @staticmethod def deserialize(obj: dict) -> Trigger: return StorageTrigger( - obj["name"], - obj["bucket_name"], - obj["region"], - GCPQueue.deserialize(obj["result_queue"]) + fname=obj["name"], + bucket_name=obj["bucket_name"], + region=obj["region"], + result_queue=GCPQueue.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + with_result_queue=obj["with_result_queue"], ) diff --git a/sebs/sebs.py b/sebs/sebs.py index 58bc07a9..149f0090 100644 --- a/sebs/sebs.py +++ b/sebs/sebs.py @@ -162,6 +162,7 @@ def get_benchmark( name: str, deployment: FaaSSystem, config: ExperimentConfig, + app_function_name: Optional[str] = None, logging_filename: Optional[str] = None, ) -> Benchmark: benchmark = Benchmark( @@ -172,6 +173,7 @@ def get_benchmark( self._output_dir, self.cache_client, self.docker_client, + app_function_name=app_function_name ) benchmark.logging_handlers = self.generate_logging_handlers( logging_filename=logging_filename diff --git a/sebs/utils.py b/sebs/utils.py index 3df8ffc9..995a1354 100644 --- a/sebs/utils.py +++ b/sebs/utils.py @@ -128,13 +128,19 @@ def configure_logging(): :param benchmark: Benchmark name. :param path: Path for lookup, relative to repository. + :param function: [Optional, for apps] the particular function we are + looking for. 
:return: relative path to directory corresponding to benchmark """ -def find_benchmark(benchmark: str, path: str): +def find_benchmark(benchmark: str, path: str, function: Optional[str] = None): benchmarks_dir = os.path.join(PROJECT_DIR, path) benchmark_path = find(benchmark, benchmarks_dir) + + if (function): + benchmark_path = find(function, benchmark_path) + return benchmark_path From 28da75654349041883f6bf441518e220995adf96 Mon Sep 17 00:00:00 2001 From: orosca Date: Thu, 31 Oct 2024 10:31:20 +0100 Subject: [PATCH 21/26] Handler and storage wrappers --- benchmarks/wrappers/aws/python/handler.py | 34 +++++++++++------- benchmarks/wrappers/aws/python/storage.py | 18 ++++++++-- benchmarks/wrappers/azure/python/handler.py | 38 ++++++++++++--------- benchmarks/wrappers/azure/python/storage.py | 26 +++++++++++--- benchmarks/wrappers/gcp/python/handler.py | 36 +++++++++++-------- benchmarks/wrappers/gcp/python/storage.py | 19 ++++++++++- sebs/azure/azure.py | 6 ++-- 7 files changed, 122 insertions(+), 55 deletions(-) diff --git a/benchmarks/wrappers/aws/python/handler.py b/benchmarks/wrappers/aws/python/handler.py index a100393a..e4e67a0c 100644 --- a/benchmarks/wrappers/aws/python/handler.py +++ b/benchmarks/wrappers/aws/python/handler.py @@ -26,7 +26,7 @@ def handler(event, context): storage_inst = storage.storage.get_instance() obj = storage_inst.get_object(bucket_name, file_name) - event = json.loads(obj['Body'].read()) + event = json.loads(obj) return_http = False @@ -34,6 +34,7 @@ def handler(event, context): if 'body' in event: event = json.loads(event['body']) + # Run function and measure. req_id = context.aws_request_id event['request-id'] = req_id event['income-timestamp'] = income_timestamp @@ -45,6 +46,10 @@ def handler(event, context): log_data = { 'output': ret['result'] } + if 'fns_triggered' in ret and ret['fns_triggered'] > 0: + log_data['fns_triggered'] = ret['fns_triggered'] + if 'parent_execution_id' in ret: + log_data['parent_execution_id'] = ret['parent_execution_id'] if 'measurement' in ret: log_data['measurement'] = ret['measurement'] if 'logs' in event: @@ -87,19 +92,22 @@ def handler(event, context): 'container_id': container_id, }) - # HTTP or library trigger: return an HTTP response. - if (return_http): + # Send the results onwards. + result_queue = os.getenv('RESULT_QUEUE') + + if (return_http or result_queue is None): + # HTTP / library trigger, standalone function: return an HTTP response. return { 'statusCode': 200, 'body': stats } - - # Queue or storage trigger: return via a result queue. - arn = context.invoked_function_arn.split(":") - region = arn[3] - account_id = arn[4] - queue_name = f"{arn[6]}-result" - - from function import queue - queue_client = queue.queue(queue_name, account_id, region) - queue_client.send_message(stats) + else: + # Queue trigger, storage trigger, or application: write to a queue. 
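# (Aside, not part of the patch.) The Lambda context exposes the function ARN in
# the form "arn:aws:lambda:<region>:<account-id>:function:<name>", e.g. the
# placeholder "arn:aws:lambda:us-east-1:123456789012:function:my-func", so after
# split(":") index 3 holds the region and index 4 the account id used to build
# the SQS queue URL below.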
+ arn = context.invoked_function_arn.split(":") + region = arn[3] + account_id = arn[4] + queue_name = result_queue + + from function import queue + queue_client = queue.queue(queue_name, account_id, region) + queue_client.send_message(stats) diff --git a/benchmarks/wrappers/aws/python/storage.py b/benchmarks/wrappers/aws/python/storage.py index 602319df..524beda5 100644 --- a/benchmarks/wrappers/aws/python/storage.py +++ b/benchmarks/wrappers/aws/python/storage.py @@ -21,8 +21,10 @@ def unique_name(name): random=str(uuid.uuid4()).split('-')[0] ) - def upload(self, bucket, file, filepath): + def upload(self, bucket, file, filepath, overwrite=False): key_name = storage.unique_name(file) + if (overwrite): + key_name = file self.client.upload_file(filepath, bucket, key_name) return key_name @@ -48,9 +50,19 @@ def download_stream(self, bucket, file): return data.getbuffer() def get_object(self, bucket, file): - return self.client.get_object(Bucket=bucket, Key=file) - + obj = self.client.get_object(Bucket=bucket, Key=file) + return obj['Body'].read().decode('utf-8') + def get_instance(): if storage.instance is None: storage.instance = storage() return storage.instance + + def list_blobs(self, bucket): + res = self.client.list_objects(Bucket=bucket) + + objs = [] + for obj in res['Contents']: + objs.append(obj['Key']) + + return objs diff --git a/benchmarks/wrappers/azure/python/handler.py b/benchmarks/wrappers/azure/python/handler.py index 70843b6e..fc1a646f 100644 --- a/benchmarks/wrappers/azure/python/handler.py +++ b/benchmarks/wrappers/azure/python/handler.py @@ -12,6 +12,7 @@ def handler_http(req: func.HttpRequest, context: func.Context) -> func.HttpRespo income_timestamp = datetime.datetime.now().timestamp() req_json = req.get_json() + if 'connection_string' in req_json: os.environ['STORAGE_CONNECTION_STRING'] = req_json['connection_string'] @@ -23,7 +24,6 @@ def handler_http(req: func.HttpRequest, context: func.Context) -> func.HttpRespo def handler_queue(msg: func.QueueMessage, context: func.Context): income_timestamp = datetime.datetime.now().timestamp() - logging.info('Python queue trigger function processed a queue item.') payload = msg.get_json() payload['request-id'] = context.invocation_id @@ -31,19 +31,20 @@ def handler_queue(msg: func.QueueMessage, context: func.Context): stats = measure(payload) - queue_name = f"{os.getenv('WEBSITE_SITE_NAME')}-result" - storage_account = os.getenv('STORAGE_ACCOUNT') - logging.info(queue_name) - logging.info(storage_account) + # Send the results onwards. + result_queue = os.getenv('RESULT_QUEUE') + storage_account = os.getenv('DATA_STORAGE_ACCOUNT') + + if (result_queue and storage_account): + storage_account = os.getenv('STORAGE_ACCOUNT') - from . import queue - queue_client = queue.queue(queue_name, storage_account) - queue_client.send_message(stats) + from . 
import queue + queue_client = queue.queue(result_queue, storage_account) + queue_client.send_message(stats) def handler_storage(blob: func.InputStream, context: func.Context): income_timestamp = datetime.datetime.now().timestamp() - logging.info('Python Blob trigger function processed %s', blob.name) payload = json.loads(blob.readline().decode('utf-8')) payload['request-id'] = context.invocation_id @@ -51,14 +52,15 @@ def handler_storage(blob: func.InputStream, context: func.Context): stats = measure(payload) - queue_name = f"{os.getenv('WEBSITE_SITE_NAME')}-result" - storage_account = os.getenv('STORAGE_ACCOUNT') - logging.info(queue_name) - logging.info(storage_account) + # Send the results onwards. + result_queue = os.getenv('RESULT_QUEUE') + storage_account = os.getenv('DATA_STORAGE_ACCOUNT') + + if (result_queue and storage_account): - from . import queue - queue_client = queue.queue(queue_name, storage_account) - queue_client.send_message(stats) + from . import queue + queue_client = queue.queue(result_queue, storage_account) + queue_client.send_message(stats) # Contains generic logic for gathering measurements for the function at hand, # given a request JSON. Used by all handlers, regardless of the trigger. @@ -74,6 +76,10 @@ def measure(req_json) -> str: log_data = { 'output': ret['result'] } + if 'fns_triggered' in ret and ret['fns_triggered'] > 0: + log_data['fns_triggered'] = ret['fns_triggered'] + if 'parent_execution_id' in ret: + log_data['parent_execution_id'] = ret['parent_execution_id'] if 'measurement' in ret: log_data['measurement'] = ret['measurement'] if 'logs' in req_json: diff --git a/benchmarks/wrappers/azure/python/storage.py b/benchmarks/wrappers/azure/python/storage.py index 74c08307..4938ed1c 100644 --- a/benchmarks/wrappers/azure/python/storage.py +++ b/benchmarks/wrappers/azure/python/storage.py @@ -22,9 +22,9 @@ def unique_name(name): random=str(uuid.uuid4()).split('-')[0] ) - def upload(self, container, file, filepath): + def upload(self, container, file, filepath, overwrite=False): with open(filepath, 'rb') as data: - return self.upload_stream(container, file, data) + return self.upload_stream(container, file, data, overwrite) def download(self, container, file, filepath): with open(filepath, 'wb') as download_file: @@ -39,13 +39,15 @@ def download_directory(self, container, prefix, path): os.makedirs(os.path.join(path, path_to_file), exist_ok=True) self.download(container, file_name, os.path.join(path, file_name)) - def upload_stream(self, container, file, data): + def upload_stream(self, container, file, data, overwrite=False): key_name = storage.unique_name(file) + if (overwrite): + key_name = file client = self.client.get_blob_client( container=container, blob=key_name ) - client.upload_blob(data) + client.upload_blob(data, overwrite=overwrite) return key_name def download_stream(self, container, file): @@ -56,3 +58,19 @@ def get_instance(): if storage.instance is None: storage.instance = storage() return storage.instance + + def get_object(self, container, key): + blob_client = self.client.get_blob_client(container=container, blob=key) + downloader = blob_client.download_blob(max_concurrency=1, encoding='UTF-8') + return downloader.readall() + + def list_blobs(self, container): + client = self.client.get_container_client(container=container) + + # Azure returns an iterator. Turn it into a list. 
+ objs = [] + res = client.list_blob_names() + for obj in res: + objs.append(obj) + + return objs diff --git a/benchmarks/wrappers/gcp/python/handler.py b/benchmarks/wrappers/gcp/python/handler.py index 51a9d604..6ce3c004 100644 --- a/benchmarks/wrappers/gcp/python/handler.py +++ b/benchmarks/wrappers/gcp/python/handler.py @@ -1,5 +1,6 @@ import base64, datetime, io, json, os, uuid, sys +from google.auth import default from google.cloud import storage as gcp_storage sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) @@ -25,13 +26,15 @@ def handler_queue(data, context): stats = measure(payload) - # Retrieve the project id and construct the result queue name. - project_id = context.resource.split("/")[1] - topic_name = f"{context.resource.split('/')[3]}-result" + # Send the results onwards. + result_queue = os.getenv('RESULT_QUEUE') - from function import queue - queue_client = queue.queue(topic_name, project_id) - queue_client.send_message(stats) + if (result_queue): + project_id = context.resource.split("/")[1] + + from function import queue + queue_client = queue.queue(result_queue, project_id) + queue_client.send_message(stats) def handler_storage(data, context): income_timestamp = datetime.datetime.now().timestamp() @@ -54,16 +57,15 @@ def handler_storage(data, context): stats = measure(payload) - # Retrieve the project id and construct the result queue name. - from google.auth import default - # Used to be an env var, now we need an additional request to the metadata - # server to retrieve it. - _, project_id = default() - topic_name = f"{context.resource['name'].split('/')[3]}-result" + # Send the results onwards. + result_queue = os.getenv('RESULT_QUEUE') + + if (result_queue): + _, project_id = default() - from function import queue - queue_client = queue.queue(topic_name, project_id) - queue_client.send_message(stats) + from function import queue + queue_client = queue.queue(result_queue, project_id) + queue_client.send_message(stats) # Contains generic logic for gathering measurements for the function at hand, # given a request JSON. Used by all handlers, regardless of the trigger. 
@@ -80,6 +82,10 @@ def measure(req_json) -> str: log_data = { 'output': ret['result'] } + if 'fns_triggered' in ret and ret['fns_triggered'] > 0: + log_data['fns_triggered'] = ret['fns_triggered'] + if 'parent_execution_id' in ret: + log_data['parent_execution_id'] = ret['parent_execution_id'] if 'measurement' in ret: log_data['measurement'] = ret['measurement'] if 'logs' in req_json: diff --git a/benchmarks/wrappers/gcp/python/storage.py b/benchmarks/wrappers/gcp/python/storage.py index 81163cb3..b08527b0 100644 --- a/benchmarks/wrappers/gcp/python/storage.py +++ b/benchmarks/wrappers/gcp/python/storage.py @@ -21,8 +21,10 @@ def unique_name(name): random=str(uuid.uuid4()).split('-')[0] ) - def upload(self, bucket, file, filepath): + def upload(self, bucket, file, filepath, overwrite=False): key_name = storage.unique_name(file) + if (overwrite): + key_name = file bucket_instance = self.client.bucket(bucket) blob = bucket_instance.blob(key_name) blob.upload_from_filename(filepath) @@ -55,7 +57,22 @@ def download_stream(self, bucket, file): blob.download_to_file(data) return data.getbuffer() + def get_object(self, bucket, key): + bucket_instance = self.client.bucket(bucket) + blob = bucket_instance.blob(key) + contents = blob.download_as_bytes() + return contents['Body'].read().decode('utf-8') + def get_instance(): if storage.instance is None: storage.instance = storage() return storage.instance + + def list_blobs(self, bucket): + res = self.client.list_blobs(bucket) + + objs = [] + for obj in res: + objs.append(obj.name) + + return objs diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index f96ba3f3..695d6102 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -171,7 +171,7 @@ def create_function_json(self, benchmark, exec_files) -> Dict: "type": "queueTrigger", "direction": "in", "queueName": benchmark, - "connection": "DATA_STORAGE_ACCOUNT_CONN_STR", + "connection": "STORAGE_CONNECTION_STRING", } ], } @@ -185,7 +185,7 @@ def create_function_json(self, benchmark, exec_files) -> Dict: "type": "blobTrigger", "direction": "in", "path": benchmark, - "connection": "DATA_STORAGE_ACCOUNT_CONN_STR", + "connection": "STORAGE_CONNECTION_STRING", } ], } @@ -461,7 +461,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> AzureFunct self.cli_instance.execute( f"az functionapp config appsettings set --name {func_name} " f" --resource-group {resource_group} " - f" --settings DATA_STORAGE_ACCOUNT_CONN_STR={data_storage_account.connection_string}" + f" --settings STORAGE_CONNECTION_STRING={data_storage_account.connection_string}" ) self.logging.info("Azure: Created function app {}".format(func_name)) From d3d5c71514e70bb5f498f2e6a2385dd0867a57ef Mon Sep 17 00:00:00 2001 From: orosca Date: Fri, 8 Nov 2024 21:16:41 +0100 Subject: [PATCH 22/26] Wrapper adjustments for applications --- benchmarks/wrappers/aws/python/handler.py | 19 ++++++++------- benchmarks/wrappers/aws/python/misc.py | 23 ++++++++++++++++++ benchmarks/wrappers/aws/python/queue.py | 7 ++++-- benchmarks/wrappers/aws/python/storage.py | 15 ++++++++---- benchmarks/wrappers/azure/python/handler.py | 19 ++++++++++----- benchmarks/wrappers/azure/python/misc.py | 26 +++++++++++++++++++++ benchmarks/wrappers/azure/python/queue.py | 13 +++++++---- benchmarks/wrappers/azure/python/storage.py | 10 +++++--- benchmarks/wrappers/gcp/python/handler.py | 20 +++++++++------- benchmarks/wrappers/gcp/python/misc.py | 20 ++++++++++++++++ benchmarks/wrappers/gcp/python/queue.py | 6 +++-- 
benchmarks/wrappers/gcp/python/storage.py | 16 +++++++++---- config/systems.json | 9 ++++--- docs/modularity.md | 3 ++- scripts/run_experiments.py | 1 + sebs/aws/aws.py | 16 +++++++------ sebs/aws/queue.py | 4 +++- sebs/aws/triggers.py | 9 ++++--- sebs/azure/azure.py | 12 +++++++--- sebs/azure/queue.py | 5 ++-- sebs/azure/triggers.py | 12 +++++----- sebs/gcp/gcp.py | 24 ++++++++++--------- sebs/gcp/triggers.py | 10 ++++---- 23 files changed, 214 insertions(+), 85 deletions(-) create mode 100644 benchmarks/wrappers/aws/python/misc.py create mode 100644 benchmarks/wrappers/azure/python/misc.py create mode 100644 benchmarks/wrappers/gcp/python/misc.py diff --git a/benchmarks/wrappers/aws/python/handler.py b/benchmarks/wrappers/aws/python/handler.py index e4e67a0c..c228c4da 100644 --- a/benchmarks/wrappers/aws/python/handler.py +++ b/benchmarks/wrappers/aws/python/handler.py @@ -7,6 +7,8 @@ def handler(event, context): income_timestamp = datetime.datetime.now().timestamp() + populate_env_vars(context) + # Flag to indicate whether the measurements should be returned as an HTTP # response or via a result queue. return_http = True @@ -48,8 +50,8 @@ def handler(event, context): } if 'fns_triggered' in ret and ret['fns_triggered'] > 0: log_data['fns_triggered'] = ret['fns_triggered'] - if 'parent_execution_id' in ret: - log_data['parent_execution_id'] = ret['parent_execution_id'] + if 'parent_execution_id' in event: + log_data['parent_execution_id'] = event['parent_execution_id'] if 'measurement' in ret: log_data['measurement'] = ret['measurement'] if 'logs' in event: @@ -103,11 +105,12 @@ def handler(event, context): } else: # Queue trigger, storage trigger, or application: write to a queue. - arn = context.invoked_function_arn.split(":") - region = arn[3] - account_id = arn[4] - queue_name = result_queue - from function import queue - queue_client = queue.queue(queue_name, account_id, region) + queue_client = queue.queue(result_queue) queue_client.send_message(stats) + +def populate_env_vars(context): + arn = context.invoked_function_arn.split(":") + + os.environ['REGION'] = arn[3] + os.environ['ACCOUNT_ID'] = arn[4] diff --git a/benchmarks/wrappers/aws/python/misc.py b/benchmarks/wrappers/aws/python/misc.py new file mode 100644 index 00000000..92f0d565 --- /dev/null +++ b/benchmarks/wrappers/aws/python/misc.py @@ -0,0 +1,23 @@ +import os + +def function_name( + fname: str, + language: str, + version: str, + trigger: str +): + app_name = os.getenv('APP_NAME') + full_name = f'{app_name}_{fname}_{language}_{version}' + full_name = full_name.replace('.', '_') + + if (trigger == 'storage'): + full_name = full_name.replace('_', '-') + + return full_name + +def object_path(path: str, key: str): + app_name = os.getenv('APP_NAME') + path = f'{app_name}-{path}/{key}' + path = path.replace('_', '-') + + return path diff --git a/benchmarks/wrappers/aws/python/queue.py b/benchmarks/wrappers/aws/python/queue.py index 95cde8a7..ac13f3c0 100644 --- a/benchmarks/wrappers/aws/python/queue.py +++ b/benchmarks/wrappers/aws/python/queue.py @@ -1,9 +1,12 @@ -import boto3 +import boto3, os class queue: client = None - def __init__(self, queue_name: str, account_id: str, region: str): + def __init__(self, queue_name: str): + account_id = os.getenv('ACCOUNT_ID') + region = os.getenv('REGION') + self.client = boto3.client('sqs', region_name=region) self.queue_url = f"https://sqs.{region}.amazonaws.com/{account_id}/{queue_name}" diff --git a/benchmarks/wrappers/aws/python/storage.py 
b/benchmarks/wrappers/aws/python/storage.py index 524beda5..111dd8b9 100644 --- a/benchmarks/wrappers/aws/python/storage.py +++ b/benchmarks/wrappers/aws/python/storage.py @@ -39,8 +39,10 @@ def download_directory(self, bucket, prefix, path): os.makedirs(os.path.join(path, path_to_file), exist_ok=True) self.download(bucket, file_name, os.path.join(path, file_name)) - def upload_stream(self, bucket, file, data): + def upload_stream(self, bucket, file, data, overwrite=False): key_name = storage.unique_name(file) + if (overwrite): + key_name = file self.client.upload_fileobj(data, bucket, key_name) return key_name @@ -51,18 +53,23 @@ def download_stream(self, bucket, file): def get_object(self, bucket, file): obj = self.client.get_object(Bucket=bucket, Key=file) - return obj['Body'].read().decode('utf-8') + return obj['Body'].read() def get_instance(): if storage.instance is None: storage.instance = storage() return storage.instance - def list_blobs(self, bucket): - res = self.client.list_objects(Bucket=bucket) + def list_objects(self, bucket, prefix=None): + if (not prefix): + prefix = '' + res = self.client.list_objects(Bucket=bucket, Prefix=prefix) objs = [] for obj in res['Contents']: objs.append(obj['Key']) return objs + + def delete_object(self, bucket, key): + self.client.delete_object(Bucket=bucket, Key=key) diff --git a/benchmarks/wrappers/azure/python/handler.py b/benchmarks/wrappers/azure/python/handler.py index fc1a646f..9e025969 100644 --- a/benchmarks/wrappers/azure/python/handler.py +++ b/benchmarks/wrappers/azure/python/handler.py @@ -24,6 +24,8 @@ def handler_http(req: func.HttpRequest, context: func.Context) -> func.HttpRespo def handler_queue(msg: func.QueueMessage, context: func.Context): income_timestamp = datetime.datetime.now().timestamp() + populate_env_vars() + payload = msg.get_json() payload['request-id'] = context.invocation_id @@ -36,16 +38,18 @@ def handler_queue(msg: func.QueueMessage, context: func.Context): storage_account = os.getenv('DATA_STORAGE_ACCOUNT') if (result_queue and storage_account): - storage_account = os.getenv('STORAGE_ACCOUNT') from . import queue - queue_client = queue.queue(result_queue, storage_account) + queue_client = queue.queue(result_queue) queue_client.send_message(stats) def handler_storage(blob: func.InputStream, context: func.Context): income_timestamp = datetime.datetime.now().timestamp() + populate_env_vars() + payload = json.loads(blob.readline().decode('utf-8')) + logging.info(payload) payload['request-id'] = context.invocation_id payload['income-timestamp'] = income_timestamp @@ -59,7 +63,7 @@ def handler_storage(blob: func.InputStream, context: func.Context): if (result_queue and storage_account): from . 
import queue - queue_client = queue.queue(result_queue, storage_account) + queue_client = queue.queue(result_queue) queue_client.send_message(stats) # Contains generic logic for gathering measurements for the function at hand, @@ -78,8 +82,8 @@ def measure(req_json) -> str: } if 'fns_triggered' in ret and ret['fns_triggered'] > 0: log_data['fns_triggered'] = ret['fns_triggered'] - if 'parent_execution_id' in ret: - log_data['parent_execution_id'] = ret['parent_execution_id'] + if 'parent_execution_id' in req_json: + log_data['parent_execution_id'] = req_json['parent_execution_id'] if 'measurement' in ret: log_data['measurement'] = ret['measurement'] if 'logs' in req_json: @@ -125,4 +129,7 @@ def measure(req_json) -> str: 'container_id': container_id, 'environ_container_id': os.environ['CONTAINER_NAME'], 'request_id': req_id - }) \ No newline at end of file + }) + +def populate_env_vars(): + os.environ['ACCOUNT_ID'] = os.getenv('DATA_STORAGE_ACCOUNT') diff --git a/benchmarks/wrappers/azure/python/misc.py b/benchmarks/wrappers/azure/python/misc.py new file mode 100644 index 00000000..714470e0 --- /dev/null +++ b/benchmarks/wrappers/azure/python/misc.py @@ -0,0 +1,26 @@ +import os + +def function_name( + fname: str, + language: str, + version: str, + trigger: str +): + app_name = os.getenv('APP_NAME') + app_name = app_name[:app_name.rfind('-')] + + storage_account = os.getenv('ACCOUNT_ID') + storage_account = storage_account[7:] + + full_name = f"{app_name}-{fname}-{language}-{version}-{storage_account}-{trigger}" + full_name = full_name.replace(".", "-") + full_name = full_name.replace("_", "-") + + return full_name + +def object_path(path: str, key: str): + app_name = os.getenv('APP_NAME') + path = f"{app_name}-{path}/{key}" + path = path.replace("_", "-") + + return path diff --git a/benchmarks/wrappers/azure/python/queue.py b/benchmarks/wrappers/azure/python/queue.py index 93824181..465ea057 100644 --- a/benchmarks/wrappers/azure/python/queue.py +++ b/benchmarks/wrappers/azure/python/queue.py @@ -1,15 +1,20 @@ +import os + from azure.identity import ManagedIdentityCredential -from azure.storage.queue import QueueClient +from azure.storage.queue import QueueClient, BinaryBase64DecodePolicy, BinaryBase64EncodePolicy class queue: client = None - def __init__(self, queue_name: str, storage_account: str): + def __init__(self, queue_name: str): + storage_account = os.getenv('ACCOUNT_ID') account_url = f"https://{storage_account}.queue.core.windows.net" managed_credential = ManagedIdentityCredential() self.client = QueueClient(account_url, queue_name=queue_name, - credential=managed_credential) + credential=managed_credential, + message_encode_policy=BinaryBase64EncodePolicy(), + message_decode_policy=BinaryBase64DecodePolicy()) def send_message(self, message: str): - self.client.send_message(message) + self.client.send_message(message.encode('utf-8')) diff --git a/benchmarks/wrappers/azure/python/storage.py b/benchmarks/wrappers/azure/python/storage.py index 4938ed1c..4257c48a 100644 --- a/benchmarks/wrappers/azure/python/storage.py +++ b/benchmarks/wrappers/azure/python/storage.py @@ -61,16 +61,20 @@ def get_instance(): def get_object(self, container, key): blob_client = self.client.get_blob_client(container=container, blob=key) - downloader = blob_client.download_blob(max_concurrency=1, encoding='UTF-8') + downloader = blob_client.download_blob() return downloader.readall() - def list_blobs(self, container): + def list_objects(self, container, prefix=None): client = 
self.client.get_container_client(container=container) # Azure returns an iterator. Turn it into a list. objs = [] - res = client.list_blob_names() + res = client.list_blob_names(name_starts_with=prefix) for obj in res: objs.append(obj) return objs + + def delete_object(self, bucket, key): + blob_client = self.client.get_blob_client(container=bucket, blob=key) + blob_client.delete_blob(delete_snapshots="include") diff --git a/benchmarks/wrappers/gcp/python/handler.py b/benchmarks/wrappers/gcp/python/handler.py index 6ce3c004..0e1cbf03 100644 --- a/benchmarks/wrappers/gcp/python/handler.py +++ b/benchmarks/wrappers/gcp/python/handler.py @@ -18,6 +18,8 @@ def handler_http(req): def handler_queue(data, context): income_timestamp = datetime.datetime.now().timestamp() + populate_env_vars() + serialized_payload = data.get('data') payload = json.loads(base64.b64decode(serialized_payload).decode("utf-8")) @@ -30,15 +32,15 @@ def handler_queue(data, context): result_queue = os.getenv('RESULT_QUEUE') if (result_queue): - project_id = context.resource.split("/")[1] - from function import queue - queue_client = queue.queue(result_queue, project_id) + queue_client = queue.queue(result_queue) queue_client.send_message(stats) def handler_storage(data, context): income_timestamp = datetime.datetime.now().timestamp() + populate_env_vars() + bucket_name = data.get('bucket') name = data.get('name') filepath = '/tmp/bucket_contents' @@ -61,10 +63,8 @@ def handler_storage(data, context): result_queue = os.getenv('RESULT_QUEUE') if (result_queue): - _, project_id = default() - from function import queue - queue_client = queue.queue(result_queue, project_id) + queue_client = queue.queue(result_queue) queue_client.send_message(stats) # Contains generic logic for gathering measurements for the function at hand, @@ -84,8 +84,8 @@ def measure(req_json) -> str: } if 'fns_triggered' in ret and ret['fns_triggered'] > 0: log_data['fns_triggered'] = ret['fns_triggered'] - if 'parent_execution_id' in ret: - log_data['parent_execution_id'] = ret['parent_execution_id'] + if 'parent_execution_id' in req_json: + log_data['parent_execution_id'] = req_json['parent_execution_id'] if 'measurement' in ret: log_data['measurement'] = ret['measurement'] if 'logs' in req_json: @@ -127,3 +127,7 @@ def measure(req_json) -> str: 'cold_start_var': cold_start_var, 'container_id': container_id, }) + +def populate_env_vars(): + _, project_id = default() + os.environ['ACCOUNT_ID'] = project_id diff --git a/benchmarks/wrappers/gcp/python/misc.py b/benchmarks/wrappers/gcp/python/misc.py new file mode 100644 index 00000000..dab78bf5 --- /dev/null +++ b/benchmarks/wrappers/gcp/python/misc.py @@ -0,0 +1,20 @@ +import os + +def function_name( + fname: str, + language: str, + version: str, + trigger: str +): + app_name = os.getenv('APP_NAME') + full_name = f"{app_name}_{fname}_{language}_{version}-{trigger}" + full_name = full_name.replace(".", "_") + + return full_name + +def object_path(path: str, key: str): + app_name = os.getenv('APP_NAME') + path = f"{app_name}-{path}/{key}" + path = path.replace("_", "-") + + return path diff --git a/benchmarks/wrappers/gcp/python/queue.py b/benchmarks/wrappers/gcp/python/queue.py index b6e009e7..f3ba7d35 100644 --- a/benchmarks/wrappers/gcp/python/queue.py +++ b/benchmarks/wrappers/gcp/python/queue.py @@ -1,12 +1,14 @@ +import os + from google.cloud import pubsub_v1 class queue: client = None - def __init__(self, topic_name: str, project_id: str): + def __init__(self, topic_name: str): self.client = 
pubsub_v1.PublisherClient() self.topic_name = 'projects/{project_id}/topics/{topic}'.format( - project_id=project_id, + project_id=os.getenv('ACCOUNT_ID'), topic=topic_name, ) diff --git a/benchmarks/wrappers/gcp/python/storage.py b/benchmarks/wrappers/gcp/python/storage.py index b08527b0..6ce891a6 100644 --- a/benchmarks/wrappers/gcp/python/storage.py +++ b/benchmarks/wrappers/gcp/python/storage.py @@ -1,4 +1,5 @@ import io +import json import os import uuid @@ -43,8 +44,10 @@ def download_directory(self, bucket, prefix, path): os.makedirs(os.path.join(path, path_to_file), exist_ok=True) self.download(bucket, file_name, os.path.join(path, file_name)) - def upload_stream(self, bucket, file, data): + def upload_stream(self, bucket, file, data, overwrite=False): key_name = storage.unique_name(file) + if (overwrite): + key_name = file bucket_instance = self.client.bucket(bucket) blob = bucket_instance.blob(key_name) blob.upload_from_file(data) @@ -61,18 +64,23 @@ def get_object(self, bucket, key): bucket_instance = self.client.bucket(bucket) blob = bucket_instance.blob(key) contents = blob.download_as_bytes() - return contents['Body'].read().decode('utf-8') + return contents def get_instance(): if storage.instance is None: storage.instance = storage() return storage.instance - def list_blobs(self, bucket): - res = self.client.list_blobs(bucket) + def list_objects(self, bucket, prefix=None): + res = self.client.list_blobs(bucket, prefix=prefix) objs = [] for obj in res: objs.append(obj.name) return objs + + def delete_object(self, bucket, key): + bucket = self.client.bucket(bucket) + blob = bucket.blob(key) + blob.delete() diff --git a/config/systems.json b/config/systems.json index 88358b60..143687c8 100644 --- a/config/systems.json +++ b/config/systems.json @@ -72,7 +72,8 @@ "files": [ "handler.py", "storage.py", - "queue.py" + "queue.py", + "misc.py" ], "packages": [] } @@ -116,7 +117,8 @@ "files": [ "handler.py", "storage.py", - "queue.py" + "queue.py", + "misc.py" ], "packages": [ "azure-storage-blob", @@ -167,7 +169,8 @@ "files": [ "handler.py", "storage.py", - "queue.py" + "queue.py", + "misc.py" ], "packages": [ "google-cloud-storage" diff --git a/docs/modularity.md b/docs/modularity.md index f6015b8e..736c2f9d 100644 --- a/docs/modularity.md +++ b/docs/modularity.md @@ -268,7 +268,8 @@ Check other platforms to see how configuration is defined, for example, for AWS: "files": [ "handler.py", "storage.py", - "queue.py" + "queue.py", + "misc.py" ], "packages": [] } diff --git a/scripts/run_experiments.py b/scripts/run_experiments.py index c9167553..ffff5b9f 100755 --- a/scripts/run_experiments.py +++ b/scripts/run_experiments.py @@ -446,6 +446,7 @@ def __init__(self, cache_client, config, docker_client, language): - function.py - storage.py - queue.py + - misc.py - resources handler.py diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index b147c996..a31f8dbc 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -120,6 +120,7 @@ def get_storage(self, replace_existing: bool = False) -> PersistentStorage: - function.py - storage.py - queue.py + - misc.py - resources handler.py @@ -226,10 +227,14 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun self.logging.info("Uploading function {} code to {}".format(func_name, code_bucket)) code_config = {"S3Bucket": code_bucket, "S3Key": code_prefix} + env_vars = {} # Result queue added as an env variable. 
- result_queue_env = {} if (code_package.benchmark_config.result_queue): - result_queue_env["RESULT_QUEUE"] = code_package.benchmark_config.result_queue + env_vars["RESULT_QUEUE"] = code_package.benchmark_config.result_queue + + # Application name added as an env variable. + if (code_package.application_name): + env_vars["APP_NAME"] = code_package.application_name ret = self.client.create_function( FunctionName=func_name, @@ -241,7 +246,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun MemorySize=memory, Timeout=timeout, Code=code_config, - Environment={"Variables": result_queue_env} + Environment={"Variables": env_vars} ) lambda_function = LambdaFunction( @@ -341,10 +346,7 @@ def update_function_configuration(self, function: Function, benchmark: Benchmark @staticmethod def default_application_name(code_package: Benchmark) -> str: - app_name = "{}-{}-{}".format( - code_package.application_name, code_package.language_name, code_package.language_version - ) - return AWS.format_function_name(app_name) + return AWS.format_function_name(code_package.application_name) @staticmethod def default_function_name(code_package: Benchmark) -> str: diff --git a/sebs/aws/queue.py b/sebs/aws/queue.py index d2d7f3a3..6a599e2a 100644 --- a/sebs/aws/queue.py +++ b/sebs/aws/queue.py @@ -55,7 +55,9 @@ def create_queue(self) -> str: self._queue_url = self.client.create_queue( QueueName=self.name, Attributes={ - "VisibilityTimeout": "3600" + # This currently works well in all cases - however it could be + # beneficial to adjust it based on the function's timeout. + "VisibilityTimeout": "540" } )["QueueUrl"] self._queue_arn = self.client.get_queue_attributes( diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index f4717137..cb595191 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -317,11 +317,10 @@ def serialize(self) -> dict: @staticmethod def deserialize(obj: dict) -> Trigger: return QueueTrigger( - obj["name"], - None, - SQS.deserialize(obj["queue"]), - SQS.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, - obj["with_result_queue"] + fname=obj["name"], + queue=SQS.deserialize(obj["queue"]), + result_queue=SQS.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + with_result_queue=obj["with_result_queue"] ) diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index 695d6102..054259fd 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -362,10 +362,8 @@ def _mount_function_code(self, code_package: Benchmark): def default_application_name(self, code_package: Benchmark) -> str: func_name = ( - "{}-{}-{}-{}".format( + "{}-{}".format( code_package.application_name, - code_package.language_name, - code_package.language_version, self.config.resources.resources_id, ) .replace(".", "-") @@ -448,6 +446,14 @@ def create_function(self, code_package: Benchmark, func_name: str) -> AzureFunct f" --settings {result_queue_env}" ) + # Add application name env var. + app_name_env = f"APP_NAME={code_package.application_name}" + self.cli_instance.execute( + f"az functionapp config appsettings set --name {func_name} " + f" --resource-group {resource_group} " + f" --settings {app_name_env}" + ) + # Set the data storage account as env vars in the function. 
resource_group = self.config.resources.resource_group(self.cli_instance) data_storage_account = self.config.resources.data_storage_account(self.cli_instance) diff --git a/sebs/azure/queue.py b/sebs/azure/queue.py index a9698254..1dcbf922 100644 --- a/sebs/azure/queue.py +++ b/sebs/azure/queue.py @@ -1,4 +1,4 @@ -import time +import base64, time from sebs.faas.queue import Queue, QueueType @@ -75,7 +75,8 @@ def receive_message(self) -> str: for msg in response: self.logging.info(f"Received a message from {self.name}") self.client.delete_message(msg) - return msg.content + msg = base64.b64decode(msg.content) + return msg self.logging.info("No messages to be received") diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index 95d06a2f..b6c3b068 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -210,12 +210,12 @@ def serialize(self) -> dict: @staticmethod def deserialize(obj: dict) -> Trigger: return QueueTrigger( - obj["name"], - obj["storage_account"], - obj["region"], - AzureQueue.deserialize(obj["queue"]), - AzureQueue.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, - obj["with_result_queue"], + fname=obj["name"], + storage_account=obj["storage_account"], + region=obj["region"], + queue=AzureQueue.deserialize(obj["queue"]), + result_queue=AzureQueue.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + with_result_queue=obj["with_result_queue"], ) diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 6412c3c0..ea3fd73f 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -193,11 +193,7 @@ def create_trigger_resource(self, func_name: str, cached=False) -> Dict: @staticmethod def default_application_name(code_package: Benchmark) -> str: - # Create function name - func_name = "{}-{}-{}".format( - code_package.application_name, code_package.language_name, code_package.language_version - ) - return GCP.format_function_name(func_name) + return GCP.format_function_name(code_package.application_name) @staticmethod def default_function_name(code_package: Benchmark) -> str: @@ -313,8 +309,11 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti full_func_name = GCP.get_full_function_name(project_name, location, func_name) get_req = self.function_client.projects().locations().functions().get(name=full_func_name) - # Add result queue env var. - result_queue_env = {"RESULT_QUEUE": code_package.benchmark_config.result_queue} + # Add result queue and application name env vars. + env_vars = { + "RESULT_QUEUE": code_package.benchmark_config.result_queue, + "APP_NAME": code_package.application_name + } try: get_req.execute() @@ -338,7 +337,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti "timeout": str(timeout) + "s", "ingressSettings": "ALLOW_ALL", "sourceArchiveUrl": "gs://" + code_bucket + "/" + code_prefix, - "environmentVariables": result_queue_env, + "environmentVariables": env_vars, } | trigger_info, ) @@ -490,8 +489,11 @@ def update_function(self, function: Function, code_package: Benchmark): # bucket) exist on GCP. trigger_info = self.create_trigger_resource(function.name, cached=True) - # Add result queue env var. - result_queue_env = {"RESULT_QUEUE": code_package.benchmark_config.result_queue} + # Add result queue and application name env vars. 
+ env_vars = { + "RESULT_QUEUE": code_package.benchmark_config.result_queue, + "APP_NAME": code_package.application_name + } req = ( self.function_client.projects() @@ -505,7 +507,7 @@ def update_function(self, function: Function, code_package: Benchmark): "availableMemoryMb": function.config.memory, "timeout": str(function.config.timeout) + "s", "sourceArchiveUrl": "gs://" + bucket + "/" + code_package_name, - "environmentVariables": result_queue_env, + "environmentVariables": env_vars, } | trigger_info, ) diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 80942806..72bdff10 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -289,11 +289,11 @@ def serialize(self) -> dict: @staticmethod def deserialize(obj: dict) -> Trigger: return QueueTrigger( - obj["name"], - obj["queue_name"], - obj["region"], - GCPQueue.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, - obj["with_result_queue"], + fname=obj["name"], + queue_name=obj["queue_name"], + region=obj["region"], + result_queue=GCPQueue.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + with_result_queue=obj["with_result_queue"], ) From 59a6c56ec060a13d0983de671e5d8653d4567904 Mon Sep 17 00:00:00 2001 From: orosca Date: Fri, 8 Nov 2024 21:19:16 +0100 Subject: [PATCH 23/26] Application: Airline Booking --- .../cancel_booking/config.json | 6 + .../cancel_booking/python/function.py | 93 +++++++++++++++ .../cancel_booking/python/requirements.txt | 0 .../collect_payment/config.json | 6 + .../collect_payment/python/function.py | 104 +++++++++++++++++ .../collect_payment/python/requirements.txt | 0 .../130.airline-booking/config.json | 4 + .../confirm_booking/config.json | 6 + .../confirm_booking/python/function.py | 92 +++++++++++++++ .../confirm_booking/python/requirements.txt | 0 .../100.webapps/130.airline-booking/input.py | 19 ++++ .../notify_booking/config.json | 6 + .../notify_booking/python/function.py | 63 +++++++++++ .../notify_booking/python/requirements.txt | 0 .../release_flight/config.json | 6 + .../release_flight/python/function.py | 48 ++++++++ .../release_flight/python/requirements.txt | 0 .../reserve_booking/config.json | 6 + .../reserve_booking/python/function.py | 106 ++++++++++++++++++ .../reserve_booking/python/requirements.txt | 0 .../reserve_flight/config.json | 7 ++ .../reserve_flight/python/function.py | 75 +++++++++++++ .../reserve_flight/python/requirements.txt | 0 23 files changed, 647 insertions(+) create mode 100644 benchmarks/100.webapps/130.airline-booking/cancel_booking/config.json create mode 100644 benchmarks/100.webapps/130.airline-booking/cancel_booking/python/function.py create mode 100644 benchmarks/100.webapps/130.airline-booking/cancel_booking/python/requirements.txt create mode 100644 benchmarks/100.webapps/130.airline-booking/collect_payment/config.json create mode 100644 benchmarks/100.webapps/130.airline-booking/collect_payment/python/function.py create mode 100644 benchmarks/100.webapps/130.airline-booking/collect_payment/python/requirements.txt create mode 100644 benchmarks/100.webapps/130.airline-booking/config.json create mode 100644 benchmarks/100.webapps/130.airline-booking/confirm_booking/config.json create mode 100755 benchmarks/100.webapps/130.airline-booking/confirm_booking/python/function.py create mode 100644 benchmarks/100.webapps/130.airline-booking/confirm_booking/python/requirements.txt create mode 100644 benchmarks/100.webapps/130.airline-booking/input.py create mode 100644 
benchmarks/100.webapps/130.airline-booking/notify_booking/config.json create mode 100644 benchmarks/100.webapps/130.airline-booking/notify_booking/python/function.py create mode 100644 benchmarks/100.webapps/130.airline-booking/notify_booking/python/requirements.txt create mode 100644 benchmarks/100.webapps/130.airline-booking/release_flight/config.json create mode 100644 benchmarks/100.webapps/130.airline-booking/release_flight/python/function.py create mode 100644 benchmarks/100.webapps/130.airline-booking/release_flight/python/requirements.txt create mode 100644 benchmarks/100.webapps/130.airline-booking/reserve_booking/config.json create mode 100755 benchmarks/100.webapps/130.airline-booking/reserve_booking/python/function.py create mode 100644 benchmarks/100.webapps/130.airline-booking/reserve_booking/python/requirements.txt create mode 100644 benchmarks/100.webapps/130.airline-booking/reserve_flight/config.json create mode 100644 benchmarks/100.webapps/130.airline-booking/reserve_flight/python/function.py create mode 100644 benchmarks/100.webapps/130.airline-booking/reserve_flight/python/requirements.txt diff --git a/benchmarks/100.webapps/130.airline-booking/cancel_booking/config.json b/benchmarks/100.webapps/130.airline-booking/cancel_booking/config.json new file mode 100644 index 00000000..9214cfb8 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/cancel_booking/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 30, + "memory": 128, + "languages": ["python"], + "trigger": "queue" +} diff --git a/benchmarks/100.webapps/130.airline-booking/cancel_booking/python/function.py b/benchmarks/100.webapps/130.airline-booking/cancel_booking/python/function.py new file mode 100644 index 00000000..c5824d07 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/cancel_booking/python/function.py @@ -0,0 +1,93 @@ +import datetime, json, os + +from . import misc +from . import queue + +from . 
import nosql +nosql_client = nosql.nosql.get_instance() + +nosql_table_name = 'booking_table' + +# import boto3 +# session = boto3.Session() +# dynamodb = session.resource('dynamodb') +# table = dynamodb.Table('booking_table') + + +def handler(event): + """AWS Lambda Function entrypoint to cancel booking + + Parameters + ---------- + event: dict, required + Step Functions State Machine event + + chargeId: string + pre-authorization charge ID + + context: object, required + Lambda Context runtime methods and attributes + Context doc: https://docs.aws.amazon.com/lambda/latest/dg/python-context-object.html + + Returns + ------- + boolean + + Raises + ------ + BookingCancellationException + Booking Cancellation Exception including error message upon failure + """ + if ('booking_id' not in event): + raise ValueError('Invalid booking ID') + + booking_id = event['booking_id'] + + print(f'Cancelling booking - {booking_id}') + update_begin = datetime.datetime.now() + # TODO: rewrite with generic nosql wrapper once it is merged + # ret = table.update_item( + # Key={'id': booking_id}, + # ConditionExpression='id = :idVal', + # UpdateExpression='SET #STATUS = :cancelled', + # ExpressionAttributeNames={'#STATUS': 'status'}, + # ExpressionAttributeValues={':idVal': booking_id, ':cancelled': 'CANCELLED'}, + # ) + update_end = datetime.datetime.now() + + release_flight_input = { + 'outbound_flight_id': event['outbound_flight_id'], + 'parent_execution_id': event['request-id'] + } + + queue_begin = datetime.datetime.now() + queue_client = queue.queue( + misc.function_name( + fname='release_flight', + language='python', + version='3.9', + trigger='queue' + ) + ) + queue_client.send_message(json.dumps(release_flight_input)) + queue_end = datetime.datetime.now() + + update_time = (update_end - update_begin) / datetime.timedelta(microseconds=1) + queue_time = (queue_end - queue_begin) / datetime.timedelta(microseconds=1) + return { + 'result': 0, + 'fns_triggered': 1, + 'measurement': { + 'update_time': update_time, + 'queue_time': queue_time + } + } + + +""" +Sample input: +{ + "booking_id": "5347fc8e-46f2-434d-9d09-fa4d31f7f266", + "outbound_flight_id": "fae7c68d-2683-4968-87a2-dfe2a090c2d1" +} +""" diff --git a/benchmarks/100.webapps/130.airline-booking/cancel_booking/python/requirements.txt b/benchmarks/100.webapps/130.airline-booking/cancel_booking/python/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/100.webapps/130.airline-booking/collect_payment/config.json b/benchmarks/100.webapps/130.airline-booking/collect_payment/config.json new file mode 100644 index 00000000..9214cfb8 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/collect_payment/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 30, + "memory": 128, + "languages": ["python"], + "trigger": "queue" +} diff --git a/benchmarks/100.webapps/130.airline-booking/collect_payment/python/function.py b/benchmarks/100.webapps/130.airline-booking/collect_payment/python/function.py new file mode 100644 index 00000000..4596f605 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/collect_payment/python/function.py @@ -0,0 +1,104 @@ +import datetime, json, os + +payment_endpoint = 'dummy' + +from . import misc +from . 
import queue + + +def handler(event): + """AWS Lambda Function entrypoint to collect payment + + Parameters + ---------- + event: dict, required + Step Functions State Machine event + + chargeId: string + pre-authorization charge ID + + Returns + ------- + dict + receiptUrl: string + receipt URL of charge collected + + price: int + amount collected + """ + if ('charge_id' not in event): + raise ValueError('Invalid Charge ID') + + pre_authorization_token = event['charge_id'] + customer_id = event['customer_id'] + + print(f'Collecting payment from customer {customer_id} using {pre_authorization_token} token') + if (not payment_endpoint): + raise ValueError('Payment API URL is invalid -- Consider reviewing PAYMENT_API_URL env') + + # This used to be an external API call: + # + # payment_payload = {'charge_id': charge_id} + # ret = requests.post(payment_endpoint, json=payment_payload) + # ret.raise_for_status() + # payment_response = ret.json() + + if (payment_successful()): + confirm_booking_input = { + 'customer_id': event['customer_id'], + 'booking_id': event['booking_id'], + 'parent_execution_id': event['request-id'] + } + + queue_begin = datetime.datetime.now() + queue_client = queue.queue( + misc.function_name( + fname='confirm_booking', + language='python', + version='3.9', + trigger='queue' + ) + ) + queue_client.send_message(json.dumps(confirm_booking_input)) + queue_end = datetime.datetime.now() + else: + cancel_booking_input = { + 'outbound_flight_id': event['outbound_flight_id'], + 'booking_id': event['booking_id'], + 'parent_execution_id': event['request-id'] + } + + queue_begin = datetime.datetime.now() + queue_client = queue.queue( + misc.function_name( + fname='cancel_booking', + language='python', + version='3.9', + trigger='queue' + ) + ) + queue_client.send_message(json.dumps(cancel_booking_input)) + queue_end = datetime.datetime.now() + + queue_time = (queue_end - queue_begin) / datetime.timedelta(microseconds=1) + return { + 'result': 0, + 'fns_triggered': 1, + 'measurement': { + 'queue_time': queue_time + } + } + +def payment_successful(): + return True # False + + +""" +Sample input: +{ + "charge_id": "ch_1EeqlbF4aIiftV70qXHQewmn", + "customer_id": "d749f277-0950-4ad6-ab04-98988721e475", + "booking_id": "5347fc8e-46f2-434d-9d09-fa4d31f7f266", + "outbound_flight_id": "fae7c68d-2683-4968-87a2-dfe2a090c2d1" +} +""" \ No newline at end of file diff --git a/benchmarks/100.webapps/130.airline-booking/collect_payment/python/requirements.txt b/benchmarks/100.webapps/130.airline-booking/collect_payment/python/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/100.webapps/130.airline-booking/config.json b/benchmarks/100.webapps/130.airline-booking/config.json new file mode 100644 index 00000000..173009a5 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/config.json @@ -0,0 +1,4 @@ +{ + "type": "app", + "resources": [] +} \ No newline at end of file diff --git a/benchmarks/100.webapps/130.airline-booking/confirm_booking/config.json b/benchmarks/100.webapps/130.airline-booking/confirm_booking/config.json new file mode 100644 index 00000000..ea0abe89 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/confirm_booking/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 30, + "memory": 128, + "languages": ["python", "nodejs"], + "trigger": "queue" +} \ No newline at end of file diff --git a/benchmarks/100.webapps/130.airline-booking/confirm_booking/python/function.py 
b/benchmarks/100.webapps/130.airline-booking/confirm_booking/python/function.py new file mode 100755 index 00000000..99b057b1 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/confirm_booking/python/function.py @@ -0,0 +1,92 @@ +import datetime, json, os, secrets + +from . import misc +from . import queue + +from . import nosql +nosql_client = nosql.nosql.get_instance() + +nosql_table_name = 'booking_table' + +# import boto3 +# session = boto3.Session() +# dynamodb = session.resource('dynamodb') +# table = dynamodb.Table('booking_table') + + +def handler(event): + """AWS Lambda Function entrypoint to confirm booking + + Parameters + ---------- + event: dict, required + Step Functions State Machine event + + bookingId: string + Unique Booking ID of an unconfirmed booking + + Returns + ------- + string + bookingReference generated + """ + if ('booking_id' not in event): + raise ValueError('Invalid booking ID') + + booking_id = event['booking_id'] + + print(f'Confirming booking - {booking_id}') + reference = secrets.token_urlsafe(4) + update_begin = datetime.datetime.now() + # TODO: rewrite with generic nosql wrapper once it is merged + # ret = table.update_item( + # Key={'id': booking_id}, + # ConditionExpression='id = :idVal', + # UpdateExpression='SET bookingReference = :br, #STATUS = :confirmed', + # ExpressionAttributeNames={'#STATUS': 'status'}, + # ExpressionAttributeValues={ + # ':br': reference, + # ':idVal': booking_id, + # ':confirmed': 'CONFIRMED', + # }, + # ReturnValues='UPDATED_NEW', + # ) + update_end = datetime.datetime.now() + + notify_booking_input = { + 'customer_id': event['customer_id'], + 'reference': reference, + 'parent_execution_id': event['request-id'] + } + + queue_begin = datetime.datetime.now() + queue_client = queue.queue( + misc.function_name( + fname='notify_booking', + language='python', + version='3.9', + trigger='queue' + ) + ) + queue_client.send_message(json.dumps(notify_booking_input)) + queue_end = datetime.datetime.now() + + update_time = (update_end - update_begin) / datetime.timedelta(microseconds=1) + queue_time = (queue_end - queue_begin) / datetime.timedelta(microseconds=1) + return { + 'result': 0, + 'fns_triggered': 1, + 'measurement': { + 'update_time': update_time, + 'queue_time': queue_time + } + } + + +""" +Sample input: +{ + "customer_id": "d749f277-0950-4ad6-ab04-98988721e475", + "booking_id": "5347fc8e-46f2-434d-9d09-fa4d31f7f266" +} +""" \ No newline at end of file diff --git a/benchmarks/100.webapps/130.airline-booking/confirm_booking/python/requirements.txt b/benchmarks/100.webapps/130.airline-booking/confirm_booking/python/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/100.webapps/130.airline-booking/input.py b/benchmarks/100.webapps/130.airline-booking/input.py new file mode 100644 index 00000000..2f13bca6 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/input.py @@ -0,0 +1,19 @@ +import glob, os + +def buckets_count(): + return (1, 0) + +def upload_files(data_root, data_dir, upload_func): + for root, dirs, files in os.walk(data_dir): + prefix = os.path.relpath(root, data_root) + for file in files: + file_name = prefix + '/' + file + filepath = os.path.join(root, file) + upload_func(0, file_name, filepath) + +def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func): + input_config = {} + input_config['charge_id'] = 'ch_1EeqlbF4aIiftV70qXHQewmn' + input_config['customer_id'] = 'd749f277-0950-4ad6-ab04-98988721e475' + 
input_config['outbound_flight_id'] = 'fae7c68d-2683-4968-87a2-dfe2a090c2d1' + return input_config diff --git a/benchmarks/100.webapps/130.airline-booking/notify_booking/config.json b/benchmarks/100.webapps/130.airline-booking/notify_booking/config.json new file mode 100644 index 00000000..9214cfb8 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/notify_booking/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 30, + "memory": 128, + "languages": ["python"], + "trigger": "queue" +} diff --git a/benchmarks/100.webapps/130.airline-booking/notify_booking/python/function.py b/benchmarks/100.webapps/130.airline-booking/notify_booking/python/function.py new file mode 100644 index 00000000..2ab8d4bd --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/notify_booking/python/function.py @@ -0,0 +1,63 @@ +import datetime, json, os + + +def handler(event): + """AWS Lambda Function entrypoint to notify booking + + Parameters + ---------- + event: dict, required + Step Functions State Machine event + + customer_id: string + Unique Customer ID + + price: string + Flight price + + bookingReference: string + Confirmed booking reference + + context: object, required + Lambda Context runtime methods and attributes + Context doc: https://docs.aws.amazon.com/lambda/latest/dg/python-context-object.html + + Returns + ------- + string + notificationId + Unique ID confirming notification delivery + + Raises + ------ + BookingNotificationException + Booking Notification Exception including error message upon failure + """ + if ('customer_id' not in event): + raise ValueError('Invalid customer ID') + + customer_id = event['customer_id'] + booking_reference = event['reference'] + + successful_subject = f'Booking confirmation for {booking_reference}' + unsuccessful_subject = f'Unable to process booking' + + subject = successful_subject if booking_reference else unsuccessful_subject + booking_status = 'confirmed' if booking_reference else 'cancelled' + + # Should we plan to support SNS-like cloud components in SeBS: + # + # payload = {'customerId': customer_id} + # ret = sns.publish( + # TopicArn=booking_sns_topic, + # Message=json.dumps(payload), + # Subject=subject, + # MessageAttributes={ + # 'Booking.Status': {'DataType': 'String', 'StringValue': booking_status} + # }, + # ) + + return { + 'result': 0, + 'measurement': {} + } diff --git a/benchmarks/100.webapps/130.airline-booking/notify_booking/python/requirements.txt b/benchmarks/100.webapps/130.airline-booking/notify_booking/python/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/100.webapps/130.airline-booking/release_flight/config.json b/benchmarks/100.webapps/130.airline-booking/release_flight/config.json new file mode 100644 index 00000000..9214cfb8 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/release_flight/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 30, + "memory": 128, + "languages": ["python"], + "trigger": "queue" +} diff --git a/benchmarks/100.webapps/130.airline-booking/release_flight/python/function.py b/benchmarks/100.webapps/130.airline-booking/release_flight/python/function.py new file mode 100644 index 00000000..412665f1 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/release_flight/python/function.py @@ -0,0 +1,48 @@ +import datetime, json, os + +from . import misc + +from . 
import nosql +nosql_client = nosql.nosql.get_instance() + +nosql_table_name = 'flight_table' + +# import boto3 +# session = boto3.Session() +# dynamodb = session.resource('dynamodb') +# table = dynamodb.Table('flight_table') + + +def handler(event): + if ('outbound_flight_id' not in event): + raise ValueError('Invalid arguments') + + outbound_flight_id = event['outbound_flight_id'] + + update_begin = datetime.datetime.now() + # TODO: rewrite with generic nosql wrapper once it is merged + # table.update_item( + # Key={'id': outbound_flight_id}, + # ConditionExpression='id = :idVal',# AND seatCapacity < maximumSeating', + # UpdateExpression='SET seatCapacity = seatCapacity + :dec', + # ExpressionAttributeValues={ + # ':idVal': outbound_flight_id, + # ':dec': 1 + # }, + # ) + update_end = datetime.datetime.now() + + update_time = (update_end - update_begin) / datetime.timedelta(microseconds=1) + return { + 'result': 0, + 'measurement': { + 'update_time': update_time + } + } + +""" +Sample input: +{ + "outbound_flight_id": "fae7c68d-2683-4968-87a2-dfe2a090c2d1" +} +""" diff --git a/benchmarks/100.webapps/130.airline-booking/release_flight/python/requirements.txt b/benchmarks/100.webapps/130.airline-booking/release_flight/python/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/100.webapps/130.airline-booking/reserve_booking/config.json b/benchmarks/100.webapps/130.airline-booking/reserve_booking/config.json new file mode 100644 index 00000000..9214cfb8 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/reserve_booking/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 30, + "memory": 128, + "languages": ["python"], + "trigger": "queue" +} diff --git a/benchmarks/100.webapps/130.airline-booking/reserve_booking/python/function.py b/benchmarks/100.webapps/130.airline-booking/reserve_booking/python/function.py new file mode 100755 index 00000000..cc6e7ce7 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/reserve_booking/python/function.py @@ -0,0 +1,106 @@ +import datetime, json, os, uuid + +from . import misc +from . import queue + +from . 
import nosql +nosql_client = nosql.nosql.get_instance() + +nosql_table_name = 'booking_table' + +# import boto3 +# session = boto3.Session() +# dynamodb = session.resource('dynamodb') +# table = dynamodb.Table('booking_table') + + +def is_booking_request_valid(booking): + return all(x in booking for x in ['outbound_flight_id', 'customer_id', 'charge_id']) + +def handler(event): + """AWS Lambda Function entrypoint to reserve a booking + + Parameters + ---------- + event: + chargeId: string + Pre-authorization payment token + + customerId: string + Customer unique identifier + + bookingOutboundFlightId: string + Outbound flight unique identifier + + Returns + ------- + bookingId: string + booking ID generated + """ + if (not is_booking_request_valid(event)): + raise ValueError('Invalid booking request') + + print(f"Reserving booking for customer {event['customer_id']}") + booking_id = str(uuid.uuid4()) + outbound_flight_id = event['outbound_flight_id'] + customer_id = event['customer_id'] + payment_token = event['charge_id'] + + booking_item = { + 'id': booking_id, + 'bookingOutboundFlightId': outbound_flight_id, + 'checkedIn': False, + 'customer': customer_id, + 'paymentToken': payment_token, + 'status': 'UNCONFIRMED', + 'createdAt': str(datetime.datetime.now()), + } + update_begin = datetime.datetime.now() + # table.put_item(Item=booking_item) + nosql_client.insert( + table_name=nosql_table_name, + data=booking_item, + ) + update_end = datetime.datetime.now() + + collect_payment_input = { + 'booking_id': booking_id, + 'customer_id': customer_id, + 'charge_id': payment_token, + 'outbound_flight_id': outbound_flight_id, + 'parent_execution_id': event['request-id'] + } + + queue_begin = datetime.datetime.now() + queue_client = queue.queue( + misc.function_name( + fname='collect_payment', + language='python', + version='3.9', + trigger='queue' + ) + ) + queue_client.send_message(json.dumps(collect_payment_input)) + queue_end = datetime.datetime.now() + + update_time = (update_end - update_begin) / datetime.timedelta(microseconds=1) + queue_time = (queue_end - queue_begin) / datetime.timedelta(microseconds=1) + return { + 'result': 0, + 'fns_triggered': 1, + 'measurement': { + 'update_time': update_time, + 'queue_time': queue_time + } + } + + +""" +Sample input: +{ + "charge_id": "ch_1EeqlbF4aIiftV70qXHQewmn", + "customer_id": "d749f277-0950-4ad6-ab04-98988721e475", + "booking_id": "5347fc8e-46f2-434d-9d09-fa4d31f7f266", + "outbound_flight_id": "fae7c68d-2683-4968-87a2-dfe2a090c2d1" +} +""" \ No newline at end of file diff --git a/benchmarks/100.webapps/130.airline-booking/reserve_booking/python/requirements.txt b/benchmarks/100.webapps/130.airline-booking/reserve_booking/python/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/100.webapps/130.airline-booking/reserve_flight/config.json b/benchmarks/100.webapps/130.airline-booking/reserve_flight/config.json new file mode 100644 index 00000000..5cfe2171 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/reserve_flight/config.json @@ -0,0 +1,7 @@ +{ + "timeout": 30, + "memory": 128, + "languages": ["python"], + "trigger": "queue", + "entrypoint": true +} diff --git a/benchmarks/100.webapps/130.airline-booking/reserve_flight/python/function.py b/benchmarks/100.webapps/130.airline-booking/reserve_flight/python/function.py new file mode 100644 index 00000000..ff303a08 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/reserve_flight/python/function.py @@ -0,0 +1,75 @@ +import datetime, 
json, os + +from . import misc +from . import queue + +from . import nosql +nosql_client = nosql.nosql.get_instance() + +nosql_table_name = 'flight_table' + +# import boto3 +# session = boto3.Session() +# dynamodb = session.resource('dynamodb') +# table = dynamodb.Table('flight_table') + + +def handler(event): + if ('outbound_flight_id' not in event): + raise ValueError('Invalid arguments') + + outbound_flight_id = event['outbound_flight_id'] + + update_begin = datetime.datetime.now() + # TODO: rewrite with generic nosql wrapper once it is merged + # table.update_item( + # Key={"id": outbound_flight_id}, + # ConditionExpression="id = :idVal AND seatCapacity > :zero", + # UpdateExpression="SET seatCapacity = seatCapacity - :dec", + # ExpressionAttributeValues={ + # ":idVal": outbound_flight_id, + # ":dec": 1, + # ":zero": 0 + # }, + # ) + update_end = datetime.datetime.now() + + reserve_booking_input = { + 'charge_id': event['charge_id'], + 'customer_id': event['customer_id'], + 'outbound_flight_id': outbound_flight_id, + 'parent_execution_id': event['request-id'] + } + + queue_begin = datetime.datetime.now() + queue_client = queue.queue( + misc.function_name( + fname='reserve_booking', + language='python', + version='3.9', + trigger='queue' + ) + ) + queue_client.send_message(json.dumps(reserve_booking_input)) + queue_end = datetime.datetime.now() + + update_time = (update_end - update_begin) / datetime.timedelta(microseconds=1) + queue_time = (queue_end - queue_begin) / datetime.timedelta(microseconds=1) + return { + 'result': 0, + 'fns_triggered': 1, + 'measurement': { + 'update_time': update_time, + 'queue_time': queue_time + } + } + + +""" +Sample input: +{ + "charge_id": "ch_1EeqlbF4aIiftV70qXHQewmn", + "customer_id": "d749f277-0950-4ad6-ab04-98988721e475", + "outbound_flight_id": "fae7c68d-2683-4968-87a2-dfe2a090c2d1" +} +""" \ No newline at end of file diff --git a/benchmarks/100.webapps/130.airline-booking/reserve_flight/python/requirements.txt b/benchmarks/100.webapps/130.airline-booking/reserve_flight/python/requirements.txt new file mode 100644 index 00000000..e69de29b From 2048011fbcad8a489d1b708c1fb7e213b0729eb1 Mon Sep 17 00:00:00 2001 From: orosca Date: Fri, 8 Nov 2024 21:20:21 +0100 Subject: [PATCH 24/26] Application: Prediction Reviews --- .../420.prediction-reviews/config.json | 4 + .../420.prediction-reviews/input.py | 27 +++++ .../prediction/config.json | 6 ++ .../prediction/python/function.py | 78 +++++++++++++++ .../prediction/python/package.sh | 32 ++++++ .../prediction/python/requirements.txt | 4 + .../training/config.json | 7 ++ .../training/python/function.py | 99 +++++++++++++++++++ .../training/python/package.sh | 32 ++++++ .../training/python/requirements.txt | 4 + 10 files changed, 293 insertions(+) create mode 100644 benchmarks/400.inference/420.prediction-reviews/config.json create mode 100644 benchmarks/400.inference/420.prediction-reviews/input.py create mode 100644 benchmarks/400.inference/420.prediction-reviews/prediction/config.json create mode 100644 benchmarks/400.inference/420.prediction-reviews/prediction/python/function.py create mode 100644 benchmarks/400.inference/420.prediction-reviews/prediction/python/package.sh create mode 100644 benchmarks/400.inference/420.prediction-reviews/prediction/python/requirements.txt create mode 100644 benchmarks/400.inference/420.prediction-reviews/training/config.json create mode 100644 benchmarks/400.inference/420.prediction-reviews/training/python/function.py create mode 100644 
benchmarks/400.inference/420.prediction-reviews/training/python/package.sh create mode 100644 benchmarks/400.inference/420.prediction-reviews/training/python/requirements.txt diff --git a/benchmarks/400.inference/420.prediction-reviews/config.json b/benchmarks/400.inference/420.prediction-reviews/config.json new file mode 100644 index 00000000..97c03cd8 --- /dev/null +++ b/benchmarks/400.inference/420.prediction-reviews/config.json @@ -0,0 +1,4 @@ +{ + "type": "app", + "resources": [] +} diff --git a/benchmarks/400.inference/420.prediction-reviews/input.py b/benchmarks/400.inference/420.prediction-reviews/input.py new file mode 100644 index 00000000..38f6106a --- /dev/null +++ b/benchmarks/400.inference/420.prediction-reviews/input.py @@ -0,0 +1,27 @@ +import glob, os + +def buckets_count(): + return (1, 0) + +def upload_files(data_root, data_dir, upload_func): + for root, dirs, files in os.walk(data_dir): + prefix = os.path.relpath(root, data_root) + for file in files: + file_name = prefix + '/' + file + filepath = os.path.join(root, file) + upload_func(0, file_name, filepath) + +def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func): + dataset_name = 'reviews50mb.csv' + upload_func(0, dataset_name, os.path.join(data_dir, 'dataset', dataset_name)) + + model_name = 'lr_model.pk' + # upload_func(0, model_name, os.path.join(data_dir, 'model', model_name)) + + input_config = {'dataset': {}, 'model': {}, 'bucket': {}} + input_config['dataset']['key'] = dataset_name + input_config['model']['key'] = model_name + input_config['bucket']['name'] = benchmarks_bucket + input_config['bucket']['path'] = input_paths[0] + input_config['input'] = 'The ambiance is magical. The food and service was nice! The lobster and cheese was to die for and our steaks were cooked perfectly.' + return input_config diff --git a/benchmarks/400.inference/420.prediction-reviews/prediction/config.json b/benchmarks/400.inference/420.prediction-reviews/prediction/config.json new file mode 100644 index 00000000..5131c929 --- /dev/null +++ b/benchmarks/400.inference/420.prediction-reviews/prediction/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 60, + "memory": 1024, + "languages": ["python"], + "trigger": "queue" +} \ No newline at end of file diff --git a/benchmarks/400.inference/420.prediction-reviews/prediction/python/function.py b/benchmarks/400.inference/420.prediction-reviews/prediction/python/function.py new file mode 100644 index 00000000..6ccd8cb3 --- /dev/null +++ b/benchmarks/400.inference/420.prediction-reviews/prediction/python/function.py @@ -0,0 +1,78 @@ +import datetime, io, joblib, os, re, sys, zipfile + +from time import time + +from . import queue +from . import storage +client = storage.storage.get_instance() + +# Extract zipped pandas - which is otherwise too large for AWS/GCP. 
+if os.path.exists('function/pandas.zip'): + zipfile.ZipFile('function/pandas.zip').extractall('/tmp/') + sys.path.append(os.path.join(os.path.dirname(__file__), '/tmp/.python_packages/lib/site-packages/')) + +if os.path.exists('./pandas.zip'): + zipfile.ZipFile('./pandas.zip').extractall('/tmp/') + sys.path.append(os.path.join(os.path.dirname(__file__), '/tmp/.python_packages/lib/site-packages/')) + +import pandas as pd + +from importlib.metadata import version + +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.linear_model import LogisticRegression + +cleanup_re = re.compile('[^a-z]+') +def cleanup(sentence): + sentence = sentence.lower() + sentence = cleanup_re.sub(' ', sentence).strip() + return sentence + +def handler(event): + x = event['input'] + bucket = event['bucket']['name'] + bucket_path = event['bucket']['path'] + dataset_key = event['dataset']['key'] + model_key = event['model']['key'] + + dataset_path = f'{bucket_path}/{dataset_key}' + model_path = f'{bucket_path}/{model_key}' + + dataset_local_path = '/tmp/' + dataset_key + model_local_path = '/tmp/' + model_key + + download_dataset_begin = datetime.datetime.now() + client.download(bucket, dataset_path, dataset_local_path) + download_dataset_end = datetime.datetime.now() + + download_model_begin = datetime.datetime.now() + client.download(bucket, model_path, model_local_path) + download_model_end = datetime.datetime.now() + + df = pd.read_csv(dataset_local_path) + + process_begin = datetime.datetime.now() + df_input = pd.DataFrame() + df_input['x'] = [x] + df_input['x'] = df_input['x'].apply(cleanup) + + df['train'] = df['Text'].apply(cleanup) + tfidf_vect = TfidfVectorizer(min_df=100).fit(df['train']) + X = tfidf_vect.transform(df_input['x']) + + model = joblib.load(model_local_path) + y = model.predict(X) + process_end = datetime.datetime.now() + + download_dataset_time = (download_dataset_end - download_dataset_begin) / datetime.timedelta(microseconds=1) + download_model_time = (download_model_end - download_model_begin) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + + return { + 'result': 0, + 'measurement': { + 'download_dataset_time': download_dataset_time, + 'download_model_time': download_model_time, + 'process_time': process_time + } + } diff --git a/benchmarks/400.inference/420.prediction-reviews/prediction/python/package.sh b/benchmarks/400.inference/420.prediction-reviews/prediction/python/package.sh new file mode 100644 index 00000000..772ec17f --- /dev/null +++ b/benchmarks/400.inference/420.prediction-reviews/prediction/python/package.sh @@ -0,0 +1,32 @@ +# Stripping package code is based on https://github.com/ryfeus/lambda-packs repo + +PACKAGE_DIR=$1 +echo "Original size $(du -sh $1 | cut -f1)" + +CUR_DIR=$(pwd) +cd $1 +# cleaning libs +rm -rf external +find . -type d -name "tests" -exec rm -rf {} + +find . -type d -name "test" -exec rm -rf {} + +find . -type d -name "bin" -not -path "*/pandas/*" -exec rm -rf {} + + +# cleaning +# stripping some of the numpy libs - libgfortran-2e0d59d6.so.5.0.0 - causes issues on Azure +find -name "*.so" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip +find -name "*.so.*" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip + +rm -r pip >/dev/null +rm -r pip-* >/dev/null +rm -r wheel >/dev/null +rm -r wheel-* >/dev/null +rm easy_install.py >/dev/null +find . 
-name \*.pyc -delete +cd ${CUR_DIR} +echo "Stripped size $(du -sh $1 | cut -f1)" + +if ([[ "${PLATFORM}" == "AWS" ]] || [[ "${PLATFORM}" == "GCP" ]]) && ([[ "${PYTHON_VERSION}" == "3.8" ]] || [[ "${PYTHON_VERSION}" == "3.9" ]]); then + zip -qr pandas.zip $1/pandas + rm -rf $1/pandas + echo "Pandas-zipped size $(du -sh ${CUR_DIR} | cut -f1)" +fi diff --git a/benchmarks/400.inference/420.prediction-reviews/prediction/python/requirements.txt b/benchmarks/400.inference/420.prediction-reviews/prediction/python/requirements.txt new file mode 100644 index 00000000..0bfc02c9 --- /dev/null +++ b/benchmarks/400.inference/420.prediction-reviews/prediction/python/requirements.txt @@ -0,0 +1,4 @@ +numpy<2 +pandas +scikit-learn +joblib diff --git a/benchmarks/400.inference/420.prediction-reviews/training/config.json b/benchmarks/400.inference/420.prediction-reviews/training/config.json new file mode 100644 index 00000000..2bef2ce3 --- /dev/null +++ b/benchmarks/400.inference/420.prediction-reviews/training/config.json @@ -0,0 +1,7 @@ +{ + "timeout": 540, + "memory": 1024, + "languages": ["python"], + "trigger": "queue", + "entrypoint": true +} diff --git a/benchmarks/400.inference/420.prediction-reviews/training/python/function.py b/benchmarks/400.inference/420.prediction-reviews/training/python/function.py new file mode 100644 index 00000000..ad562e78 --- /dev/null +++ b/benchmarks/400.inference/420.prediction-reviews/training/python/function.py @@ -0,0 +1,99 @@ +import datetime, io, joblib, json, os, re, sys, zipfile + +from time import time + +from . import misc +from . import queue +from . import storage +client = storage.storage.get_instance() + +# Extract zipped pandas - which is otherwise too large for AWS/GCP. +if os.path.exists('function/pandas.zip'): + zipfile.ZipFile('function/pandas.zip').extractall('/tmp/') + sys.path.append(os.path.join(os.path.dirname(__file__), '/tmp/.python_packages/lib/site-packages/')) + +if os.path.exists('./pandas.zip'): + zipfile.ZipFile('./pandas.zip').extractall('/tmp/') + sys.path.append(os.path.join(os.path.dirname(__file__), '/tmp/.python_packages/lib/site-packages/')) + +import pandas as pd + +from importlib.metadata import version + +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.linear_model import LogisticRegression + +cleanup_re = re.compile('[^a-z]+') +def cleanup(sentence): + sentence = sentence.lower() + sentence = cleanup_re.sub(' ', sentence).strip() + return sentence + +def handler(event): + bucket = event['bucket']['name'] + bucket_path = event['bucket']['path'] + dataset_key = event['dataset']['key'] + model_key = event['model']['key'] + + dataset_path = f'{bucket_path}/{dataset_key}' + model_path = f'{bucket_path}/{model_key}' + + model_local_path = '/tmp/' + model_key + + download_begin = datetime.datetime.now() + dataset = client.get_object(bucket, dataset_path) + download_end = datetime.datetime.now() + + df = pd.read_csv(io.BytesIO(dataset)) + + process_begin = datetime.datetime.now() + df['train'] = df['Text'].apply(cleanup) + + tfidf_vector = TfidfVectorizer(min_df=100).fit(df['train']) + + train = tfidf_vector.transform(df['train']) + + model = LogisticRegression() + model.fit(train, df['Score']) + process_end = datetime.datetime.now() + + joblib.dump(model, model_local_path) + + upload_begin = datetime.datetime.now() + client.upload(bucket, model_path, model_local_path, True) + upload_end = datetime.datetime.now() + + prediction_input = {'dataset': {}, 'model': {}, 'bucket': {}} + prediction_input['input'] = 
event['input'] + prediction_input['bucket']['name'] = bucket + prediction_input['bucket']['path'] = bucket_path + prediction_input['dataset']['key'] = dataset_key + prediction_input['model']['key'] = model_key + prediction_input['parent_execution_id'] = event['request-id'] + + queue_begin = datetime.datetime.now() + queue_client = queue.queue( + misc.function_name( + fname='prediction', + language='python', + version='3.9', + trigger='queue' + ) + ) + queue_client.send_message(json.dumps(prediction_input)) + queue_end = datetime.datetime.now() + + download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1) + queue_time = (queue_end - queue_begin) / datetime.timedelta(microseconds=1) + return { + 'result': prediction_input, + 'fns_triggered': 1, + 'measurement': { + 'download_time': download_time, + 'process_time': process_time, + 'upload_time': upload_time, + 'queue_time': queue_time + } + } diff --git a/benchmarks/400.inference/420.prediction-reviews/training/python/package.sh b/benchmarks/400.inference/420.prediction-reviews/training/python/package.sh new file mode 100644 index 00000000..772ec17f --- /dev/null +++ b/benchmarks/400.inference/420.prediction-reviews/training/python/package.sh @@ -0,0 +1,32 @@ +# Stripping package code is based on https://github.com/ryfeus/lambda-packs repo + +PACKAGE_DIR=$1 +echo "Original size $(du -sh $1 | cut -f1)" + +CUR_DIR=$(pwd) +cd $1 +# cleaning libs +rm -rf external +find . -type d -name "tests" -exec rm -rf {} + +find . -type d -name "test" -exec rm -rf {} + +find . -type d -name "bin" -not -path "*/pandas/*" -exec rm -rf {} + + +# cleaning +# stripping some of the numpy libs - libgfortran-2e0d59d6.so.5.0.0 - causes issues on Azure +find -name "*.so" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip +find -name "*.so.*" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip + +rm -r pip >/dev/null +rm -r pip-* >/dev/null +rm -r wheel >/dev/null +rm -r wheel-* >/dev/null +rm easy_install.py >/dev/null +find . 
-name \*.pyc -delete
+cd ${CUR_DIR}
+echo "Stripped size $(du -sh $1 | cut -f1)"
+
+if ([[ "${PLATFORM}" == "AWS" ]] || [[ "${PLATFORM}" == "GCP" ]]) && ([[ "${PYTHON_VERSION}" == "3.8" ]] || [[ "${PYTHON_VERSION}" == "3.9" ]]); then
+    zip -qr pandas.zip $1/pandas
+    rm -rf $1/pandas
+    echo "Pandas-zipped size $(du -sh ${CUR_DIR} | cut -f1)"
+fi
diff --git a/benchmarks/400.inference/420.prediction-reviews/training/python/requirements.txt b/benchmarks/400.inference/420.prediction-reviews/training/python/requirements.txt
new file mode 100644
index 00000000..0bfc02c9
--- /dev/null
+++ b/benchmarks/400.inference/420.prediction-reviews/training/python/requirements.txt
@@ -0,0 +1,4 @@
+numpy<2
+pandas
+scikit-learn
+joblib

From da56e974dd575f15b631d19e228d889d81b66009 Mon Sep 17 00:00:00 2001
From: orosca
Date: Fri, 8 Nov 2024 21:21:50 +0100
Subject: [PATCH 25/26] Application: Feature Generation

---
 .../400.inference/430.feature-gen/config.json |   5 +
 .../430.feature-gen/extractor/config.json     |   6 +
 .../extractor/python/function.py              |  75 +++++++++++
 .../extractor/python/package.sh               |  32 +++++
 .../extractor/python/requirements.txt         |   4 +
 .../400.inference/430.feature-gen/input.py    |  23 ++++
 .../430.feature-gen/job_status/config.json    |   6 +
 .../job_status/python/function.py             |  47 +++++++
 .../job_status/python/requirements.txt        |   0
 .../430.feature-gen/orchestrator/config.json  |   7 +
 .../orchestrator/python/function.py           | 120 ++++++++++++++++++
 .../orchestrator/python/package.sh            |  32 +++++
 .../orchestrator/python/requirements.txt      |   4 +
 .../430.feature-gen/reducer/config.json       |   6 +
 .../reducer/python/function.py                |  60 +++++++++
 .../430.feature-gen/reducer/python/package.sh |  32 +++++
 .../reducer/python/requirements.txt           |   4 +
 17 files changed, 463 insertions(+)
 create mode 100644 benchmarks/400.inference/430.feature-gen/config.json
 create mode 100644 benchmarks/400.inference/430.feature-gen/extractor/config.json
 create mode 100644 benchmarks/400.inference/430.feature-gen/extractor/python/function.py
 create mode 100644 benchmarks/400.inference/430.feature-gen/extractor/python/package.sh
 create mode 100644 benchmarks/400.inference/430.feature-gen/extractor/python/requirements.txt
 create mode 100644 benchmarks/400.inference/430.feature-gen/input.py
 create mode 100644 benchmarks/400.inference/430.feature-gen/job_status/config.json
 create mode 100644 benchmarks/400.inference/430.feature-gen/job_status/python/function.py
 create mode 100644 benchmarks/400.inference/430.feature-gen/job_status/python/requirements.txt
 create mode 100644 benchmarks/400.inference/430.feature-gen/orchestrator/config.json
 create mode 100644 benchmarks/400.inference/430.feature-gen/orchestrator/python/function.py
 create mode 100644 benchmarks/400.inference/430.feature-gen/orchestrator/python/package.sh
 create mode 100644 benchmarks/400.inference/430.feature-gen/orchestrator/python/requirements.txt
 create mode 100644 benchmarks/400.inference/430.feature-gen/reducer/config.json
 create mode 100644 benchmarks/400.inference/430.feature-gen/reducer/python/function.py
 create mode 100644 benchmarks/400.inference/430.feature-gen/reducer/python/package.sh
 create mode 100644 benchmarks/400.inference/430.feature-gen/reducer/python/requirements.txt

diff --git a/benchmarks/400.inference/430.feature-gen/config.json b/benchmarks/400.inference/430.feature-gen/config.json
new file mode 100644
index 00000000..9f6b2d1e
--- /dev/null
+++ b/benchmarks/400.inference/430.feature-gen/config.json
@@ -0,0 +1,5 @@
+{
+    "type": "app",
+    "resources": []
+}
+
\ No newline at end of file
diff --git a/benchmarks/400.inference/430.feature-gen/extractor/config.json b/benchmarks/400.inference/430.feature-gen/extractor/config.json
new file mode 100644
index 00000000..70fe5fe1
--- /dev/null
+++ b/benchmarks/400.inference/430.feature-gen/extractor/config.json
@@ -0,0 +1,6 @@
+{
+    "timeout": 300,
+    "memory": 512,
+    "languages": ["python"],
+    "trigger": "storage"
+}
diff --git a/benchmarks/400.inference/430.feature-gen/extractor/python/function.py b/benchmarks/400.inference/430.feature-gen/extractor/python/function.py
new file mode 100644
index 00000000..6fde1bee
--- /dev/null
+++ b/benchmarks/400.inference/430.feature-gen/extractor/python/function.py
@@ -0,0 +1,75 @@
+import datetime, io, json, os, re, sys, uuid, zipfile
+
+from . import misc
+from . import storage
+client = storage.storage.get_instance()
+
+# Extract zipped pandas - which is otherwise too large for AWS/GCP.
+if os.path.exists('function/pandas.zip'):
+    zipfile.ZipFile('function/pandas.zip').extractall('/tmp/')
+    sys.path.append(os.path.join(os.path.dirname(__file__), '/tmp/.python_packages/lib/site-packages/'))
+
+if os.path.exists('./pandas.zip'):
+    zipfile.ZipFile('./pandas.zip').extractall('/tmp/')
+    sys.path.append(os.path.join(os.path.dirname(__file__), '/tmp/.python_packages/lib/site-packages/'))
+
+import pandas as pd
+
+
+cleanup_re = re.compile('[^a-z]+')
+
+def cleanup(sentence):
+    sentence = sentence.lower()
+    sentence = cleanup_re.sub(' ', sentence).strip()
+    return sentence
+
+def handler(event):
+    output_bucket = event['output_bucket']['name']
+    dataset_key = event['object']['key']
+
+    # Cleanup the bucket between function iterations.
+    input_bucket = misc.function_name(
+        fname='extractor',
+        language='python',
+        version='3.9',
+        trigger='storage'
+    )
+    delete_begin = datetime.datetime.now()
+    client.delete_object(input_bucket, dataset_key)
+    delete_end = datetime.datetime.now()
+
+    # Do the work.
+    process_begin = datetime.datetime.now()
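+    # The shard arrives as a JSON-serialized dataframe prepared upstream by the orchestrator.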
+    df = pd.read_json(event['input'])
+
+    df['Text'] = df['Text'].apply(cleanup)
+    text = df['Text'].tolist()
+    result = set()
+    for item in text:
+        result.update(item.split())
+
+    feature = str(list(result))
+    feature = feature.lstrip('[').rstrip(']').replace(' ', '')
+    process_end = datetime.datetime.now()
+
+    key = misc.object_path('extractors_output', dataset_key.split('.')[0] + '.txt')
+    upload_start = datetime.datetime.now()
+    client.upload_stream(
+        output_bucket,
+        key,
+        io.BytesIO(feature.encode('utf-8')),
+        True
+    )
+    upload_end = datetime.datetime.now()
+
+    delete_time = (delete_end - delete_begin) / datetime.timedelta(microseconds=1)
+    process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1)
+    upload_time = (upload_end - upload_start) / datetime.timedelta(microseconds=1)
+    return {
+        'result': 0,
+        'measurement': {
+            'delete_time': delete_time,
+            'process_time': process_time,
+            'upload_time': upload_time
+        }
+    }
diff --git a/benchmarks/400.inference/430.feature-gen/extractor/python/package.sh b/benchmarks/400.inference/430.feature-gen/extractor/python/package.sh
new file mode 100644
index 00000000..772ec17f
--- /dev/null
+++ b/benchmarks/400.inference/430.feature-gen/extractor/python/package.sh
@@ -0,0 +1,32 @@
+# Stripping package code is based on https://github.com/ryfeus/lambda-packs repo
+
+PACKAGE_DIR=$1
+echo "Original size $(du -sh $1 | cut -f1)"
+
+CUR_DIR=$(pwd)
+cd $1
+# cleaning libs
+rm -rf external
+find . -type d -name "tests" -exec rm -rf {} +
+find . -type d -name "test" -exec rm -rf {} +
+find . -type d -name "bin" -not -path "*/pandas/*" -exec rm -rf {} +
+
+# cleaning
+# stripping some of the numpy libs - libgfortran-2e0d59d6.so.5.0.0 - causes issues on Azure
+find -name "*.so" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip
+find -name "*.so.*" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip
+
+rm -r pip >/dev/null
+rm -r pip-* >/dev/null
+rm -r wheel >/dev/null
+rm -r wheel-* >/dev/null
+rm easy_install.py >/dev/null
+find . -name \*.pyc -delete
+cd ${CUR_DIR}
+echo "Stripped size $(du -sh $1 | cut -f1)"
+
+if ([[ "${PLATFORM}" == "AWS" ]] || [[ "${PLATFORM}" == "GCP" ]]) && ([[ "${PYTHON_VERSION}" == "3.8" ]] || [[ "${PYTHON_VERSION}" == "3.9" ]]); then
+    zip -qr pandas.zip $1/pandas
+    rm -rf $1/pandas
+    echo "Pandas-zipped size $(du -sh ${CUR_DIR} | cut -f1)"
+fi
diff --git a/benchmarks/400.inference/430.feature-gen/extractor/python/requirements.txt b/benchmarks/400.inference/430.feature-gen/extractor/python/requirements.txt
new file mode 100644
index 00000000..0bfc02c9
--- /dev/null
+++ b/benchmarks/400.inference/430.feature-gen/extractor/python/requirements.txt
@@ -0,0 +1,4 @@
+numpy<2
+pandas
+scikit-learn
+joblib
diff --git a/benchmarks/400.inference/430.feature-gen/input.py b/benchmarks/400.inference/430.feature-gen/input.py
new file mode 100644
index 00000000..eef61e31
--- /dev/null
+++ b/benchmarks/400.inference/430.feature-gen/input.py
@@ -0,0 +1,23 @@
+import glob, os
+
+def buckets_count():
+    return (1, 0)
+
+def upload_files(data_root, data_dir, upload_func):
+    for root, dirs, files in os.walk(data_dir):
+        prefix = os.path.relpath(root, data_root)
+        for file in files:
+            file_name = prefix + '/' + file
+            filepath = os.path.join(root, file)
+            upload_func(0, file_name, filepath)
+
+def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func):
+    dataset_name = 'reviews10mb.csv'
+    upload_func(0, dataset_name, os.path.join(data_dir, 'dataset', dataset_name))
+
+    input_config = {'object': {}, 'bucket': {}}
+    input_config['object']['key'] = dataset_name
+    input_config['bucket']['name'] = benchmarks_bucket
+    input_config['bucket']['path'] = input_paths[0]
+    input_config['extractors'] = 5
+    return input_config
diff --git a/benchmarks/400.inference/430.feature-gen/job_status/config.json b/benchmarks/400.inference/430.feature-gen/job_status/config.json
new file mode 100644
index 00000000..178b0bf7
--- /dev/null
+++ b/benchmarks/400.inference/430.feature-gen/job_status/config.json
@@ -0,0 +1,6 @@
+{
+    "timeout": 120,
+    "memory": 128,
+    "languages": ["python"],
+    "trigger": "queue"
+}
diff --git a/benchmarks/400.inference/430.feature-gen/job_status/python/function.py b/benchmarks/400.inference/430.feature-gen/job_status/python/function.py
new file mode 100644
index 00000000..5f2676da
--- /dev/null
+++ b/benchmarks/400.inference/430.feature-gen/job_status/python/function.py
@@ -0,0 +1,47 @@
+import datetime, json, time
+
+from . import misc
+from . import queue
+from . import storage
+client = storage.storage.get_instance()
+
+
+def handler(event):
+    bucket = event['output_bucket']['name']
+    file_count = int(event['file_count'])
+
+    wait_begin = datetime.datetime.now()
+    while (True):
+        objs = client.list_objects(bucket, misc.object_path('extractors_output', ''))
+
+        if (file_count == len(objs)):
+            wait_end = datetime.datetime.now()
+            orchestrator_input = {'bucket': {}}
+            orchestrator_input['bucket']['name'] = bucket
+            orchestrator_input['start_reducer'] = True
+            orchestrator_input['parent_execution_id'] = event['request-id']
+
+            queue_begin = datetime.datetime.now()
+            queue_client = queue.queue(
+                misc.function_name(
+                    fname='orchestrator',
+                    language='python',
+                    version='3.9',
+                    trigger='queue'
+                )
+            )
+            queue_client.send_message(json.dumps(orchestrator_input))
+            queue_end = datetime.datetime.now()
+
+            wait_time = (wait_end - wait_begin) / datetime.timedelta(microseconds=1)
+            queue_time = (queue_end - queue_begin) / datetime.timedelta(microseconds=1)
+            return {
+                'result': orchestrator_input,
+                'fns_triggered': 1,
+                'measurement': {
+                    'wait_time': wait_time,
+                    'queue_time': queue_time
+                }
+            }
+        else:
+            time.sleep(10)
diff --git a/benchmarks/400.inference/430.feature-gen/job_status/python/requirements.txt b/benchmarks/400.inference/430.feature-gen/job_status/python/requirements.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/benchmarks/400.inference/430.feature-gen/orchestrator/config.json b/benchmarks/400.inference/430.feature-gen/orchestrator/config.json
new file mode 100644
index 00000000..670c7d7f
--- /dev/null
+++ b/benchmarks/400.inference/430.feature-gen/orchestrator/config.json
@@ -0,0 +1,7 @@
+{
+    "timeout": 300,
+    "memory": 1024,
+    "languages": ["python"],
+    "trigger": "queue",
+    "entrypoint": true
+}
diff --git a/benchmarks/400.inference/430.feature-gen/orchestrator/python/function.py b/benchmarks/400.inference/430.feature-gen/orchestrator/python/function.py
new file mode 100644
index 00000000..1521edb8
--- /dev/null
+++ b/benchmarks/400.inference/430.feature-gen/orchestrator/python/function.py
@@ -0,0 +1,120 @@
+import datetime, io, json, os, sys, uuid, zipfile
+
+from . import misc
+from . import queue
+from . import storage
+client = storage.storage.get_instance()
+
+# Extract zipped pandas - which is otherwise too large for AWS/GCP.
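+# The archive is produced by package.sh on AWS/GCP builds; /tmp is the writable path at runtime, so it is unpacked there and added to sys.path.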
+if os.path.exists('function/pandas.zip'):
+    zipfile.ZipFile('function/pandas.zip').extractall('/tmp/')
+    sys.path.append(os.path.join(os.path.dirname(__file__), '/tmp/.python_packages/lib/site-packages/'))
+
+if os.path.exists('./pandas.zip'):
+    zipfile.ZipFile('./pandas.zip').extractall('/tmp/')
+    sys.path.append(os.path.join(os.path.dirname(__file__), '/tmp/.python_packages/lib/site-packages/'))
+
+import pandas as pd
+import numpy as np
+
+
+def handler(event):
+    bucket = event['bucket']['name']
+
+    if ('start_reducer' in event):
+        reducer_input = {'bucket': {}}
+        reducer_input['bucket']['name'] = bucket
+        reducer_input['parent_execution_id'] = event['request-id']
+
+        queue_begin = datetime.datetime.now()
+        queue_client = queue.queue(
+            misc.function_name(
+                fname='reducer',
+                language='python',
+                version='3.9',
+                trigger='queue'
+            )
+        )
+        queue_client.send_message(json.dumps(reducer_input))
+        queue_end = datetime.datetime.now()
+
+        queue_time = (queue_end - queue_begin) / datetime.timedelta(microseconds=1)
+        return {
+            'result': reducer_input,
+            'fns_triggered': 1,
+            'measurement': {
+                'queue_time': queue_time
+            }
+        }
+
+    bucket_path = event['bucket']['path']
+    dataset_key = event['object']['key']
+    extractors = int(event['extractors'])
+
+    dataset_path = f'{bucket_path}/{dataset_key}'
+    dataset_local_path = '/tmp/' + dataset_key
+
+    download_start = datetime.datetime.now()
+    client.download(bucket, dataset_path, dataset_local_path)
+    download_end = datetime.datetime.now()
+
+    process_start = datetime.datetime.now()
+    df = pd.read_csv(dataset_local_path)
+    shards = np.array_split(df, extractors)
+    process_end = datetime.datetime.now()
+
+    # Prepare and send the output. Trigger 'extractors' and 'job_status'.
+    extractor_bucket = misc.function_name(
+        fname='extractor',
+        language='python',
+        version='3.9',
+        trigger='storage'
+    )
+
+    upload_start = datetime.datetime.now()
+    for shard in shards:
+        key = f'shard-{uuid.uuid4()}'
+
+        extractor_input = {'object': {}, 'output_bucket': {}}
+        extractor_input['object']['key'] = key
+        extractor_input['output_bucket']['name'] = bucket
+        extractor_input['input'] = shard.to_json()
+        extractor_input['parent_execution_id'] = event['request-id']
+        client.upload_stream(
+            extractor_bucket,
+            key,
+            io.BytesIO(json.dumps(extractor_input).encode('utf-8')),
+            True
+        )
+    upload_end = datetime.datetime.now()
+
+    job_status_input = {'output_bucket': {}}
+    job_status_input['output_bucket']['name'] = bucket
+    job_status_input['file_count'] = extractors
+    job_status_input['parent_execution_id'] = event['request-id']
+    queue_begin = datetime.datetime.now()
+    queue_client = queue.queue(
+        misc.function_name(
+            fname='job_status',
+            language='python',
+            version='3.9',
+            trigger='queue'
+        )
+    )
+    queue_client.send_message(json.dumps(job_status_input))
+    queue_end = datetime.datetime.now()
+
+    download_time = (download_end - download_start) / datetime.timedelta(microseconds=1)
+    process_time = (process_end - process_start) / datetime.timedelta(microseconds=1)
+    upload_time = (upload_end - upload_start) / datetime.timedelta(microseconds=1)
+    queue_time = (queue_end - queue_begin) / datetime.timedelta(microseconds=1)
+    return {
+        'result': 0,
+        'fns_triggered': extractors + 1,
+        'measurement': {
+            'download_time': download_time,
+            'process_time': process_time,
+            'upload_time': upload_time,
+            'queue_time': queue_time
+        }
+    }
diff --git a/benchmarks/400.inference/430.feature-gen/orchestrator/python/package.sh b/benchmarks/400.inference/430.feature-gen/orchestrator/python/package.sh
new file mode 100644
index 00000000..772ec17f
--- /dev/null
+++ b/benchmarks/400.inference/430.feature-gen/orchestrator/python/package.sh
@@ -0,0 +1,32 @@
+# Stripping package code is based on https://github.com/ryfeus/lambda-packs repo
+
+PACKAGE_DIR=$1
+echo "Original size $(du -sh $1 | cut -f1)"
+
+CUR_DIR=$(pwd)
+cd $1
+# cleaning libs
+rm -rf external
+find . -type d -name "tests" -exec rm -rf {} +
+find . -type d -name "test" -exec rm -rf {} +
+find . -type d -name "bin" -not -path "*/pandas/*" -exec rm -rf {} +
+
+# cleaning
+# stripping some of the numpy libs - libgfortran-2e0d59d6.so.5.0.0 - causes issues on Azure
+find -name "*.so" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip
+find -name "*.so.*" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip
+
+rm -r pip >/dev/null
+rm -r pip-* >/dev/null
+rm -r wheel >/dev/null
+rm -r wheel-* >/dev/null
+rm easy_install.py >/dev/null
+find . -name \*.pyc -delete
+cd ${CUR_DIR}
+echo "Stripped size $(du -sh $1 | cut -f1)"
+
+if ([[ "${PLATFORM}" == "AWS" ]] || [[ "${PLATFORM}" == "GCP" ]]) && ([[ "${PYTHON_VERSION}" == "3.8" ]] || [[ "${PYTHON_VERSION}" == "3.9" ]]); then
+    zip -qr pandas.zip $1/pandas
+    rm -rf $1/pandas
+    echo "Pandas-zipped size $(du -sh ${CUR_DIR} | cut -f1)"
+fi
diff --git a/benchmarks/400.inference/430.feature-gen/orchestrator/python/requirements.txt b/benchmarks/400.inference/430.feature-gen/orchestrator/python/requirements.txt
new file mode 100644
index 00000000..0bfc02c9
--- /dev/null
+++ b/benchmarks/400.inference/430.feature-gen/orchestrator/python/requirements.txt
@@ -0,0 +1,4 @@
+numpy<2
+pandas
+scikit-learn
+joblib
diff --git a/benchmarks/400.inference/430.feature-gen/reducer/config.json b/benchmarks/400.inference/430.feature-gen/reducer/config.json
new file mode 100644
index 00000000..b4d7b2b6
--- /dev/null
+++ b/benchmarks/400.inference/430.feature-gen/reducer/config.json
@@ -0,0 +1,6 @@
+{
+    "timeout": 540,
+    "memory": 1024,
+    "languages": ["python"],
+    "trigger": "queue"
+}
diff --git a/benchmarks/400.inference/430.feature-gen/reducer/python/function.py b/benchmarks/400.inference/430.feature-gen/reducer/python/function.py
new file mode 100644
index 00000000..cc7062a5
--- /dev/null
+++ b/benchmarks/400.inference/430.feature-gen/reducer/python/function.py
@@ -0,0 +1,60 @@
+import datetime, io
+from sklearn.feature_extraction.text import TfidfVectorizer
+
+from . import misc
+from . import storage
+client = storage.storage.get_instance()
+
+
+def handler(event):
+    bucket = event['bucket']['name']
+
+    list_begin = datetime.datetime.now()
+    objs = client.list_objects(bucket, misc.object_path('extractors_output', ''))
+    list_end = datetime.datetime.now()
+
+    result = []
+    preprocess_begin = datetime.datetime.now()
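+    # Merge the word lists produced by the individual extractors into a single corpus.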
+    for obj in objs:
+        body = str(client.get_object(bucket, obj))
+
+        word = body.replace("'", '').split(',')
+        result.extend(word)
+    preprocess_end = datetime.datetime.now()
+
+    # Cleanup the bucket between function iterations.
+    delete_begin = datetime.datetime.now()
+    for obj in objs:
+        client.delete_object(bucket, obj)
+    delete_end = datetime.datetime.now()
+
+    process_begin = datetime.datetime.now()
+    tfidf_vect = TfidfVectorizer().fit(result)
+    feature = str(tfidf_vect.get_feature_names_out())
+    feature = feature.lstrip('[').rstrip(']').replace(' ', '')
+    process_end = datetime.datetime.now()
+
+    upload_begin = datetime.datetime.now()
+    client.upload_stream(
+        bucket,
+        misc.object_path('reducer_output', 'feature'),
+        io.BytesIO(feature.encode('utf-8')),
+        True
+    )
+    upload_end = datetime.datetime.now()
+
+    list_time = (list_end - list_begin) / datetime.timedelta(microseconds=1)
+    preprocess_time = (preprocess_end - preprocess_begin) / datetime.timedelta(microseconds=1)
+    delete_time = (delete_end - delete_begin) / datetime.timedelta(microseconds=1)
+    process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1)
+    upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1)
+    return {
+        'result': 0,
+        'measurement': {
+            'list_time': list_time,
+            'preprocess_time': preprocess_time,
+            'delete_time': delete_time,
+            'process_time': process_time,
+            'upload_time': upload_time
+        }
+    }
diff --git a/benchmarks/400.inference/430.feature-gen/reducer/python/package.sh b/benchmarks/400.inference/430.feature-gen/reducer/python/package.sh
new file mode 100644
index 00000000..772ec17f
--- /dev/null
+++ b/benchmarks/400.inference/430.feature-gen/reducer/python/package.sh
@@ -0,0 +1,32 @@
+# Stripping package code is based on https://github.com/ryfeus/lambda-packs repo
+
+PACKAGE_DIR=$1
+echo "Original size $(du -sh $1 | cut -f1)"
+
+CUR_DIR=$(pwd)
+cd $1
+# cleaning libs
+rm -rf external
+find . -type d -name "tests" -exec rm -rf {} +
+find . -type d -name "test" -exec rm -rf {} +
+find . -type d -name "bin" -not -path "*/pandas/*" -exec rm -rf {} +
+
+# cleaning
+# stripping some of the numpy libs - libgfortran-2e0d59d6.so.5.0.0 - causes issues on Azure
+find -name "*.so" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip
+find -name "*.so.*" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip
+
+rm -r pip >/dev/null
+rm -r pip-* >/dev/null
+rm -r wheel >/dev/null
+rm -r wheel-* >/dev/null
+rm easy_install.py >/dev/null
+find . -name \*.pyc -delete
+cd ${CUR_DIR}
+echo "Stripped size $(du -sh $1 | cut -f1)"
+
+if ([[ "${PLATFORM}" == "AWS" ]] || [[ "${PLATFORM}" == "GCP" ]]) && ([[ "${PYTHON_VERSION}" == "3.8" ]] || [[ "${PYTHON_VERSION}" == "3.9" ]]); then
+    zip -qr pandas.zip $1/pandas
+    rm -rf $1/pandas
+    echo "Pandas-zipped size $(du -sh ${CUR_DIR} | cut -f1)"
+fi
diff --git a/benchmarks/400.inference/430.feature-gen/reducer/python/requirements.txt b/benchmarks/400.inference/430.feature-gen/reducer/python/requirements.txt
new file mode 100644
index 00000000..0bfc02c9
--- /dev/null
+++ b/benchmarks/400.inference/430.feature-gen/reducer/python/requirements.txt
@@ -0,0 +1,4 @@
+numpy<2
+pandas
+scikit-learn
+joblib

From 7f6d7806123046e2ca98f737339f29043def3737 Mon Sep 17 00:00:00 2001
From: orosca
Date: Fri, 8 Nov 2024 21:23:20 +0100
Subject: [PATCH 26/26] Application: Naive MapReduce

---
 .../500.scientific/505.map-reduce/config.json |   5 +
 .../500.scientific/505.map-reduce/input.py    |  14 +++
 .../505.map-reduce/mapper/config.json         |   6 ++
 .../505.map-reduce/mapper/python/function.py  |  55 ++++++++++
 .../mapper/python/requirements.txt            |   0
 .../505.map-reduce/reducer/config.json        |   6 ++
 .../505.map-reduce/reducer/python/function.py |  30 ++++++
 .../reducer/python/requirements.txt           |   0
 .../505.map-reduce/sorter/config.json         |   6 ++
 .../505.map-reduce/sorter/python/function.py  | 101 ++++++++++++++++++
 .../sorter/python/requirements.txt            |   0
 .../505.map-reduce/splitter/config.json       |   7 ++
 .../splitter/python/function.py               |  44 ++++++++
 .../splitter/python/requirements.txt          |   0
 14 files changed, 274 insertions(+)
 create mode 100644 benchmarks/500.scientific/505.map-reduce/config.json
 create mode 100644 benchmarks/500.scientific/505.map-reduce/input.py
 create mode 100644 benchmarks/500.scientific/505.map-reduce/mapper/config.json
 create mode 100644 benchmarks/500.scientific/505.map-reduce/mapper/python/function.py
 create mode 100644 benchmarks/500.scientific/505.map-reduce/mapper/python/requirements.txt
 create mode 100644 benchmarks/500.scientific/505.map-reduce/reducer/config.json
 create mode 100644 benchmarks/500.scientific/505.map-reduce/reducer/python/function.py
 create mode 100644 benchmarks/500.scientific/505.map-reduce/reducer/python/requirements.txt
 create mode 100644 benchmarks/500.scientific/505.map-reduce/sorter/config.json
 create mode 100644 benchmarks/500.scientific/505.map-reduce/sorter/python/function.py
 create mode 100644 benchmarks/500.scientific/505.map-reduce/sorter/python/requirements.txt
 create mode 100644 benchmarks/500.scientific/505.map-reduce/splitter/config.json
 create mode 100644 benchmarks/500.scientific/505.map-reduce/splitter/python/function.py
 create mode 100644 benchmarks/500.scientific/505.map-reduce/splitter/python/requirements.txt

diff --git a/benchmarks/500.scientific/505.map-reduce/config.json b/benchmarks/500.scientific/505.map-reduce/config.json
new file mode 100644
index 00000000..fd954c87
--- /dev/null
+++ b/benchmarks/500.scientific/505.map-reduce/config.json
@@ -0,0 +1,5 @@
+{
+    "type": "app",
+    "resources": []
+}
+
\ No newline at end of file
diff --git a/benchmarks/500.scientific/505.map-reduce/input.py b/benchmarks/500.scientific/505.map-reduce/input.py
new file mode 100644
index 00000000..13a5b03d
--- /dev/null
+++ b/benchmarks/500.scientific/505.map-reduce/input.py
@@ -0,0 +1,14 @@
+import glob, os
+
+def buckets_count():
+    return (1, 0)
+
+def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func):
+    # Consider using a larger text file as input:
+    # input_text_file = ''
+    # upload_func(0, input_text_file, os.path.join(data_dir, 'input_text', input_text_file))
+
+    input_config = {}
+    input_config['mappers'] = 2
+    input_config['text'] = 'the quick brown fox jumps jumps. over the lazy lazy lazy dog dog'
+    return input_config
diff --git a/benchmarks/500.scientific/505.map-reduce/mapper/config.json b/benchmarks/500.scientific/505.map-reduce/mapper/config.json
new file mode 100644
index 00000000..993a4481
--- /dev/null
+++ b/benchmarks/500.scientific/505.map-reduce/mapper/config.json
@@ -0,0 +1,6 @@
+{
+    "timeout": 30,
+    "memory": 128,
+    "languages": ["python"],
+    "trigger": "storage"
+}
diff --git a/benchmarks/500.scientific/505.map-reduce/mapper/python/function.py b/benchmarks/500.scientific/505.map-reduce/mapper/python/function.py
new file mode 100644
index 00000000..43a9f850
--- /dev/null
+++ b/benchmarks/500.scientific/505.map-reduce/mapper/python/function.py
@@ -0,0 +1,55 @@
+import datetime, io, json, os, uuid
+
+from . import misc
+from . import storage
+client = storage.storage.get_instance()
+
+
+def handler(event):
+    text = event['text']
+
+    # split by space
+    process_begin = datetime.datetime.now()
+    words = text.split(' ')
+
+    # count for every word
+    counts = {}
+    for word in words:
+        if word not in counts:
+            counts[word] = 1
+        else:
+            counts[word] += 1
+    counts = dict(sorted(counts.items()))
+    process_end = datetime.datetime.now()
+
+    sorter_input = {
+        'counts': counts,
+        'mappers': event['mappers'],
+        'parent_execution_id': event['request-id']
+    }
+
+    file_name = f'payload{str(uuid.uuid4())}.json'
+    file_path = f'/tmp/{file_name}'
+    with open(file_path, 'w') as f:
+        f.write(json.dumps(sorter_input))
+
+    bucket = misc.function_name(
+        fname='sorter',
+        language='python',
+        version='3.9',
+        trigger='storage'
+    )
+    upload_begin = datetime.datetime.now()
+    client.upload(bucket, file_name, file_path)
+    upload_end = datetime.datetime.now()
+
+    process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1)
+    upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1)
+    return {
+        'result': counts,
+        'fns_triggered': 1,
+        'measurement': {
+            'process_time': process_time,
+            'upload_time': upload_time
+        }
+    }
diff --git a/benchmarks/500.scientific/505.map-reduce/mapper/python/requirements.txt b/benchmarks/500.scientific/505.map-reduce/mapper/python/requirements.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/benchmarks/500.scientific/505.map-reduce/reducer/config.json b/benchmarks/500.scientific/505.map-reduce/reducer/config.json
new file mode 100644
index 00000000..993a4481
--- /dev/null
+++ b/benchmarks/500.scientific/505.map-reduce/reducer/config.json
@@ -0,0 +1,6 @@
+{
+    "timeout": 30,
+    "memory": 128,
+    "languages": ["python"],
+    "trigger": "storage"
+}
diff --git a/benchmarks/500.scientific/505.map-reduce/reducer/python/function.py b/benchmarks/500.scientific/505.map-reduce/reducer/python/function.py
new file mode 100644
index 00000000..8867fcdc
--- /dev/null
+++ b/benchmarks/500.scientific/505.map-reduce/reducer/python/function.py
@@ -0,0 +1,30 @@
+import datetime, io, json, os, uuid
+
+from . import storage
+client = storage.storage.get_instance()
+
+
+def handler(event):
+    text = event['input']
+
+    count = 0
+    word_for_this_reducer = ''
+
+    process_begin = datetime.datetime.now()
+    words = text.split('\n')[:-1]
+    for word in words:
+        splits = word.split(',')
+        word_for_this_reducer = splits[0]
+        count += int(splits[1])
+    process_end = datetime.datetime.now()
+
+    process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1)
+    return {
+        'result': {  # Could also be written to S3
+            word_for_this_reducer: count
+        },
+        'measurement': {
+            'process_time': process_time
+        },
+        'fns_triggered': 0
+    }
diff --git a/benchmarks/500.scientific/505.map-reduce/reducer/python/requirements.txt b/benchmarks/500.scientific/505.map-reduce/reducer/python/requirements.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/benchmarks/500.scientific/505.map-reduce/sorter/config.json b/benchmarks/500.scientific/505.map-reduce/sorter/config.json
new file mode 100644
index 00000000..57fb5b4a
--- /dev/null
+++ b/benchmarks/500.scientific/505.map-reduce/sorter/config.json
@@ -0,0 +1,6 @@
+{
+    "timeout": 60,
+    "memory": 256,
+    "languages": ["python"],
+    "trigger": "storage"
+}
diff --git a/benchmarks/500.scientific/505.map-reduce/sorter/python/function.py b/benchmarks/500.scientific/505.map-reduce/sorter/python/function.py
new file mode 100644
index 00000000..5163a0a6
--- /dev/null
+++ b/benchmarks/500.scientific/505.map-reduce/sorter/python/function.py
@@ -0,0 +1,101 @@
+import datetime, io, json, os, uuid
+
+from . import misc
+from . import storage
+client = storage.storage.get_instance()
+
+
+def handler(event):
+    mappers = int(event['mappers'])
+
+    # check that all files from the mappers are ready
+    fn_name = misc.function_name(
+        fname='sorter',
+        language='python',
+        version='3.9',
+        trigger='storage'
+    )
+
+    list_begin = datetime.datetime.now()
+    objs = client.list_objects(fn_name)
+    list_end = datetime.datetime.now()
+    list_time = (list_end - list_begin) / datetime.timedelta(microseconds=1)
+
+    if (len(objs) != mappers):
+        return {
+            'result': 0,
+            'measurement': {
+                'list_time': list_time
+            }
+        }
+
+    # download everything and stick it together: ['bear,1', 'pear,3', 'pear,4']
+    process_begin = datetime.datetime.now()
+    word_list = []
+    for obj in objs:
+        words = client.get_object(fn_name, obj)
+        words = json.loads(words)
+
+        for k, v in words['counts'].items():
+            word_list.append('{},{}'.format(k, str(v)))
+
+    # sort
+    word_list.sort()
+
+    # everything which is the same goes into one file, e.g. all pears
+    current = [word_list[0]]
+    groups = []
+    for i in range(0, len(word_list) - 1):
+        if word_list[i].split(',')[0] == word_list[i + 1].split(',')[0]:
+            current.append(word_list[i + 1])
+        else:
+            groups.append(current)
+            current = [word_list[i + 1]]
+    if (len(current)):
+        groups.append(current)
+
+    # flatten groups
+    new_group = []
+    for group in groups:
+        flattened = ''
+        for word in group:
+            flattened += word + '\n'
+        new_group.append(flattened)
+    groups = new_group
+    process_end = datetime.datetime.now()
+
+    # publish to bucket
+    upload_begin = datetime.datetime.now()
+    fns_triggered = len(groups)
+    for group in groups:
+        word = group.split(',')[0]
+
+        reducer_input = {
+            'input': group,
+            'parent_execution_id': event['request-id']
+        }
+
+        local_path = f'/tmp/{word}'
+        with open(local_path, 'w') as f:
+            f.write(json.dumps(reducer_input))
+
+        fn_name = misc.function_name(
+            fname='reducer',
+            language='python',
+            version='3.9',
+            trigger='storage'
+        )
+        client.upload(fn_name, word, local_path)
+    upload_end = datetime.datetime.now()
+
+    process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1)
+    upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1)
+    return {
+        'result': 0,
+        'fns_triggered': fns_triggered,
+        'measurement': {
+            'list_time': list_time,
+            'process_time': process_time,
+            'upload_time': upload_time
+        }
+    }
diff --git a/benchmarks/500.scientific/505.map-reduce/sorter/python/requirements.txt b/benchmarks/500.scientific/505.map-reduce/sorter/python/requirements.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/benchmarks/500.scientific/505.map-reduce/splitter/config.json b/benchmarks/500.scientific/505.map-reduce/splitter/config.json
new file mode 100644
index 00000000..40218357
--- /dev/null
+++ b/benchmarks/500.scientific/505.map-reduce/splitter/config.json
@@ -0,0 +1,7 @@
+{
+    "timeout": 30,
+    "memory": 128,
+    "languages": ["python"],
+    "trigger": "storage",
+    "entrypoint": true
+}
diff --git a/benchmarks/500.scientific/505.map-reduce/splitter/python/function.py b/benchmarks/500.scientific/505.map-reduce/splitter/python/function.py
new file mode 100644
index 00000000..3bc0b0f7
--- /dev/null
+++ b/benchmarks/500.scientific/505.map-reduce/splitter/python/function.py
@@ -0,0 +1,44 @@
+import datetime, io, json, os
+
+from . import misc
+from . import storage
+client = storage.storage.get_instance()
+
+
+def handler(event):
+    mappers = int(event['mappers'])
+    text = event['text']
+
+    # split by '.'
+    sentences = text.split('.')
+
+    # compute the chunk size per mapper
+    chunk_size = len(sentences) // mappers
+
+    # split the list according to how many mappers are declared
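+    # The payload for each mapper is staged in /tmp and uploaded to the mapper's storage-trigger bucket, which invokes the mapper.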
+    local_path = '/tmp/payload.json'
+    for i in range(mappers):
+        begin_range = i * chunk_size
+        # the last mapper also takes the remainder when the sentences do not divide evenly
+        end_range = len(sentences) if i == mappers - 1 else (i + 1) * chunk_size
+        mapper_input = {
+            'text': ' '.join(sentences[begin_range : end_range]),
+            'mappers': mappers,
+            'parent_execution_id': event['request-id']
+        }
+        with open(local_path, 'w') as f:
+            f.write(json.dumps(mapper_input))
+
+        # storage trigger code: for each mapper, upload to bucket
+        bucket = misc.function_name(
+            fname='mapper',
+            language='python',
+            version='3.9',
+            trigger='storage'
+        )
+        client.upload(bucket, f'payload{i}.json', local_path, True)
+
+    return {
+        'result': 0,
+        'fns_triggered': mappers,
+        'measurement': {}
+    }
diff --git a/benchmarks/500.scientific/505.map-reduce/splitter/python/requirements.txt b/benchmarks/500.scientific/505.map-reduce/splitter/python/requirements.txt
new file mode 100644
index 00000000..e69de29b