From 4bd6f44b77071ff8bb722ab78ea3a220583c9e67 Mon Sep 17 00:00:00 2001 From: orosca Date: Mon, 6 May 2024 15:23:35 +0200 Subject: [PATCH 01/19] Small Python version changes --- config/example.json | 2 +- config/systems.json | 3 ++- docs/platforms.md | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/config/example.json b/config/example.json index dc4da9ad..f405a3be 100644 --- a/config/example.json +++ b/config/example.json @@ -6,7 +6,7 @@ "download_results": false, "runtime": { "language": "python", - "version": "3.7" + "version": "3.9" }, "type": "invocation-overhead", "perf-cost": { diff --git a/config/systems.json b/config/systems.json index 8272078f..7009cbdb 100644 --- a/config/systems.json +++ b/config/systems.json @@ -18,7 +18,8 @@ "python": { "base_images": { "3.7": "python:3.7-slim", - "3.8": "python:3.8-slim" + "3.8": "python:3.8-slim", + "3.9": "python:3.9-slim" }, "images": [ "run", diff --git a/docs/platforms.md b/docs/platforms.md index ea3cd916..75364415 100644 --- a/docs/platforms.md +++ b/docs/platforms.md @@ -85,9 +85,9 @@ AZURE_SECRET_PASSWORD = XXXXXXXXXXXXX You can pass the credentials either using the environment variables: ``` -export AZURE_SECRET_APPLICATION_ID = XXXXXXXXXXXXXXXX -export AZURE_SECRET_TENANT = XXXXXXXXXXXX -export AZURE_SECRET_PASSWORD = XXXXXXXXXXXXX +export AZURE_SECRET_APPLICATION_ID=XXXXXXXXXXXXXXXX +export AZURE_SECRET_TENANT=XXXXXXXXXXXX +export AZURE_SECRET_PASSWORD=XXXXXXXXXXXXX ``` or in the JSON input configuration: From 728288e4cd00f9aa902a53793af0b6de17adb720 Mon Sep 17 00:00:00 2001 From: orosca Date: Mon, 6 May 2024 15:26:01 +0200 Subject: [PATCH 02/19] AWS queue and storage triggers --- benchmarks/wrappers/aws/python/handler.py | 17 ++- sebs/aws/aws.py | 18 ++- sebs/aws/function.py | 4 +- sebs/aws/triggers.py | 173 +++++++++++++++++++++- sebs/faas/function.py | 1 + 5 files changed, 206 insertions(+), 7 deletions(-) diff --git a/benchmarks/wrappers/aws/python/handler.py b/benchmarks/wrappers/aws/python/handler.py index 907b2c61..0bcfeab0 100644 --- a/benchmarks/wrappers/aws/python/handler.py +++ b/benchmarks/wrappers/aws/python/handler.py @@ -1,18 +1,31 @@ import datetime, io, json, os, sys, uuid +import boto3 # Add current directory to allow location of packages sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) -# TODO: usual trigger -# implement support for S3 and others def handler(event, context): income_timestamp = datetime.datetime.now().timestamp() + # Queue trigger + if ("Records" in event and event["Records"][0]["eventSource"] == 'aws:sqs'): + event = json.loads(event["Records"][0]["body"]) + + # Storage trigger + if ("Records" in event and "s3" in event["Records"][0]): + s3_client = boto3.client('s3') + bucket_name = event["Records"][0]["s3"]["bucket"]["name"] + file_name = event["Records"][0]["s3"]["object"]["key"] + + obj = s3_client.get_object(Bucket=bucket_name, Key=file_name) + event = json.loads(obj['Body'].read()) + # HTTP trigger with API Gateaway if 'body' in event: event = json.loads(event['body']) + req_id = context.aws_request_id event['request-id'] = req_id event['income-timestamp'] = income_timestamp diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 04f2b964..32485457 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -252,13 +252,19 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun def cached_function(self, function: Function): - from sebs.aws.triggers import LibraryTrigger + from sebs.aws.triggers import 
LibraryTrigger, QueueTrigger, StorageTrigger for trigger in function.triggers(Trigger.TriggerType.LIBRARY): trigger.logging_handlers = self.logging_handlers cast(LibraryTrigger, trigger).deployment_client = self for trigger in function.triggers(Trigger.TriggerType.HTTP): trigger.logging_handlers = self.logging_handlers + for trigger in function.triggers(Trigger.TriggerType.QUEUE): + trigger.logging_handlers = self.logging_handlers + cast(QueueTrigger, trigger).deployment_client = self + for trigger in function.triggers(Trigger.TriggerType.STORAGE): + trigger.logging_handlers = self.logging_handlers + cast(StorageTrigger, trigger).deployment_client = self """ Update function code and configuration on AWS. @@ -478,7 +484,7 @@ def download_metrics( ) def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: - from sebs.aws.triggers import HTTPTrigger + from sebs.aws.triggers import HTTPTrigger, QueueTrigger, StorageTrigger function = cast(LambdaFunction, func) @@ -505,6 +511,14 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T elif trigger_type == Trigger.TriggerType.LIBRARY: # should already exist return func.triggers(Trigger.TriggerType.LIBRARY)[0] + elif trigger_type == Trigger.TriggerType.QUEUE: + trigger = QueueTrigger(func.name, self) + trigger.logging_handlers = self.logging_handlers + self.logging.info(f"Created Queue trigger for {func.name} function.") + elif trigger_type == Trigger.TriggerType.STORAGE: + trigger = StorageTrigger(func.name, self) + trigger.logging_handlers = self.logging_handlers + self.logging.info(f"Created Storage trigger for {func.name} function.") else: raise RuntimeError("Not supported!") diff --git a/sebs/aws/function.py b/sebs/aws/function.py index a36dc821..2787ce86 100644 --- a/sebs/aws/function.py +++ b/sebs/aws/function.py @@ -38,7 +38,7 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> "LambdaFunction": from sebs.faas.function import Trigger - from sebs.aws.triggers import LibraryTrigger, HTTPTrigger + from sebs.aws.triggers import LibraryTrigger, HTTPTrigger, QueueTrigger, StorageTrigger cfg = FunctionConfig.deserialize(cached_config["config"]) ret = LambdaFunction( @@ -54,7 +54,7 @@ def deserialize(cached_config: dict) -> "LambdaFunction": for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": LibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), + {"Library": LibraryTrigger, "HTTP": HTTPTrigger, "Queue": QueueTrigger, "Storage": StorageTrigger}.get(trigger["type"]), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index f1831459..34ffafae 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -2,7 +2,10 @@ import concurrent.futures import datetime import json -from typing import Dict, Optional # noqa +from typing import Dict, Optional +import uuid # noqa + +import boto3 from sebs.aws.aws import AWS from sebs.faas.function import ExecutionResult, Trigger @@ -123,3 +126,171 @@ def serialize(self) -> dict: @staticmethod def deserialize(obj: dict) -> Trigger: return HTTPTrigger(obj["url"], obj["api-id"]) + + +class QueueTrigger(Trigger): + def __init__(self, fname: str, deployment_client: Optional[AWS] = None): + super().__init__() + self.name = fname + self._deployment_client = deployment_client + + @staticmethod + def typename() -> str: + return "AWS.QueueTrigger" + + @property + 
def deployment_client(self) -> AWS: + assert self._deployment_client + return self._deployment_client + + @deployment_client.setter + def deployment_client(self, deployment_client: AWS): + self._deployment_client = deployment_client + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.QUEUE + + def sync_invoke(self, payload: dict) -> ExecutionResult: + + self.logging.debug(f"Invoke function {self.name}") + + # Init clients + lambda_client = self.deployment_client.get_lambda_client() + sqs_client = boto3.client('sqs', region_name=self.deployment_client.config.region) + + serialized_payload = json.dumps(payload) + + # Create queue + self.logging.debug(f"Creating queue {self.name}") + + queue_url = sqs_client.create_queue(QueueName=self.name)["QueueUrl"] + queue_arn = sqs_client.get_queue_attributes( + QueueUrl=queue_url, + AttributeNames=["QueueArn"] + )["Attributes"]["QueueArn"] + + self.logging.debug(f"Created queue") + + # Add queue trigger + if (not len(lambda_client.list_event_source_mappings(EventSourceArn=queue_arn, + FunctionName=self.name) + ["EventSourceMappings"])): + lambda_client.create_event_source_mapping( + EventSourceArn=queue_arn, + FunctionName=self.name, + MaximumBatchingWindowInSeconds=1 + ) + + # Publish payload to queue + sqs_client.send_message(QueueUrl=queue_url, MessageBody=serialized_payload) + self.logging.info(f"Sent message to queue {self.name}") + + # TODO(oana): gather metrics + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + + pool = concurrent.futures.ThreadPoolExecutor() + fut = pool.submit(self.sync_invoke, payload) + return fut + + def serialize(self) -> dict: + return {"type": "Queue", "name": self.name} + + @staticmethod + def deserialize(obj: dict) -> Trigger: + return QueueTrigger(obj["name"]) + + +class StorageTrigger(Trigger): + def __init__(self, fname: str, deployment_client: Optional[AWS] = None): + super().__init__() + self.name = fname + self._deployment_client = deployment_client + + @staticmethod + def typename() -> str: + return "AWS.StorageTrigger" + + @property + def deployment_client(self) -> AWS: + assert self._deployment_client + return self._deployment_client + + @deployment_client.setter + def deployment_client(self, deployment_client: AWS): + self._deployment_client = deployment_client + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.STORAGE + + def sync_invoke(self, payload: dict) -> ExecutionResult: + + self.logging.debug(f"Invoke function {self.name}") + + # Init clients + lambda_client = self.deployment_client.get_lambda_client() + s3 = boto3.resource('s3') + + # Prep + serialized_payload = json.dumps(payload) + bucket_name = self.name.replace('_', '-') # AWS disallows underscores in bucket names + function_arn = lambda_client.get_function(FunctionName=self.name)["Configuration"]["FunctionArn"] + + # Create bucket + self.logging.info(f"Creating bucket {bucket_name}") + + region = self.deployment_client.config.region + if (region == "us-east-1"): + s3.create_bucket(Bucket=bucket_name) + else: + s3.create_bucket( + Bucket=bucket_name, + CreateBucketConfiguration={ + "LocationConstraint": region + } + ) + + self.logging.info("Created bucket") + + lambda_client.add_permission( + FunctionName=self.name, + StatementId=str(uuid.uuid1()), + Action="lambda:InvokeFunction", + Principal="s3.amazonaws.com", + SourceArn=f"arn:aws:s3:::{bucket_name}", + ) + + # Add bucket trigger + bucket_notification = s3.BucketNotification(bucket_name) + 
bucket_notification.put( + NotificationConfiguration={'LambdaFunctionConfigurations': [ + { + 'LambdaFunctionArn': function_arn, + 'Events': [ + 's3:ObjectCreated:*' + ], + + }, + ]} + ) + + # Put object + s3.Object(bucket_name, 'payload.json').put(Body=serialized_payload) + self.logging.info(f"Uploaded payload to bucket {bucket_name}") + + # TODO(oana): gather metrics + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + + pool = concurrent.futures.ThreadPoolExecutor() + fut = pool.submit(self.sync_invoke, payload) + return fut + + def serialize(self) -> dict: + return {"type": "Storage", "name": self.name} + + @staticmethod + def deserialize(obj: dict) -> Trigger: + return StorageTrigger(obj["name"]) diff --git a/sebs/faas/function.py b/sebs/faas/function.py index c2226cee..df732360 100644 --- a/sebs/faas/function.py +++ b/sebs/faas/function.py @@ -179,6 +179,7 @@ class TriggerType(Enum): HTTP = "http" LIBRARY = "library" STORAGE = "storage" + QUEUE = "queue" @staticmethod def get(name: str) -> "Trigger.TriggerType": From 9c3a01653a2afef2bc24cb8ff295d571e8f1cc53 Mon Sep 17 00:00:00 2001 From: orosca Date: Mon, 6 May 2024 15:27:32 +0200 Subject: [PATCH 03/19] GCP queue and storage triggers --- benchmarks/wrappers/gcp/python/handler.py | 38 +++++- sebs/gcp/function.py | 9 +- sebs/gcp/gcp.py | 149 ++++++++++++++++++---- sebs/gcp/triggers.py | 116 +++++++++++++++++ 4 files changed, 282 insertions(+), 30 deletions(-) diff --git a/benchmarks/wrappers/gcp/python/handler.py b/benchmarks/wrappers/gcp/python/handler.py index b9017b52..e5093061 100644 --- a/benchmarks/wrappers/gcp/python/handler.py +++ b/benchmarks/wrappers/gcp/python/handler.py @@ -1,9 +1,10 @@ -import datetime, io, json, os, uuid, sys +import base64, datetime, io, json, os, uuid, sys -sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) +from google.cloud import storage as gcp_storage +sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) -def handler(req): +def handler_http(req): income_timestamp = datetime.datetime.now().timestamp() req_id = req.headers.get('Function-Execution-Id') @@ -62,3 +63,34 @@ def handler(req): 'cold_start_var': cold_start_var, 'container_id': container_id, }), 200, {'ContentType': 'application/json'} + +def handler_queue(data, context): + serialized_payload = data.get('data') + payload = json.loads(base64.b64decode(serialized_payload).decode("ascii")) + + from function import function + ret = function.handler(payload) + + # TODO(oana) + +def handler_storage(data, context): + bucket_name = data.get('bucket') + name = data.get('name') + filepath = '/tmp/bucket_contents' + client = gcp_storage.Client(); + + print("Download {}:{} to {}".format(bucket_name, name, filepath)) + print(data) + bucket_instance = client.bucket(bucket_name) + blob = bucket_instance.blob(name) + blob.download_to_filename(filepath) + + payload = {} + + with open(filepath, 'r') as fp: + payload = json.load(fp) + + from function import function + ret = function.handler(payload) + + # TODO(oana) diff --git a/sebs/gcp/function.py b/sebs/gcp/function.py index d9c55a03..8354abc8 100644 --- a/sebs/gcp/function.py +++ b/sebs/gcp/function.py @@ -29,7 +29,8 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> "GCPFunction": from sebs.faas.function import Trigger - from sebs.gcp.triggers import LibraryTrigger, HTTPTrigger + from sebs.gcp.triggers import LibraryTrigger, HTTPTrigger, \ + QueueTrigger, 
StorageTrigger cfg = FunctionConfig.deserialize(cached_config["config"]) ret = GCPFunction( @@ -42,7 +43,11 @@ def deserialize(cached_config: dict) -> "GCPFunction": for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": LibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), + {"Library": LibraryTrigger, + "HTTP": HTTPTrigger, + "Queue": QueueTrigger, + "Storage": StorageTrigger + }.get(trigger["type"]), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 45146974..bef8950a 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -99,6 +99,86 @@ def get_storage( self.storage.replace_existing = replace_existing return self.storage + """ + Provide the fully qualified name of a trigger resource (queue or storage). + """ + def get_trigger_resource_name(self, func_name: str) -> str: + trigger = func_name.split("-")[-1] + + assert trigger == "queue" or trigger == "storage" + + if (trigger == "queue"): + return 'projects/{project_name}/topics/{topic}'.format( + project_name=self.config.project_name, + topic=func_name + ) + else: + return 'projects/{project_name}/buckets/{bucket}'.format( + project_name=self.config.project_name, + bucket=func_name + ) + + """ + Trigger resources (queue, bucket) must exist on GCP before the + corresponding function is first deployed. + + This function creates the required resources and returns a dict + containing trigger information required by create_req inside of + create_function. + + :param func_name: the name of the function to be deployed, + including its trigger + + :return: JSON/dict with the trigger configuration required by GCP + on function creation/update + """ + def create_trigger_resource(self, func_name: str) -> Dict: + trigger = func_name.split("-")[-1] + + if (trigger == "queue"): + pub_sub = build("pubsub", "v1", cache_discovery=False) + topic_name = self.get_trigger_resource_name(func_name) + + self.logging.info(f"Creating queue '{topic_name}'") + try: + pub_sub.projects().topics().create(name=topic_name).execute() + self.logging.info("Created queue") + except HttpError as http_error: + if (http_error.resp.status == 409): + self.logging.info("Queue already exists, reusing...") + + return { + "eventTrigger": { + "eventType": "providers/cloud.pubsub/eventTypes/topic.publish", + "resource": topic_name, + }, + "entryPoint": "handler_queue", + } + elif (trigger == "storage"): + storage = build("storage", "v1", cache_discovery=False) + bucket_name = self.get_trigger_resource_name(func_name) + + self.logging.info(f"Creating storage bucket '{bucket_name}'") + try: + storage.buckets().insert( + project=self.config.project_name, + body={ "name": func_name }, + ).execute() + self.logging.info("Created storage bucket") + except HttpError as http_error: + if (http_error.resp.status == 409): + self.logging.info("Storage bucket already exists, reusing...") + + return { + "eventTrigger": { + "eventType": "google.storage.object.finalize", + "resource": bucket_name, + }, + "entryPoint": "handler_storage", + } + # HTTP triggers do not require resource creation + return { "httpsTrigger": {}, "entryPoint": "handler_http" } + @staticmethod def default_function_name(code_package: Benchmark) -> str: # Create function name @@ -212,6 +292,10 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti try: get_req.execute() except HttpError: + # Before creating the function, ensure all trigger 
resources (queue, + # bucket) exist on GCP. + trigger_info = self.create_trigger_resource(func_name) + create_req = ( self.function_client.projects() .locations() @@ -222,14 +306,12 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti ), body={ "name": full_func_name, - "entryPoint": "handler", "runtime": code_package.language_name + language_runtime.replace(".", ""), "availableMemoryMb": memory, "timeout": str(timeout) + "s", - "httpsTrigger": {}, "ingressSettings": "ALLOW_ALL", "sourceArchiveUrl": "gs://" + code_bucket + "/" + code_package_name, - }, + } | trigger_info, ) ) create_req.execute() @@ -278,28 +360,34 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti return function def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: - from sebs.gcp.triggers import HTTPTrigger + from sebs.gcp.triggers import HTTPTrigger, QueueTrigger, StorageTrigger - if trigger_type == Trigger.TriggerType.HTTP: + location = self.config.region + project_name = self.config.project_name + full_func_name = GCP.get_full_function_name(project_name, location, function.name) + self.logging.info(f"Function {function.name} - waiting for deployment...") + our_function_req = ( + self.function_client.projects().locations().functions().get(name=full_func_name) + ) + deployed = False + while not deployed: + status_res = our_function_req.execute() + if status_res["status"] == "ACTIVE": + deployed = True + else: + time.sleep(3) + self.logging.info(f"Function {function.name} - deployed!") - location = self.config.region - project_name = self.config.project_name - full_func_name = GCP.get_full_function_name(project_name, location, function.name) - self.logging.info(f"Function {function.name} - waiting for deployment...") - our_function_req = ( - self.function_client.projects().locations().functions().get(name=full_func_name) - ) - deployed = False - while not deployed: - status_res = our_function_req.execute() - if status_res["status"] == "ACTIVE": - deployed = True - else: - time.sleep(3) - self.logging.info(f"Function {function.name} - deployed!") + if trigger_type == Trigger.TriggerType.HTTP: invoke_url = status_res["httpsTrigger"]["url"] - trigger = HTTPTrigger(invoke_url) + self.logging.info(f"Created HTTP trigger for {function.name} function") + elif trigger_type == Trigger.TriggerType.QUEUE: + trigger = QueueTrigger(function.name, self) + self.logging.info(f"Created Queue trigger for {function.name} function") + elif trigger_type == Trigger.TriggerType.STORAGE: + trigger = StorageTrigger(function.name) + self.logging.info(f"Created Storage trigger for {function.name} function") else: raise RuntimeError("Not supported!") @@ -311,12 +399,20 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) def cached_function(self, function: Function): from sebs.faas.function import Trigger - from sebs.gcp.triggers import LibraryTrigger + from sebs.gcp.triggers import LibraryTrigger, QueueTrigger, StorageTrigger for trigger in function.triggers(Trigger.TriggerType.LIBRARY): gcp_trigger = cast(LibraryTrigger, trigger) gcp_trigger.logging_handlers = self.logging_handlers gcp_trigger.deployment_client = self + for trigger in function.triggers(Trigger.TriggerType.QUEUE): + gcp_trigger = cast(QueueTrigger, trigger) + gcp_trigger.logging_handlers = self.logging_handlers + gcp_trigger.deployment_client = self + for trigger in function.triggers(Trigger.TriggerType.STORAGE): + gcp_trigger = cast(StorageTrigger, 
trigger) + gcp_trigger.logging_handlers = self.logging_handlers + gcp_trigger.deployment_client = self def update_function(self, function: Function, code_package: Benchmark): @@ -331,6 +427,11 @@ def update_function(self, function: Function, code_package: Benchmark): full_func_name = GCP.get_full_function_name( self.config.project_name, self.config.region, function.name ) + + # Before creating the function, ensure all trigger resources (queue, + # bucket) exist on GCP. + trigger_info = self.create_trigger_resource(function.name) + req = ( self.function_client.projects() .locations() @@ -339,13 +440,11 @@ def update_function(self, function: Function, code_package: Benchmark): name=full_func_name, body={ "name": full_func_name, - "entryPoint": "handler", "runtime": code_package.language_name + language_runtime.replace(".", ""), "availableMemoryMb": function.config.memory, "timeout": str(function.config.timeout) + "s", - "httpsTrigger": {}, "sourceArchiveUrl": "gs://" + bucket + "/" + code_package_name, - }, + } | trigger_info, ) ) res = req.execute() diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 13cc3d6c..2ad08637 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -1,9 +1,15 @@ +import base64 import concurrent.futures import datetime import json +import os import time +from googleapiclient.discovery import build +from googleapiclient.errors import HttpError from typing import Dict, Optional # noqa +from google.cloud import storage as gcp_storage + from sebs.gcp.gcp import GCP from sebs.faas.function import ExecutionResult, Trigger @@ -111,3 +117,113 @@ def serialize(self) -> dict: @staticmethod def deserialize(obj: dict) -> Trigger: return HTTPTrigger(obj["url"]) + + +class QueueTrigger(Trigger): + def __init__(self, fname: str, deployment_client: Optional[GCP] = None): + super().__init__() + self.name = fname + self._deployment_client = deployment_client + + @staticmethod + def typename() -> str: + return "GCP.QueueTrigger" + + @property + def deployment_client(self) -> GCP: + assert self._deployment_client + return self._deployment_client + + @deployment_client.setter + def deployment_client(self, deployment_client: GCP): + self._deployment_client = deployment_client + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.QUEUE + + def sync_invoke(self, payload: dict) -> ExecutionResult: + + self.logging.info(f"Invoke function {self.name}") + + # Init client + pub_sub = build("pubsub", "v1", cache_discovery=False) + + # Prep + # GCP is very particular with data encoding... 
+ serialized_payload = base64.b64encode(json.dumps(payload).encode("ascii")) + + # Publish payload to queue + pub_sub.projects().topics().publish( + topic=self.deployment_client.get_trigger_resource_name(self.name), + body={ + "messages": [{ + "data": serialized_payload.decode("utf-8") + }], + } + ).execute() + + # TODO(oana): gather metrics + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + + pool = concurrent.futures.ThreadPoolExecutor() + fut = pool.submit(self.sync_invoke, payload) + return fut + + def serialize(self) -> dict: + return {"type": "Queue", "name": self.name} + + @staticmethod + def deserialize(obj: dict) -> Trigger: + return QueueTrigger(obj["name"]) + + +class StorageTrigger(Trigger): + def __init__(self, fname: str): + super().__init__() + self.name = fname + + @staticmethod + def typename() -> str: + return "GCP.StorageTrigger" + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.STORAGE + + def sync_invoke(self, payload: dict) -> ExecutionResult: + + self.logging.info(f"Invoke function {self.name}") + + # Init clients + bucket_name = self.name + client = gcp_storage.Client(); + bucket_instance = client.bucket(bucket_name) + + # Prep + file_name = "payload.json" + with open(file_name, "w") as fp: + json.dump(payload, fp) + + # Upload object + gcp_storage.blob._MAX_MULTIPART_SIZE = 5 * 1024 * 1024 + blob = bucket_instance.blob(blob_name=payload, chunk_size=4 * 1024 * 1024) + blob.upload_from_filename(file_name) + + self.logging.info(f"Uploaded payload to bucket {bucket_name}") + + # TODO(oana): gather metrics + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + + pool = concurrent.futures.ThreadPoolExecutor() + fut = pool.submit(self.sync_invoke, payload) + return fut + + def serialize(self) -> dict: + return {"type": "Storage", "name": self.name} + + @staticmethod + def deserialize(obj: dict) -> Trigger: + return StorageTrigger(obj["name"]) From 63ab522fd98763dfcb7d2dcb31b7ddc4a9f1a461 Mon Sep 17 00:00:00 2001 From: orosca Date: Mon, 6 May 2024 15:29:01 +0200 Subject: [PATCH 04/19] Azure queue and storage triggers --- benchmarks/wrappers/azure/python/handler.py | 24 +++- sebs.py | 13 +- sebs/azure/azure.py | 136 +++++++++++++++++--- sebs/azure/function.py | 20 ++- sebs/azure/triggers.py | 125 ++++++++++++++++++ sebs/benchmark.py | 6 +- sebs/experiments/config.py | 9 +- 7 files changed, 301 insertions(+), 32 deletions(-) diff --git a/benchmarks/wrappers/azure/python/handler.py b/benchmarks/wrappers/azure/python/handler.py index 5f7f14f2..6375de39 100644 --- a/benchmarks/wrappers/azure/python/handler.py +++ b/benchmarks/wrappers/azure/python/handler.py @@ -1,12 +1,11 @@ -import datetime, io, json, os, uuid +import base64 +import datetime, io, json, logging, os, uuid import azure.functions as func -# TODO: usual trigger -# implement support for blob and others -def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: +def handler_http(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: income_timestamp = datetime.datetime.now().timestamp() req_json = req.get_json() if 'connection_string' in req_json: @@ -73,3 +72,20 @@ def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: mimetype="application/json" ) +def handler_queue(msg: func.QueueMessage): + logging.info('Python queue trigger function processed a queue item.') + payload = msg.get_body().decode('utf-8') + + from . 
import function + ret = function.handler(payload) + + # TODO(oana) + +def handler_storage(blob: func.InputStream): + logging.info('Python Blob trigger function processed %s', blob.name) + payload = blob.readline().decode('utf-8') # TODO(oana) + + from . import function + ret = function.handler(payload) + + # TODO(oana) diff --git a/sebs.py b/sebs.py index fe25155f..50bca451 100755 --- a/sebs.py +++ b/sebs.py @@ -166,7 +166,7 @@ def benchmark(): @click.option("--repetitions", default=5, type=int, help="Number of experimental repetitions.") @click.option( "--trigger", - type=click.Choice(["library", "http"]), + type=click.Choice(["library", "http", "queue", "storage"]), default="http", help="Function trigger to be used.", ) @@ -217,6 +217,9 @@ def invoke( if image_tag_prefix is not None: sebs_client.config.image_tag_prefix = image_tag_prefix + # Insert trigger into (experiment) config. Required by Azure when packaging. + update_nested_dict(config, ["experiments", "trigger"], (trigger if trigger is not None else "http")) + experiment_config = sebs_client.get_experiment_config(config["experiments"]) update_nested_dict(config, ["experiments", "benchmark"], benchmark) benchmark_obj = sebs_client.get_benchmark( @@ -230,9 +233,15 @@ def invoke( if timeout is not None: benchmark_obj.benchmark_config.timeout = timeout + function_name = function_name if function_name else deployment_client.default_function_name(benchmark_obj) + + # GCP: augment function name with trigger type: _http, _queue etc. + if deployment_client.name() == "gcp" or deployment_client.name() == "azure": + function_name = "{}-{}".format(function_name, trigger) + func = deployment_client.get_function( benchmark_obj, - function_name if function_name else deployment_client.default_function_name(benchmark_obj), + function_name, ) storage = deployment_client.get_storage(replace_existing=experiment_config.update_storage) input_config = benchmark_obj.prepare_input(storage=storage, size=benchmark_input_size) diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index 17316c2b..974e1bcb 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -11,7 +11,7 @@ from sebs.azure.cli import AzureCLI from sebs.azure.function import AzureFunction from sebs.azure.config import AzureConfig, AzureResources -from sebs.azure.triggers import AzureTrigger, HTTPTrigger +from sebs.azure.triggers import AzureTrigger, HTTPTrigger, QueueTrigger, StorageTrigger from sebs.faas.function import Trigger from sebs.benchmark import Benchmark from sebs.cache import Cache @@ -35,6 +35,10 @@ class Azure(System): def name(): return "azure" + @staticmethod + def typename(): + return "Azure" + @property def config(self) -> AzureConfig: return self._config @@ -114,6 +118,60 @@ def get_storage(self, replace_existing: bool = False) -> PersistentStorage: self.storage.replace_existing = replace_existing return self.storage + """ + Composes the JSON config that describes the trigger and bindings configs + for a given function to be run on Azure. 
+ + :param benchmark: + :param exec_files: the files which define and implement the function to be executed + :return: JSON dictionary containing the function configuration + """ + def create_function_json(self, benchmark, exec_files) -> Dict: + trigger = benchmark.split("-")[-1] + + if (trigger == "queue"): + return { + "scriptFile": exec_files, + "entryPoint": "handler_queue", + "bindings": [ + { + "name": "msg", + "type": "queueTrigger", + "direction": "in", + "queueName": benchmark, + "connection": "AzureWebJobsStorage" + } + ] + } + elif (trigger == "storage"): + return { + "scriptFile": exec_files, + "entryPoint": "handler_storage", + "bindings": [ + { + "name": "blob", + "type": "blobTrigger", + "direction": "in", + "path": benchmark, + "connection": "AzureWebJobsStorage" + } + ] + } + return { # HTTP + "scriptFile": exec_files, + "entryPoint": "handler_http", + "bindings": [ + { + "authLevel": "anonymous", + "type": "httpTrigger", + "direction": "in", + "name": "req", + "methods": ["get", "post"], + }, + {"type": "http", "direction": "out", "name": "$return"}, + ], + } + # Directory structure # handler # - source files @@ -148,23 +206,26 @@ def package_code( source_file = os.path.join(directory, f) shutil.move(source_file, handler_dir) + benchmark_stripped = '-'.join(benchmark.split("-")[:-1]) + trigger = benchmark.split("-")[-1] + func_name = ( + "{}-{}-{}-{}-{}".format( + benchmark_stripped, + language_name, + language_version, + self.config.resources_id, + trigger + ) + .replace(".", "-") + .replace("_", "-") + ) + # generate function.json - # TODO: extension to other triggers than HTTP - default_function_json = { - "scriptFile": EXEC_FILES[language_name], - "bindings": [ - { - "authLevel": "anonymous", - "type": "httpTrigger", - "direction": "in", - "name": "req", - "methods": ["get", "post"], - }, - {"type": "http", "direction": "out", "name": "$return"}, - ], - } json_out = os.path.join(directory, "handler", "function.json") - json.dump(default_function_json, open(json_out, "w"), indent=2) + json.dump( + self.create_function_json(func_name, EXEC_FILES[language_name]), + open(json_out, "w"), indent=2 + ) # generate host.json default_host_json = { @@ -258,9 +319,11 @@ def update_function(self, function: Function, code_package: Benchmark): self._mount_function_code(code_package) url = self.publish_function(function, code_package, True) - trigger = HTTPTrigger(url, self.config.resources.data_storage_account(self.cli_instance)) - trigger.logging_handlers = self.logging_handlers - function.add_trigger(trigger) + # TODO(oana): this might need refactoring + if (function.name.endswith("http")): + trigger = HTTPTrigger(url, self.config.resources.data_storage_account(self.cli_instance)) + trigger.logging_handlers = self.logging_handlers + function.add_trigger(trigger) def update_function_configuration(self, function: Function, code_package: Benchmark): # FIXME: this does nothing currently - we don't specify timeout @@ -368,7 +431,6 @@ def create_function(self, code_package: Benchmark, func_name: str) -> AzureFunct return function def cached_function(self, function: Function): - data_storage_account = self.config.resources.data_storage_account(self.cli_instance) for trigger in function.triggers_all(): azure_trigger = cast(AzureTrigger, trigger) @@ -494,8 +556,40 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) """ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: - raise NotImplementedError() + from 
sebs.azure.triggers import QueueTrigger, StorageTrigger + azure_function = cast(AzureFunction, function) + resource_group = self.config.resources.resource_group(self.cli_instance) + storage_account = azure_function.function_storage.account_name + + ret = self.cli_instance.execute( + ('az storage account show --resource-group {} --name {} --query id') + .format(resource_group, storage_account) + ) + self.cli_instance.execute( + ('az role assignment create --assignee "{}" \ + --role "Storage {} Data Contributor" \ + --scope {}') + .format( + os.environ["AZURE_USER_PRINCIPAL_NAME"], + "Queue" if trigger_type == Trigger.TriggerType.QUEUE else "Blob", + ret.decode("utf-8") + ) + ) + + if trigger_type == Trigger.TriggerType.QUEUE: + trigger = QueueTrigger(function.name, storage_account) + self.logging.info(f"Created Queue trigger for {function.name} function") + elif trigger_type == Trigger.TriggerType.STORAGE: + trigger = StorageTrigger(function.name, storage_account) + self.logging.info(f"Created Storage trigger for {function.name} function") + else: + raise RuntimeError("Not supported!") + + trigger.logging_handlers = self.logging_handlers + function.add_trigger(trigger) + self.cache_client.update_function(function) + return trigger # # def create_azure_function(self, fname, config): diff --git a/sebs/azure/function.py b/sebs/azure/function.py index 61ef4c57..c822f545 100644 --- a/sebs/azure/function.py +++ b/sebs/azure/function.py @@ -1,3 +1,5 @@ +from typing import cast + from sebs.azure.config import AzureResources from sebs.faas.function import Function, FunctionConfig @@ -13,6 +15,10 @@ def __init__( ): super().__init__(benchmark, name, code_hash, cfg) self.function_storage = function_storage + + @staticmethod + def typename() -> str: + return "Azure.AzureFunction" def serialize(self) -> dict: return { @@ -22,6 +28,10 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> Function: + from sebs.faas.function import Trigger + from sebs.azure.triggers import HTTPTrigger, \ + QueueTrigger, StorageTrigger + cfg = FunctionConfig.deserialize(cached_config["config"]) ret = AzureFunction( cached_config["name"], @@ -30,10 +40,14 @@ def deserialize(cached_config: dict) -> Function: AzureResources.Storage.deserialize(cached_config["function_storage"]), cfg, ) - from sebs.azure.triggers import HTTPTrigger - for trigger in cached_config["triggers"]: - trigger_type = {"HTTP": HTTPTrigger}.get(trigger["type"]) + trigger_type = cast( + Trigger, + {"HTTP": HTTPTrigger, + "Queue": QueueTrigger, + "Storage": StorageTrigger + }.get(trigger["type"]), + ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) return ret diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index 66be8c6d..e893e958 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -1,6 +1,15 @@ +import base64 import concurrent.futures +import json +import os from typing import Any, Dict, Optional # noqa +from azure.core.exceptions import ResourceExistsError +from azure.identity import DefaultAzureCredential +from azure.storage.blob import BlobServiceClient +from azure.storage.queue import QueueServiceClient, QueueClient, QueueMessage, BinaryBase64DecodePolicy, BinaryBase64EncodePolicy +from sebs.azure.cli import AzureCLI + from sebs.azure.config import AzureResources from sebs.faas.function import ExecutionResult, Trigger @@ -45,3 +54,119 @@ def serialize(self) -> dict: @staticmethod def deserialize(obj: dict) -> Trigger: 
return HTTPTrigger(obj["url"]) + + +class QueueTrigger(Trigger): + def __init__(self, fname: str, storage_account: str): + super().__init__() + self.name = fname + self.storage_account = storage_account + + @staticmethod + def typename() -> str: + return "Azure.QueueTrigger" + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.QUEUE + + def sync_invoke(self, payload: dict) -> ExecutionResult: + + self.logging.info(f"Invoke function {self.name}") + + # Init client + account_url = f"https://{self.storage_account}.queue.core.windows.net" + default_credential = DefaultAzureCredential() + queue_client = QueueClient(account_url, + queue_name=self.name, + credential=default_credential) + + serialized_payload = base64.b64encode(json.dumps(payload).encode('utf-8')).decode('utf-8') + + # Create queue + self.logging.info(f"Creating queue {self.name}") + + try: + queue_client.create_queue() + self.logging.info("Created queue") + except ResourceExistsError: + self.logging.info("Queue already exists, reusing...") + + # Publish payload to queue + queue_client.send_message(serialized_payload) + self.logging.info(f"Sent message to queue {self.name}") + + # TODO(oana): gather metrics + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + + pool = concurrent.futures.ThreadPoolExecutor() + fut = pool.submit(self.sync_invoke, payload) + return fut + + def serialize(self) -> dict: + return {"type": "Queue", "name": self.name, "storage_account": self.storage_account} + + @staticmethod + def deserialize(obj: dict) -> Trigger: + return QueueTrigger(obj["name"], obj["storage_account"]) + + +class StorageTrigger(Trigger): + def __init__(self, fname: str, storage_account: str): + super().__init__() + self.name = fname + self.storage_account = storage_account + + @staticmethod + def typename() -> str: + return "Azure.StorageTrigger" + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.STORAGE + + def sync_invoke(self, payload: dict) -> ExecutionResult: + + self.logging.info(f"Invoke function {self.name}") + + # Init client + account_url = f"https://{self.storage_account}.blob.core.windows.net" + default_credential = DefaultAzureCredential() + blob_service_client = BlobServiceClient(account_url, credential=default_credential) + + # Create container + container_name = self.name + self.logging.info(f"Creating container {container_name}") + try: + blob_service_client.create_container(container_name) + self.logging.info("Created container") + except ResourceExistsError: + self.logging.info("Container already exists, reusing...") + + # Prepare blob + file_name = "payload.json" + with open(file_name, 'w') as fp: + json.dump(payload, fp) + + # Upload blob + blob_client = blob_service_client.get_blob_client(container=container_name, + blob=file_name) + with open(file=file_name, mode="rb") as payload: + blob_client.upload_blob(payload, overwrite=True) + self.logging.info(f"Uploaded payload to container {container_name}") + + # TODO(oana): gather metrics + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + + pool = concurrent.futures.ThreadPoolExecutor() + fut = pool.submit(self.sync_invoke, payload) + return fut + + def serialize(self) -> dict: + return {"type": "Storage", "name": self.name, "storage_account": self.storage_account} + + @staticmethod + def deserialize(obj: dict) -> Trigger: + return StorageTrigger(obj["name"], obj["storage_account"]) diff --git a/sebs/benchmark.py b/sebs/benchmark.py index 
4673647c..e1abc419 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -493,6 +493,10 @@ def build( shutil.rmtree(self._output_dir) os.makedirs(self._output_dir) + benchmark = self.benchmark + if self._deployment_name == "azure": + benchmark = "{}-{}".format(benchmark, self._experiment_config.trigger) + self.copy_code(self._output_dir) self.add_benchmark_data(self._output_dir) self.add_deployment_files(self._output_dir) @@ -502,7 +506,7 @@ def build( os.path.abspath(self._output_dir), self.language_name, self.language_version, - self.benchmark, + benchmark, self.is_cached, ) self.logging.info( diff --git a/sebs/experiments/config.py b/sebs/experiments/config.py index a5ca3f0b..51cedd52 100644 --- a/sebs/experiments/config.py +++ b/sebs/experiments/config.py @@ -1,6 +1,6 @@ from typing import Dict -from sebs.faas.function import Runtime +from sebs.faas.function import Runtime, Trigger class Config: @@ -11,6 +11,7 @@ def __init__(self): self._flags: Dict[str, bool] = {} self._experiment_configs: Dict[str, dict] = {} self._runtime = Runtime(None, None) + self._trigger: Trigger.TriggerType @property def update_code(self) -> bool: @@ -31,6 +32,10 @@ def check_flag(self, key: str) -> bool: def runtime(self) -> Runtime: return self._runtime + @property + def trigger(self) -> Trigger.TriggerType: + return self._trigger + def experiment_settings(self, name: str) -> dict: return self._experiment_configs[name] @@ -42,6 +47,7 @@ def serialize(self) -> dict: "runtime": self._runtime.serialize(), "flags": self._flags, "experiments": self._experiment_configs, + "trigger": self._trigger, } return out @@ -55,6 +61,7 @@ def deserialize(config: dict) -> "Config": cfg._download_results = config["download_results"] cfg._runtime = Runtime.deserialize(config["runtime"]) cfg._flags = config["flags"] if "flags" in config else {} + cfg._trigger = config["trigger"] if "trigger" in config else {} from sebs.experiments import ( NetworkPingPong, From 0f7454a0e5573f3c660131f8f654555115ba6f65 Mon Sep 17 00:00:00 2001 From: orosca Date: Mon, 6 May 2024 18:29:11 +0200 Subject: [PATCH 05/19] Linting --- sebs/aws/function.py | 6 +++++- sebs/aws/triggers.py | 16 ++++++++-------- sebs/azure/azure.py | 2 +- sebs/azure/function.py | 7 +++---- sebs/azure/triggers.py | 4 +--- 5 files changed, 18 insertions(+), 17 deletions(-) diff --git a/sebs/aws/function.py b/sebs/aws/function.py index 9c125faf..de4fcc6e 100644 --- a/sebs/aws/function.py +++ b/sebs/aws/function.py @@ -55,7 +55,11 @@ def deserialize(cached_config: dict) -> "LambdaFunction": for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": LibraryTrigger, "HTTP": HTTPTrigger, "Queue": QueueTrigger, "Storage": StorageTrigger}.get(trigger["type"]), + {"Library": LibraryTrigger, + "HTTP": HTTPTrigger, + "Queue": QueueTrigger, + "Storage": StorageTrigger + }.get(trigger["type"]), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index 34ffafae..a3ed2ff3 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -2,7 +2,7 @@ import concurrent.futures import datetime import json -from typing import Dict, Optional +from typing import Optional import uuid # noqa import boto3 @@ -160,22 +160,22 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: sqs_client = boto3.client('sqs', region_name=self.deployment_client.config.region) serialized_payload = json.dumps(payload) - + # Create queue 
self.logging.debug(f"Creating queue {self.name}") - + queue_url = sqs_client.create_queue(QueueName=self.name)["QueueUrl"] queue_arn = sqs_client.get_queue_attributes( QueueUrl=queue_url, AttributeNames=["QueueArn"] )["Attributes"]["QueueArn"] - self.logging.debug(f"Created queue") + self.logging.debug("Created queue") # Add queue trigger if (not len(lambda_client.list_event_source_mappings(EventSourceArn=queue_arn, FunctionName=self.name) - ["EventSourceMappings"])): + ["EventSourceMappings"])): lambda_client.create_event_source_mapping( EventSourceArn=queue_arn, FunctionName=self.name, @@ -236,7 +236,8 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Prep serialized_payload = json.dumps(payload) bucket_name = self.name.replace('_', '-') # AWS disallows underscores in bucket names - function_arn = lambda_client.get_function(FunctionName=self.name)["Configuration"]["FunctionArn"] + function_arn = lambda_client.get_function(FunctionName=self.name) \ + ["Configuration"]["FunctionArn"] # Create bucket self.logging.info(f"Creating bucket {bucket_name}") @@ -273,8 +274,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: ], }, - ]} - ) + ]}) # Put object s3.Object(bucket_name, 'payload.json').put(Body=serialized_payload) diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index a2c9a7f5..3ac14499 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -12,7 +12,7 @@ from sebs.azure.cli import AzureCLI from sebs.azure.function import AzureFunction from sebs.azure.config import AzureConfig, AzureResources -from sebs.azure.triggers import AzureTrigger, HTTPTrigger, QueueTrigger, StorageTrigger +from sebs.azure.triggers import AzureTrigger, HTTPTrigger from sebs.faas.function import Trigger from sebs.benchmark import Benchmark from sebs.cache import Cache diff --git a/sebs/azure/function.py b/sebs/azure/function.py index c822f545..8970d90d 100644 --- a/sebs/azure/function.py +++ b/sebs/azure/function.py @@ -15,7 +15,7 @@ def __init__( ): super().__init__(benchmark, name, code_hash, cfg) self.function_storage = function_storage - + @staticmethod def typename() -> str: return "Azure.AzureFunction" @@ -29,8 +29,7 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> Function: from sebs.faas.function import Trigger - from sebs.azure.triggers import HTTPTrigger, \ - QueueTrigger, StorageTrigger + from sebs.azure.triggers import HTTPTrigger, QueueTrigger, StorageTrigger cfg = FunctionConfig.deserialize(cached_config["config"]) ret = AzureFunction( @@ -46,7 +45,7 @@ def deserialize(cached_config: dict) -> Function: {"HTTP": HTTPTrigger, "Queue": QueueTrigger, "Storage": StorageTrigger - }.get(trigger["type"]), + }.get(trigger["type"]) ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index e893e958..ed3c3eb2 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -1,14 +1,12 @@ import base64 import concurrent.futures import json -import os from typing import Any, Dict, Optional # noqa from azure.core.exceptions import ResourceExistsError from azure.identity import DefaultAzureCredential from azure.storage.blob import BlobServiceClient -from azure.storage.queue import QueueServiceClient, QueueClient, QueueMessage, BinaryBase64DecodePolicy, BinaryBase64EncodePolicy -from sebs.azure.cli import AzureCLI +from azure.storage.queue import QueueClient from sebs.azure.config import AzureResources from 
sebs.faas.function import ExecutionResult, Trigger From 8f96edab6ab488b227454d89826c78cb4080d794 Mon Sep 17 00:00:00 2001 From: orosca Date: Mon, 20 May 2024 18:35:18 +0200 Subject: [PATCH 06/19] Address comments, lint --- benchmarks/wrappers/aws/python/handler.py | 10 ++--- benchmarks/wrappers/aws/python/storage.py | 3 ++ benchmarks/wrappers/gcp/python/handler.py | 11 ++--- requirements.azure.txt | 2 + sebs.py | 9 +++- sebs/aws/aws.py | 54 +++++++++++++++-------- sebs/aws/config.py | 24 ++++++---- sebs/aws/function.py | 8 ++-- sebs/aws/s3.py | 27 ++++++++---- sebs/aws/triggers.py | 41 ++++++++++------- sebs/azure/azure.py | 13 +++--- sebs/benchmark.py | 4 +- sebs/cache.py | 25 ++++++++--- sebs/gcp/triggers.py | 4 +- 14 files changed, 152 insertions(+), 83 deletions(-) diff --git a/benchmarks/wrappers/aws/python/handler.py b/benchmarks/wrappers/aws/python/handler.py index 0bcfeab0..2601dddf 100644 --- a/benchmarks/wrappers/aws/python/handler.py +++ b/benchmarks/wrappers/aws/python/handler.py @@ -1,6 +1,4 @@ - import datetime, io, json, os, sys, uuid -import boto3 # Add current directory to allow location of packages sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) @@ -15,11 +13,13 @@ def handler(event, context): # Storage trigger if ("Records" in event and "s3" in event["Records"][0]): - s3_client = boto3.client('s3') bucket_name = event["Records"][0]["s3"]["bucket"]["name"] file_name = event["Records"][0]["s3"]["object"]["key"] - - obj = s3_client.get_object(Bucket=bucket_name, Key=file_name) + + from function import storage + storage_inst = storage.storage.get_instance() + + obj = storage_inst.get_object(bucket_name, file_name) event = json.loads(obj['Body'].read()) # HTTP trigger with API Gateaway diff --git a/benchmarks/wrappers/aws/python/storage.py b/benchmarks/wrappers/aws/python/storage.py index 4be0025e..602319df 100644 --- a/benchmarks/wrappers/aws/python/storage.py +++ b/benchmarks/wrappers/aws/python/storage.py @@ -46,6 +46,9 @@ def download_stream(self, bucket, file): data = io.BytesIO() self.client.download_fileobj(bucket, file, data) return data.getbuffer() + + def get_object(self, bucket, file): + return self.client.get_object(Bucket=bucket, Key=file) def get_instance(): if storage.instance is None: diff --git a/benchmarks/wrappers/gcp/python/handler.py b/benchmarks/wrappers/gcp/python/handler.py index e5093061..6a1284e5 100644 --- a/benchmarks/wrappers/gcp/python/handler.py +++ b/benchmarks/wrappers/gcp/python/handler.py @@ -66,7 +66,7 @@ def handler_http(req): def handler_queue(data, context): serialized_payload = data.get('data') - payload = json.loads(base64.b64decode(serialized_payload).decode("ascii")) + payload = json.loads(base64.b64decode(serialized_payload).decode("utf-8")) from function import function ret = function.handler(payload) @@ -77,13 +77,10 @@ def handler_storage(data, context): bucket_name = data.get('bucket') name = data.get('name') filepath = '/tmp/bucket_contents' - client = gcp_storage.Client(); - print("Download {}:{} to {}".format(bucket_name, name, filepath)) - print(data) - bucket_instance = client.bucket(bucket_name) - blob = bucket_instance.blob(name) - blob.download_to_filename(filepath) + from function import storage + storage_inst = storage.storage.get_instance() + storage_inst.download(bucket_name, name, filepath) payload = {} diff --git a/requirements.azure.txt b/requirements.azure.txt index f7d82499..4fed51ac 100644 --- a/requirements.azure.txt +++ b/requirements.azure.txt @@ -1 +1,3 @@ 
azure-storage-blob==12.10.0 +azure-storage-queue==12.9.0 +azure-identity==1.16.0 diff --git a/sebs.py b/sebs.py index 3f9649c9..567074ae 100755 --- a/sebs.py +++ b/sebs.py @@ -225,7 +225,9 @@ def invoke( sebs_client.config.image_tag_prefix = image_tag_prefix # Insert trigger into (experiment) config. Required by Azure when packaging. - update_nested_dict(config, ["experiments", "trigger"], (trigger if trigger is not None else "http")) + # TODO(oana) is this still needed + trigger = trigger if trigger is not None else "http" + update_nested_dict(config, ["experiments", "trigger"], trigger) experiment_config = sebs_client.get_experiment_config(config["experiments"]) update_nested_dict(config, ["experiments", "benchmark"], benchmark) @@ -242,7 +244,10 @@ def invoke( function_name = function_name if function_name else deployment_client.default_function_name(benchmark_obj) - # GCP: augment function name with trigger type: _http, _queue etc. + # GCP and Azure only allow one trigger per function, so augment function name with + # trigger type: _http, _queue etc. + # + # Additionally, Azure requires for the trigger to be defined at deployment time. if deployment_client.name() == "gcp" or deployment_client.name() == "azure": function_name = "{}-{}".format(function_name, trigger) diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index c175a1a1..d48b8e17 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -149,11 +149,13 @@ def package_code( # FIXME: use zipfile # create zip with hidden directory but without parent directory - execute("zip -qu -r9 {}.zip * .".format(benchmark), shell=True, cwd=directory) + execute("zip -qu -r9 {}.zip * .".format(benchmark), + shell=True, cwd=directory) benchmark_archive = "{}.zip".format(os.path.join(directory, benchmark)) self.logging.info("Created {} archive".format(benchmark_archive)) - bytes_size = os.path.getsize(os.path.join(directory, benchmark_archive)) + bytes_size = os.path.getsize( + os.path.join(directory, benchmark_archive)) mbytes = bytes_size / 1024.0 / 1024.0 self.logging.info("Zip archive size {:2f} MB".format(mbytes)) @@ -186,7 +188,8 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun try: ret = self.client.get_function(FunctionName=func_name) self.logging.info( - "Function {} exists on AWS, retrieve configuration.".format(func_name) + "Function {} exists on AWS, retrieve configuration.".format( + func_name) ) # Here we assume a single Lambda role lambda_function = LambdaFunction( @@ -202,7 +205,8 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun lambda_function.updated_code = True # TODO: get configuration of REST API except self.client.exceptions.ResourceNotFoundException: - self.logging.info("Creating function {} from {}".format(func_name, package)) + self.logging.info( + "Creating function {} from {}".format(func_name, package)) # AWS Lambda limit on zip deployment size # Limit to 50 MB @@ -216,16 +220,19 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun else: code_package_name = cast(str, os.path.basename(package)) - code_bucket = storage_client.get_bucket(Resources.StorageBucketType.DEPLOYMENT) + code_bucket = storage_client.get_bucket( + Resources.StorageBucketType.DEPLOYMENT) code_prefix = os.path.join(benchmark, code_package_name) storage_client.upload(code_bucket, package, code_prefix) - self.logging.info("Uploading function {} code to {}".format(func_name, code_bucket)) + self.logging.info( + "Uploading function {} code to 
{}".format(func_name, code_bucket)) code_config = {"S3Bucket": code_bucket, "S3Key": code_prefix} ret = self.client.create_function( FunctionName=func_name, Runtime="{}{}".format( - language, self._map_language_runtime(language, language_runtime) + language, self._map_language_runtime( + language, language_runtime) ), Handler="handler.handler", Role=self.config.resources.lambda_role(self.session), @@ -293,7 +300,8 @@ def update_function(self, function: Function, code_package: Benchmark): # AWS Lambda limit on zip deployment if code_size < 50 * 1024 * 1024: with open(package, "rb") as code_body: - self.client.update_function_code(FunctionName=name, ZipFile=code_body.read()) + self.client.update_function_code( + FunctionName=name, ZipFile=code_body.read()) # Upload code package to S3, then update else: code_package_name = os.path.basename(package) @@ -322,7 +330,8 @@ def update_function_configuration(self, function: Function, benchmark: Benchmark MemorySize=function.config.memory, ) self.wait_function_updated(function) - self.logging.info(f"Updated configuration of {function.name} function. ") + self.logging.info( + f"Updated configuration of {function.name} function. ") @staticmethod def default_function_name(code_package: Benchmark) -> str: @@ -391,10 +400,12 @@ def parse_aws_report( return request_id output = requests[request_id] output.request_id = request_id - output.provider_times.execution = int(float(aws_vals["Duration"]) * 1000) + output.provider_times.execution = int( + float(aws_vals["Duration"]) * 1000) output.stats.memory_used = float(aws_vals["Max Memory Used"]) if "Init Duration" in aws_vals: - output.provider_times.initialization = int(float(aws_vals["Init Duration"]) * 1000) + output.provider_times.initialization = int( + float(aws_vals["Init Duration"]) * 1000) output.billing.billed_time = int(aws_vals["Billed Duration"]) output.billing.memory = int(aws_vals["Memory Size"]) output.billing.gb_seconds = output.billing.billed_time * output.billing.memory @@ -428,12 +439,14 @@ def get_invocation_error(self, function_name: str, start_time: int, end_time: in time.sleep(5) response = self.logs_client.get_query_results(queryId=query_id) if len(response["results"]) == 0: - self.logging.info("AWS logs are not yet available, repeat after 15s...") + self.logging.info( + "AWS logs are not yet available, repeat after 15s...") time.sleep(15) response = None else: break - self.logging.error(f"Invocation error for AWS Lambda function {function_name}") + self.logging.error( + f"Invocation error for AWS Lambda function {function_name}") for message in response["results"]: for value in message: if value["field"] == "@message": @@ -480,7 +493,8 @@ def download_metrics( for val in results: for result_part in val: if result_part["field"] == "@message": - request_id = AWS.parse_aws_report(result_part["value"], requests) + request_id = AWS.parse_aws_report( + result_part["value"], requests) if request_id in requests: results_processed += 1 requests_ids.remove(request_id) @@ -497,7 +511,8 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T if trigger_type == Trigger.TriggerType.HTTP: api_name = "{}-http-api".format(function.name) - http_api = self.config.resources.http_api(api_name, function, self.session) + http_api = self.config.resources.http_api( + api_name, function, self.session) # https://aws.amazon.com/blogs/compute/announcing-http-apis-for-amazon-api-gateway/ # but this is wrong - source arn must be {api-arn}/*/* self.get_lambda_client().add_permission( @@ 
-520,11 +535,13 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T elif trigger_type == Trigger.TriggerType.QUEUE: trigger = QueueTrigger(func.name, self) trigger.logging_handlers = self.logging_handlers - self.logging.info(f"Created Queue trigger for {func.name} function.") + self.logging.info( + f"Created Queue trigger for {func.name} function.") elif trigger_type == Trigger.TriggerType.STORAGE: trigger = StorageTrigger(func.name, self) trigger.logging_handlers = self.logging_handlers - self.logging.info(f"Created Storage trigger for {func.name} function.") + self.logging.info( + f"Created Storage trigger for {func.name} function.") else: raise RuntimeError("Not supported!") @@ -538,7 +555,8 @@ def _enforce_cold_start(self, function: Function): FunctionName=func.name, Timeout=func.config.timeout, MemorySize=func.config.memory, - Environment={"Variables": {"ForceColdStart": str(self.cold_start_counter)}}, + Environment={"Variables": { + "ForceColdStart": str(self.cold_start_counter)}}, ) def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): diff --git a/sebs/aws/config.py b/sebs/aws/config.py index 44c9a490..6de965d4 100644 --- a/sebs/aws/config.py +++ b/sebs/aws/config.py @@ -85,7 +85,8 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden return ret def update_cache(self, cache: Cache): - cache.update_config(val=self.account_id, keys=["aws", "credentials", "account_id"]) + cache.update_config(val=self.account_id, keys=[ + "aws", "credentials", "account_id"]) def serialize(self) -> dict: out = {"account_id": self._account_id} @@ -145,7 +146,8 @@ def lambda_role(self, boto3_session: boto3.session.Session) -> str: try: out = iam_client.get_role(RoleName=role_name) self._lambda_role = out["Role"]["Arn"] - self.logging.info(f"AWS: Selected {self._lambda_role} IAM role") + self.logging.info( + f"AWS: Selected {self._lambda_role} IAM role") except iam_client.exceptions.NoSuchEntityException: out = iam_client.create_role( RoleName=role_name, @@ -159,7 +161,8 @@ def lambda_role(self, boto3_session: boto3.session.Session) -> str: time.sleep(10) # Attach basic AWS Lambda and S3 policies. 
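For reference, the role bootstrap that lambda_role() performs boils down to the boto3 IAM calls sketched below; the role name and the concrete managed-policy ARNs are placeholders here, since the actual values are defined elsewhere in the code.

import json
import time

import boto3

iam_client = boto3.client("iam")
role_name = "sebs-lambda-role"  # placeholder role name

# Trust policy that lets the AWS Lambda service assume the role.
trust_policy = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {"Service": "lambda.amazonaws.com"},
            "Action": "sts:AssumeRole",
        }
    ],
}

try:
    role_arn = iam_client.get_role(RoleName=role_name)["Role"]["Arn"]
except iam_client.exceptions.NoSuchEntityException:
    role_arn = iam_client.create_role(
        RoleName=role_name,
        AssumeRolePolicyDocument=json.dumps(trust_policy),
    )["Role"]["Arn"]
    # IAM is eventually consistent; give the new role a moment to propagate.
    time.sleep(10)
    # Attach execution and storage policies (illustrative ARNs only).
    for policy_arn in [
        "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole",
        "arn:aws:iam::aws:policy/AmazonS3FullAccess",
    ]:
        iam_client.attach_role_policy(RoleName=role_name, PolicyArn=policy_arn)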
for policy in attached_policies: - iam_client.attach_role_policy(RoleName=role_name, PolicyArn=policy) + iam_client.attach_role_policy( + RoleName=role_name, PolicyArn=policy) return self._lambda_role def http_api( @@ -221,9 +224,11 @@ def serialize(self) -> dict: def update_cache(self, cache: Cache): super().update_cache(cache) - cache.update_config(val=self._lambda_role, keys=["aws", "resources", "lambda-role"]) + cache.update_config(val=self._lambda_role, keys=[ + "aws", "resources", "lambda-role"]) for name, api in self._http_apis.items(): - cache.update_config(val=api.serialize(), keys=["aws", "resources", "http-apis", name]) + cache.update_config(val=api.serialize(), keys=[ + "aws", "resources", "http-apis", name]) @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: @@ -240,7 +245,8 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour if "resources" in config: AWSResources.initialize(ret, config["resources"]) ret.logging_handlers = handlers - ret.logging.info("No cached resources for AWS found, using user configuration.") + ret.logging.info( + "No cached resources for AWS found, using user configuration.") else: AWSResources.initialize(ret, {}) ret.logging_handlers = handlers @@ -278,8 +284,10 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config cached_config = cache.get_config("aws") # FIXME: use future annotations (see sebs/faas/system) - credentials = cast(AWSCredentials, AWSCredentials.deserialize(config, cache, handlers)) - resources = cast(AWSResources, AWSResources.deserialize(config, cache, handlers)) + credentials = cast( + AWSCredentials, AWSCredentials.deserialize(config, cache, handlers)) + resources = cast(AWSResources, AWSResources.deserialize( + config, cache, handlers)) config_obj = AWSConfig(credentials, resources) config_obj.logging_handlers = handlers # Load cached values diff --git a/sebs/aws/function.py b/sebs/aws/function.py index de4fcc6e..fbdb6d6f 100644 --- a/sebs/aws/function.py +++ b/sebs/aws/function.py @@ -59,12 +59,14 @@ def deserialize(cached_config: dict) -> "LambdaFunction": "HTTP": HTTPTrigger, "Queue": QueueTrigger, "Storage": StorageTrigger - }.get(trigger["type"]), + }.get(trigger["type"]), ) - assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) + assert trigger_type, "Unknown trigger type {}".format( + trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) return ret def code_bucket(self, benchmark: str, storage_client: S3): - self.bucket = storage_client.get_bucket(Resources.StorageBucketType.DEPLOYMENT) + self.bucket = storage_client.get_bucket( + Resources.StorageBucketType.DEPLOYMENT) return self.bucket diff --git a/sebs/aws/s3.py b/sebs/aws/s3.py index 79ca8905..bd550c52 100644 --- a/sebs/aws/s3.py +++ b/sebs/aws/s3.py @@ -54,7 +54,8 @@ def _create_bucket( for bucket_name in buckets: if name in bucket_name: self.logging.info( - "Bucket {} for {} already exists, skipping.".format(bucket_name, name) + "Bucket {} for {} already exists, skipping.".format( + bucket_name, name) ) return bucket_name @@ -70,7 +71,8 @@ def _create_bucket( if self.region != "us-east-1": self.client.create_bucket( Bucket=bucket_name, - CreateBucketConfiguration={"LocationConstraint": self.region}, + CreateBucketConfiguration={ + "LocationConstraint": self.region}, ) else: # This is incredible x2 - boto3 will not throw exception if you recreate @@ -86,7 +88,8 @@ def _create_bucket( self.logging.info("Created bucket 
{}".format(bucket_name)) except self.client.exceptions.BucketAlreadyExists as e: - self.logging.error(f"The bucket {bucket_name} exists already in region {self.region}!") + self.logging.error( + f"The bucket {bucket_name} exists already in region {self.region}!") raise e except self.client.exceptions.ClientError as e: self.logging.error( @@ -110,7 +113,8 @@ def uploader_func(self, path_idx, key, filepath): for f in self.input_prefixes_files[path_idx]: f_name = f if key == f_name: - self.logging.info("Skipping upload of {} to {}".format(filepath, bucket_name)) + self.logging.info( + "Skipping upload of {} to {}".format(filepath, bucket_name)) return self.upload(bucket_name, filepath, key) @@ -120,8 +124,10 @@ def upload(self, bucket_name: str, filepath: str, key: str): self.client.upload_file(Filename=filepath, Bucket=bucket_name, Key=key) def download(self, bucket_name: str, key: str, filepath: str): - self.logging.info("Download {}:{} to {}".format(bucket_name, key, filepath)) - self.client.download_file(Bucket=bucket_name, Key=key, Filename=filepath) + self.logging.info("Download {}:{} to {}".format( + bucket_name, key, filepath)) + self.client.download_file( + Bucket=bucket_name, Key=key, Filename=filepath) def exists_bucket(self, bucket_name: str) -> bool: try: @@ -131,7 +137,8 @@ def exists_bucket(self, bucket_name: str) -> bool: return False def list_bucket(self, bucket_name: str, prefix: str = ""): - objects_list = self.client.list_objects_v2(Bucket=bucket_name, Prefix=prefix) + objects_list = self.client.list_objects_v2( + Bucket=bucket_name, Prefix=prefix) objects: List[str] if "Contents" in objects_list: objects = [obj["Key"] for obj in objects_list["Contents"]] @@ -149,8 +156,10 @@ def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: def clean_bucket(self, bucket: str): objects = self.client.list_objects_v2(Bucket=bucket) if "Contents" in objects: - objects = [{"Key": obj["Key"]} for obj in objects["Contents"]] # type: ignore - self.client.delete_objects(Bucket=bucket, Delete={"Objects": objects}) # type: ignore + objects = [{"Key": obj["Key"]} + for obj in objects["Contents"]] # type: ignore + self.client.delete_objects(Bucket=bucket, Delete={ + "Objects": objects}) # type: ignore def remove_bucket(self, bucket: str): self.client.delete_bucket(Bucket=bucket) diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index a3ed2ff3..9335237e 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -41,19 +41,22 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: serialized_payload = json.dumps(payload).encode("utf-8") client = self.deployment_client.get_lambda_client() begin = datetime.datetime.now() - ret = client.invoke(FunctionName=self.name, Payload=serialized_payload, LogType="Tail") + ret = client.invoke(FunctionName=self.name, + Payload=serialized_payload, LogType="Tail") end = datetime.datetime.now() aws_result = ExecutionResult.from_times(begin, end) aws_result.request_id = ret["ResponseMetadata"]["RequestId"] if ret["StatusCode"] != 200: self.logging.error("Invocation of {} failed!".format(self.name)) - self.logging.error("Input: {}".format(serialized_payload.decode("utf-8"))) + self.logging.error("Input: {}".format( + serialized_payload.decode("utf-8"))) aws_result.stats.failure = True return aws_result if "FunctionError" in ret: self.logging.error("Invocation of {} failed!".format(self.name)) - self.logging.error("Input: {}".format(serialized_payload.decode("utf-8"))) + self.logging.error("Input: {}".format( + 
serialized_payload.decode("utf-8"))) aws_result.stats.failure = True return aws_result self.logging.debug(f"Invoke of function {self.name} was successful") @@ -67,7 +70,8 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: if isinstance(function_output["body"], dict): aws_result.parse_benchmark_output(function_output["body"]) else: - aws_result.parse_benchmark_output(json.loads(function_output["body"])) + aws_result.parse_benchmark_output( + json.loads(function_output["body"])) return aws_result def async_invoke(self, payload: dict): @@ -82,8 +86,10 @@ def async_invoke(self, payload: dict): LogType="Tail", ) if ret["StatusCode"] != 202: - self.logging.error("Async invocation of {} failed!".format(self.name)) - self.logging.error("Input: {}".format(serialized_payload.decode("utf-8"))) + self.logging.error( + "Async invocation of {} failed!".format(self.name)) + self.logging.error("Input: {}".format( + serialized_payload.decode("utf-8"))) raise RuntimeError() return ret @@ -157,7 +163,8 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Init clients lambda_client = self.deployment_client.get_lambda_client() - sqs_client = boto3.client('sqs', region_name=self.deployment_client.config.region) + sqs_client = boto3.client( + 'sqs', region_name=self.deployment_client.config.region) serialized_payload = json.dumps(payload) @@ -166,16 +173,16 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: queue_url = sqs_client.create_queue(QueueName=self.name)["QueueUrl"] queue_arn = sqs_client.get_queue_attributes( - QueueUrl=queue_url, - AttributeNames=["QueueArn"] - )["Attributes"]["QueueArn"] + QueueUrl=queue_url, + AttributeNames=["QueueArn"] + )["Attributes"]["QueueArn"] self.logging.debug("Created queue") # Add queue trigger if (not len(lambda_client.list_event_source_mappings(EventSourceArn=queue_arn, FunctionName=self.name) - ["EventSourceMappings"])): + ["EventSourceMappings"])): lambda_client.create_event_source_mapping( EventSourceArn=queue_arn, FunctionName=self.name, @@ -183,7 +190,8 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: ) # Publish payload to queue - sqs_client.send_message(QueueUrl=queue_url, MessageBody=serialized_payload) + sqs_client.send_message( + QueueUrl=queue_url, MessageBody=serialized_payload) self.logging.info(f"Sent message to queue {self.name}") # TODO(oana): gather metrics @@ -235,9 +243,10 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Prep serialized_payload = json.dumps(payload) - bucket_name = self.name.replace('_', '-') # AWS disallows underscores in bucket names - function_arn = lambda_client.get_function(FunctionName=self.name) \ - ["Configuration"]["FunctionArn"] + # AWS disallows underscores in bucket names + bucket_name = self.name.replace('_', '-') + function_arn = lambda_client.get_function(FunctionName=self.name)[ + "Configuration"]["FunctionArn"] # Create bucket self.logging.info(f"Creating bucket {bucket_name}") @@ -275,7 +284,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: }, ]}) - + # Put object s3.Object(bucket_name, 'payload.json').put(Body=serialized_payload) self.logging.info(f"Uploaded payload to bucket {bucket_name}") diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index 3ac14499..aab84146 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -244,7 +244,7 @@ def package_code( benchmark_stripped, language_name, language_version, - self.config.resources_id, + self.config.resources.resources_id, trigger ) .replace(".", "-") @@ -269,7 +269,7 @@ def package_code( 
json.dump(default_host_json, open(os.path.join(directory, "host.json"), "w"), indent=2) code_size = Benchmark.directory_size(directory) - execute("zip -qu -r9 {}.zip * .".format(benchmark), shell=True, cwd=directory) + execute("zip -qu -r9 {}.zip * .".format(benchmark_stripped), shell=True, cwd=directory) return directory, code_size def publish_function( @@ -581,18 +581,21 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) resource_group = self.config.resources.resource_group(self.cli_instance) storage_account = azure_function.function_storage.account_name - ret = self.cli_instance.execute( + user_principal_name = self.cli_instance.execute('az ad user list') + + storage_account_scope = self.cli_instance.execute( ('az storage account show --resource-group {} --name {} --query id') .format(resource_group, storage_account) ) + self.cli_instance.execute( ('az role assignment create --assignee "{}" \ --role "Storage {} Data Contributor" \ --scope {}') .format( - os.environ["AZURE_USER_PRINCIPAL_NAME"], + json.loads(user_principal_name.decode("utf-8"))[0]["userPrincipalName"], "Queue" if trigger_type == Trigger.TriggerType.QUEUE else "Blob", - ret.decode("utf-8") + storage_account_scope.decode("utf-8") ) ) diff --git a/sebs/benchmark.py b/sebs/benchmark.py index 4f0fda73..f39c4bc4 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -524,9 +524,9 @@ def build( # package already exists if self.is_cached: - self._cache_client.update_code_package(self._deployment_name, self.language_name, self) + self._cache_client.update_code_package(self._deployment_name, self.language_name, self, self._experiment_config.trigger) else: - self._cache_client.add_code_package(self._deployment_name, self.language_name, self) + self._cache_client.add_code_package(self._deployment_name, self.language_name, self, self._experiment_config.trigger) self.query_cache() return True, self._code_location diff --git a/sebs/cache.py b/sebs/cache.py index ed5096e6..3a781b58 100644 --- a/sebs/cache.py +++ b/sebs/cache.py @@ -162,14 +162,21 @@ def update_storage(self, deployment: str, benchmark: str, config: dict): with open(os.path.join(benchmark_dir, "config.json"), "w") as fp: json.dump(cached_config, fp, indent=2) - def add_code_package(self, deployment_name: str, language_name: str, code_package: "Benchmark"): + def add_code_package( + self, deployment_name: str, language_name: str, code_package: "Benchmark", + trigger: Optional[str] + ): with self._lock: language = code_package.language_name language_version = code_package.language_version benchmark_dir = os.path.join(self.cache_dir, code_package.benchmark) os.makedirs(benchmark_dir, exist_ok=True) - # Check if cache directory for this deployment exist - cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version) + + if (deployment_name == "azure"): + cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version, trigger) + else: + # Check if cache directory for this deployment exist + cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version) if not os.path.exists(cached_dir): os.makedirs(cached_dir, exist_ok=True) @@ -231,14 +238,20 @@ def add_code_package(self, deployment_name: str, language_name: str, code_packag ) def update_code_package( - self, deployment_name: str, language_name: str, code_package: "Benchmark" + self, deployment_name: str, language_name: str, code_package: "Benchmark", + trigger: Optional[str] ): with self._lock: language = 
code_package.language_name language_version = code_package.language_version benchmark_dir = os.path.join(self.cache_dir, code_package.benchmark) - # Check if cache directory for this deployment exist - cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version) + + cached_dir = "" + if (deployment_name == "azure"): + cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version, trigger) + else: + # Check if cache directory for this deployment exist + cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version) if os.path.exists(cached_dir): # copy code diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 2ad08637..35b46119 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -151,7 +151,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Prep # GCP is very particular with data encoding... - serialized_payload = base64.b64encode(json.dumps(payload).encode("ascii")) + serialized_payload = base64.b64encode(json.dumps(payload).encode("utf-8")) # Publish payload to queue pub_sub.projects().topics().publish( @@ -208,7 +208,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Upload object gcp_storage.blob._MAX_MULTIPART_SIZE = 5 * 1024 * 1024 - blob = bucket_instance.blob(blob_name=payload, chunk_size=4 * 1024 * 1024) + blob = bucket_instance.blob(blob_name=file_name, chunk_size=4 * 1024 * 1024) blob.upload_from_filename(file_name) self.logging.info(f"Uploaded payload to bucket {bucket_name}") From 107b53f35008d191fddeee997d76894733e92031 Mon Sep 17 00:00:00 2001 From: orosca Date: Wed, 19 Jun 2024 13:36:32 -0400 Subject: [PATCH 07/19] Cache prep --- docs/modularity.md | 1 + sebs.py | 1 - sebs/aws/aws.py | 1 + sebs/azure/azure.py | 7 +++---- sebs/benchmark.py | 11 ++++------- sebs/cache.py | 23 ++++++----------------- sebs/faas/system.py | 1 + sebs/gcp/gcp.py | 1 + sebs/local/local.py | 1 + sebs/openwhisk/openwhisk.py | 1 + 10 files changed, 19 insertions(+), 29 deletions(-) diff --git a/docs/modularity.md b/docs/modularity.md index 7e3c7fcc..f2614655 100644 --- a/docs/modularity.md +++ b/docs/modularity.md @@ -303,6 +303,7 @@ Implement this step in the following function: language_version: str, benchmark: str, is_cached: bool, + trigger: Optional[Trigger.TriggerType], ) -> Tuple[str, int] ``` diff --git a/sebs.py b/sebs.py index 567074ae..9f0bf620 100755 --- a/sebs.py +++ b/sebs.py @@ -225,7 +225,6 @@ def invoke( sebs_client.config.image_tag_prefix = image_tag_prefix # Insert trigger into (experiment) config. Required by Azure when packaging. 
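The intent of this hunk is to default the CLI trigger to HTTP and record it under experiments.trigger before packaging. A minimal sketch of that behaviour, using a simplified stand-in for the update_nested_dict helper (the real helper in sebs.utils may behave differently):

from typing import Any, Dict, List, Optional


def update_nested_dict(cfg: Dict[str, Any], keys: List[str], value: Any) -> None:
    # Simplified stand-in: walk/create the nested keys and set the final value.
    for key in keys[:-1]:
        cfg = cfg.setdefault(key, {})
    cfg[keys[-1]] = value


config: Dict[str, Any] = {"experiments": {}}
trigger: Optional[str] = None  # value coming from the CLI option

# Default to the HTTP trigger when none was requested explicitly.
trigger = trigger if trigger is not None else "http"
update_nested_dict(config, ["experiments", "trigger"], trigger)

assert config["experiments"]["trigger"] == "http"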
- # TODO(oana) is this still needed trigger = trigger if trigger is not None else "http" update_nested_dict(config, ["experiments", "trigger"], trigger) diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index d48b8e17..73c56286 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -132,6 +132,7 @@ def package_code( language_version: str, benchmark: str, is_cached: bool, + trigger: Optional[Trigger.TriggerType], ) -> Tuple[str, int]: CONFIG_FILES = { diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index aab84146..2946ed58 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -218,6 +218,7 @@ def package_code( language_version: str, benchmark: str, is_cached: bool, + trigger: Optional[Trigger.TriggerType], ) -> Tuple[str, int]: # In previous step we ran a Docker container which installed packages @@ -237,11 +238,9 @@ def package_code( source_file = os.path.join(directory, f) shutil.move(source_file, handler_dir) - benchmark_stripped = '-'.join(benchmark.split("-")[:-1]) - trigger = benchmark.split("-")[-1] func_name = ( "{}-{}-{}-{}-{}".format( - benchmark_stripped, + benchmark, language_name, language_version, self.config.resources.resources_id, @@ -269,7 +268,7 @@ def package_code( json.dump(default_host_json, open(os.path.join(directory, "host.json"), "w"), indent=2) code_size = Benchmark.directory_size(directory) - execute("zip -qu -r9 {}.zip * .".format(benchmark_stripped), shell=True, cwd=directory) + execute("zip -qu -r9 {}.zip * .".format(benchmark), shell=True, cwd=directory) return directory, code_size def publish_function( diff --git a/sebs/benchmark.py b/sebs/benchmark.py index f39c4bc4..e18e0e20 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -494,10 +494,6 @@ def build( shutil.rmtree(self._output_dir) os.makedirs(self._output_dir) - benchmark = self.benchmark - if self._deployment_name == "azure": - benchmark = "{}-{}".format(benchmark, self._experiment_config.trigger) - self.copy_code(self._output_dir) self.add_benchmark_data(self._output_dir) self.add_deployment_files(self._output_dir) @@ -507,8 +503,9 @@ def build( os.path.abspath(self._output_dir), self.language_name, self.language_version, - benchmark, + self.benchmark, self.is_cached_valid, + self._experiment_config.trigger ) self.logging.info( ( @@ -524,9 +521,9 @@ def build( # package already exists if self.is_cached: - self._cache_client.update_code_package(self._deployment_name, self.language_name, self, self._experiment_config.trigger) + self._cache_client.update_code_package(self._deployment_name, self.language_name, self) else: - self._cache_client.add_code_package(self._deployment_name, self.language_name, self, self._experiment_config.trigger) + self._cache_client.add_code_package(self._deployment_name, self.language_name, self) self.query_cache() return True, self._code_location diff --git a/sebs/cache.py b/sebs/cache.py index 3a781b58..daf50ef9 100644 --- a/sebs/cache.py +++ b/sebs/cache.py @@ -163,20 +163,15 @@ def update_storage(self, deployment: str, benchmark: str, config: dict): json.dump(cached_config, fp, indent=2) def add_code_package( - self, deployment_name: str, language_name: str, code_package: "Benchmark", - trigger: Optional[str] + self, deployment_name: str, language_name: str, code_package: "Benchmark" ): with self._lock: language = code_package.language_name language_version = code_package.language_version benchmark_dir = os.path.join(self.cache_dir, code_package.benchmark) os.makedirs(benchmark_dir, exist_ok=True) - - if (deployment_name == "azure"): - cached_dir = 
os.path.join(benchmark_dir, deployment_name, language, language_version, trigger) - else: - # Check if cache directory for this deployment exist - cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version) + # Check if cache directory for this deployment exist + cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version) if not os.path.exists(cached_dir): os.makedirs(cached_dir, exist_ok=True) @@ -238,20 +233,14 @@ def add_code_package( ) def update_code_package( - self, deployment_name: str, language_name: str, code_package: "Benchmark", - trigger: Optional[str] + self, deployment_name: str, language_name: str, code_package: "Benchmark" ): with self._lock: language = code_package.language_name language_version = code_package.language_version benchmark_dir = os.path.join(self.cache_dir, code_package.benchmark) - - cached_dir = "" - if (deployment_name == "azure"): - cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version, trigger) - else: - # Check if cache directory for this deployment exist - cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version) + # Check if cache directory for this deployment exist + cached_dir = os.path.join(benchmark_dir, deployment_name, language, language_version) if os.path.exists(cached_dir): # copy code diff --git a/sebs/faas/system.py b/sebs/faas/system.py index 17116e69..e126310a 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -167,6 +167,7 @@ def package_code( language_version: str, benchmark: str, is_cached: bool, + trigger: Optional[Trigger.TriggerType], ) -> Tuple[str, int]: pass diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 94b15243..c351d204 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -220,6 +220,7 @@ def package_code( language_version: str, benchmark: str, is_cached: bool, + trigger: Optional[Trigger.TriggerType], ) -> Tuple[str, int]: CONFIG_FILES = { diff --git a/sebs/local/local.py b/sebs/local/local.py index cb1aabe2..1c975461 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -132,6 +132,7 @@ def package_code( language_version: str, benchmark: str, is_cached: bool, + trigger: Optional[Trigger.TriggerType], ) -> Tuple[str, int]: CONFIG_FILES = { diff --git a/sebs/openwhisk/openwhisk.py b/sebs/openwhisk/openwhisk.py index 00660de9..43c9cd54 100644 --- a/sebs/openwhisk/openwhisk.py +++ b/sebs/openwhisk/openwhisk.py @@ -208,6 +208,7 @@ def package_code( language_version: str, benchmark: str, is_cached: bool, + trigger: Optional[Trigger.TriggerType], ) -> Tuple[str, int]: # Regardless of Docker image status, we need to create .zip file From ba67b4a8d4e739364eed7dc2e86cbdaf432e633b Mon Sep 17 00:00:00 2001 From: orosca Date: Fri, 5 Jul 2024 01:31:54 -0400 Subject: [PATCH 08/19] Address comments --- sebs/aws/triggers.py | 210 +++++++++++++++++++++++++---------------- sebs/azure/triggers.py | 133 ++++++++++++++++++-------- sebs/gcp/gcp.py | 88 +++++++++-------- sebs/gcp/triggers.py | 35 ++++--- 4 files changed, 294 insertions(+), 172 deletions(-) diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index 9335237e..2c62ef76 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -135,15 +135,64 @@ def deserialize(obj: dict) -> Trigger: class QueueTrigger(Trigger): - def __init__(self, fname: str, deployment_client: Optional[AWS] = None): + def __init__(self, fname: str, deployment_client: Optional[AWS] = None, queue_arn: Optional[str] = None, queue_url: Optional[str] = None): 
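This constructor wires an SQS queue to the benchmark function at trigger-creation time. Stripped of the caching bookkeeping, the underlying boto3 calls are roughly the following; the function name, queue name and region are placeholders, with the real values coming from the SeBS configuration.

import json

import boto3

region = "us-east-1"            # placeholder region
function_name = "my-benchmark"  # placeholder Lambda function name

lambda_client = boto3.client("lambda", region_name=region)
sqs_client = boto3.client("sqs", region_name=region)

# Create the queue and resolve its ARN.
queue_url = sqs_client.create_queue(QueueName=function_name)["QueueUrl"]
queue_arn = sqs_client.get_queue_attributes(
    QueueUrl=queue_url, AttributeNames=["QueueArn"]
)["Attributes"]["QueueArn"]

# Register the queue as an event source of the function, unless already mapped.
mappings = lambda_client.list_event_source_mappings(
    EventSourceArn=queue_arn, FunctionName=function_name
)["EventSourceMappings"]
if not mappings:
    lambda_client.create_event_source_mapping(
        EventSourceArn=queue_arn,
        FunctionName=function_name,
        MaximumBatchingWindowInSeconds=1,
    )

# Invoking the function then amounts to publishing a message.
sqs_client.send_message(QueueUrl=queue_url, MessageBody=json.dumps({"key": "value"}))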
super().__init__() self.name = fname - self._deployment_client = deployment_client + + self._deployment_client = None + self._queue_arn = None + self._queue_url = None + + if (deployment_client): + self._deployment_client = deployment_client + if (queue_arn): + self._queue_arn = queue_arn + if (queue_url): + self._queue_url = queue_url + + # When creating the trigger for the first time, also create and store + # queue information. + if (not self.queue_arn and not self.queue_url): + # Init clients + lambda_client = self.deployment_client.get_lambda_client() + sqs_client = boto3.client( + 'sqs', region_name=self.deployment_client.config.region) + + # Create queue + self.logging.debug(f"Creating queue {self.name}") + + self._queue_url = sqs_client.create_queue(QueueName=self.name)["QueueUrl"] + self._queue_arn = sqs_client.get_queue_attributes( + QueueUrl=self.queue_url, + AttributeNames=["QueueArn"] + )["Attributes"]["QueueArn"] + + self.logging.debug("Created queue") + + # Add queue trigger + if (not len(lambda_client.list_event_source_mappings(EventSourceArn=self.queue_arn, + FunctionName=self.name) + ["EventSourceMappings"])): + lambda_client.create_event_source_mapping( + EventSourceArn=self.queue_arn, + FunctionName=self.name, + MaximumBatchingWindowInSeconds=1 + ) @staticmethod def typename() -> str: return "AWS.QueueTrigger" + @property + def queue_arn(self) -> str: + assert self._queue_arn + return self._queue_arn + + @property + def queue_url(self) -> str: + assert self._queue_url + return self._queue_url + @property def deployment_client(self) -> AWS: assert self._deployment_client @@ -161,37 +210,13 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.debug(f"Invoke function {self.name}") - # Init clients - lambda_client = self.deployment_client.get_lambda_client() sqs_client = boto3.client( 'sqs', region_name=self.deployment_client.config.region) - serialized_payload = json.dumps(payload) - - # Create queue - self.logging.debug(f"Creating queue {self.name}") - - queue_url = sqs_client.create_queue(QueueName=self.name)["QueueUrl"] - queue_arn = sqs_client.get_queue_attributes( - QueueUrl=queue_url, - AttributeNames=["QueueArn"] - )["Attributes"]["QueueArn"] - - self.logging.debug("Created queue") - - # Add queue trigger - if (not len(lambda_client.list_event_source_mappings(EventSourceArn=queue_arn, - FunctionName=self.name) - ["EventSourceMappings"])): - lambda_client.create_event_source_mapping( - EventSourceArn=queue_arn, - FunctionName=self.name, - MaximumBatchingWindowInSeconds=1 - ) - # Publish payload to queue + serialized_payload = json.dumps(payload) sqs_client.send_message( - QueueUrl=queue_url, MessageBody=serialized_payload) + QueueUrl=self.queue_url, MessageBody=serialized_payload) self.logging.info(f"Sent message to queue {self.name}") # TODO(oana): gather metrics @@ -203,23 +228,89 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "Queue", "name": self.name} + return { + "type": "Queue", + "name": self.name, + "queue_arn": self.queue_arn, + "queue_url": self.queue_url + } @staticmethod def deserialize(obj: dict) -> Trigger: - return QueueTrigger(obj["name"]) + return QueueTrigger(obj["name"], None, obj["queue_arn"], obj["queue_url"]) class StorageTrigger(Trigger): - def __init__(self, fname: str, deployment_client: Optional[AWS] = None): + def __init__(self, fname: str, deployment_client: Optional[AWS] = None, bucket_name: Optional[str] = None): super().__init__() 
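The storage trigger takes the same approach with S3: its constructor creates a bucket, grants S3 permission to invoke the function, and installs a bucket notification. Reduced to the raw boto3 calls, with placeholder names and region (the real code derives them from the function and the SeBS config):

import json
import uuid

import boto3

region = "us-east-1"            # placeholder region
function_name = "my-benchmark"  # placeholder Lambda function name
bucket_name = function_name.replace("_", "-")  # S3 forbids underscores

s3 = boto3.resource("s3", region_name=region)
lambda_client = boto3.client("lambda", region_name=region)

function_arn = lambda_client.get_function(FunctionName=function_name)[
    "Configuration"
]["FunctionArn"]

# Create the bucket; us-east-1 must not pass a LocationConstraint.
if region == "us-east-1":
    s3.create_bucket(Bucket=bucket_name)
else:
    s3.create_bucket(
        Bucket=bucket_name,
        CreateBucketConfiguration={"LocationConstraint": region},
    )

# Allow S3 to invoke the function on behalf of this bucket.
lambda_client.add_permission(
    FunctionName=function_name,
    StatementId=str(uuid.uuid1()),
    Action="lambda:InvokeFunction",
    Principal="s3.amazonaws.com",
    SourceArn=f"arn:aws:s3:::{bucket_name}",
)

# Fire the function whenever an object is created in the bucket.
s3.BucketNotification(bucket_name).put(
    NotificationConfiguration={
        "LambdaFunctionConfigurations": [
            {"LambdaFunctionArn": function_arn, "Events": ["s3:ObjectCreated:*"]}
        ]
    }
)

# Invocation then amounts to uploading the payload as an object.
s3.Object(bucket_name, "payload.json").put(Body=json.dumps({"key": "value"}))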
self.name = fname - self._deployment_client = deployment_client + + self._deployment_client = None + self._bucket_name = None + + if (deployment_client): + self._deployment_client = deployment_client + if (bucket_name): + self._bucket_name = bucket_name + + # When creating the trigger for the first time, also create and store + # storage bucket information. + if (not self.bucket_name): + # Init clients + s3 = boto3.resource('s3') + lambda_client = self.deployment_client.get_lambda_client() + + # AWS disallows underscores in bucket names + self._bucket_name = self.name.replace('_', '-') + function_arn = lambda_client.get_function(FunctionName=self.name)[ + "Configuration"]["FunctionArn"] + + # Create bucket + self.logging.info(f"Creating bucket {self.bucket_name}") + + region = self.deployment_client.config.region + if (region == "us-east-1"): + s3.create_bucket(Bucket=self.bucket_name) + else: + s3.create_bucket( + Bucket=self.bucket_name, + CreateBucketConfiguration={ + "LocationConstraint": region + } + ) + + self.logging.info("Created bucket") + + lambda_client.add_permission( + FunctionName=self.name, + StatementId=str(uuid.uuid1()), + Action="lambda:InvokeFunction", + Principal="s3.amazonaws.com", + SourceArn=f"arn:aws:s3:::{self.bucket_name}", + ) + + # Add bucket trigger + bucket_notification = s3.BucketNotification(self.bucket_name) + bucket_notification.put( + NotificationConfiguration={'LambdaFunctionConfigurations': [ + { + 'LambdaFunctionArn': function_arn, + 'Events': [ + 's3:ObjectCreated:*' + ], + + }, + ]}) @staticmethod def typename() -> str: return "AWS.StorageTrigger" + @property + def bucket_name(self) -> AWS: + assert self._bucket_name + return self._bucket_name + @property def deployment_client(self) -> AWS: assert self._deployment_client @@ -237,57 +328,12 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.debug(f"Invoke function {self.name}") - # Init clients - lambda_client = self.deployment_client.get_lambda_client() - s3 = boto3.resource('s3') - - # Prep serialized_payload = json.dumps(payload) - # AWS disallows underscores in bucket names - bucket_name = self.name.replace('_', '-') - function_arn = lambda_client.get_function(FunctionName=self.name)[ - "Configuration"]["FunctionArn"] - - # Create bucket - self.logging.info(f"Creating bucket {bucket_name}") - - region = self.deployment_client.config.region - if (region == "us-east-1"): - s3.create_bucket(Bucket=bucket_name) - else: - s3.create_bucket( - Bucket=bucket_name, - CreateBucketConfiguration={ - "LocationConstraint": region - } - ) - - self.logging.info("Created bucket") - - lambda_client.add_permission( - FunctionName=self.name, - StatementId=str(uuid.uuid1()), - Action="lambda:InvokeFunction", - Principal="s3.amazonaws.com", - SourceArn=f"arn:aws:s3:::{bucket_name}", - ) - - # Add bucket trigger - bucket_notification = s3.BucketNotification(bucket_name) - bucket_notification.put( - NotificationConfiguration={'LambdaFunctionConfigurations': [ - { - 'LambdaFunctionArn': function_arn, - 'Events': [ - 's3:ObjectCreated:*' - ], - - }, - ]}) # Put object - s3.Object(bucket_name, 'payload.json').put(Body=serialized_payload) - self.logging.info(f"Uploaded payload to bucket {bucket_name}") + s3 = boto3.resource('s3') + s3.Object(self.bucket_name, 'payload.json').put(Body=serialized_payload) + self.logging.info(f"Uploaded payload to bucket {self.bucket_name}") # TODO(oana): gather metrics @@ -298,8 +344,8 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def 
serialize(self) -> dict: - return {"type": "Storage", "name": self.name} + return {"type": "Storage", "name": self.name, "bucket_name": self.bucket_name} @staticmethod def deserialize(obj: dict) -> Trigger: - return StorageTrigger(obj["name"]) + return StorageTrigger(obj["name"], None, obj["bucket_name"]) diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index ed3c3eb2..e74a3bcd 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -55,10 +55,35 @@ def deserialize(obj: dict) -> Trigger: class QueueTrigger(Trigger): - def __init__(self, fname: str, storage_account: str): + def __init__(self, fname: str, storage_account: str, queue_name: Optional[str] = None): super().__init__() self.name = fname - self.storage_account = storage_account + self._storage_account = storage_account + self._queue_name = None + + if (queue_name): + self._queue_name = queue_name + else: + # Having a queue name field is currently a bit contrived - it is mostly a + # device to indicate that a trigger resource exists and is cached. In the + # future, we may adopt a different convention for naming trigger resources, + # at which point this will become truly useful. + self._queue_name = self.name + + # Init client + default_credential = DefaultAzureCredential() + queue_client = QueueClient(self.account_url, + queue_name=self.queue_name, + credential=default_credential) + + # Create queue + self.logging.info(f"Creating queue {self.queue_name}") + + try: + queue_client.create_queue() + self.logging.info("Created queue") + except ResourceExistsError: + self.logging.info("Queue already exists, reusing...") @staticmethod def typename() -> str: @@ -68,31 +93,34 @@ def typename() -> str: def trigger_type() -> Trigger.TriggerType: return Trigger.TriggerType.QUEUE + @property + def storage_account(self) -> str: + assert self._storage_account + return self._storage_account + + @property + def account_url(self) -> str: + return f"https://{self.storage_account}.queue.core.windows.net" + + @property + def queue_name(self) -> str: + assert self._queue_name + return self._queue_name + def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.info(f"Invoke function {self.name}") - # Init client - account_url = f"https://{self.storage_account}.queue.core.windows.net" + # Prepare queue client default_credential = DefaultAzureCredential() - queue_client = QueueClient(account_url, - queue_name=self.name, + queue_client = QueueClient(self.account_url, + queue_name=self.queue_name, credential=default_credential) - serialized_payload = base64.b64encode(json.dumps(payload).encode('utf-8')).decode('utf-8') - - # Create queue - self.logging.info(f"Creating queue {self.name}") - - try: - queue_client.create_queue() - self.logging.info("Created queue") - except ResourceExistsError: - self.logging.info("Queue already exists, reusing...") - # Publish payload to queue + serialized_payload = base64.b64encode(json.dumps(payload).encode('utf-8')).decode('utf-8') queue_client.send_message(serialized_payload) - self.logging.info(f"Sent message to queue {self.name}") + self.logging.info(f"Sent message to queue {self.queue_name}") # TODO(oana): gather metrics @@ -103,18 +131,39 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "Queue", "name": self.name, "storage_account": self.storage_account} + return {"type": "Queue", "name": self.name, "storage_account": self.storage_account, "queue_name": self.queue_name} @staticmethod def 
deserialize(obj: dict) -> Trigger: - return QueueTrigger(obj["name"], obj["storage_account"]) + return QueueTrigger(obj["name"], obj["storage_account"], obj["queue_name"]) class StorageTrigger(Trigger): - def __init__(self, fname: str, storage_account: str): + def __init__(self, fname: str, storage_account: str, container_name: Optional[str] = None): super().__init__() self.name = fname - self.storage_account = storage_account + self._storage_account = storage_account + + if (container_name): + self._container_name = container_name + else: + # Having a container name field is currently a bit contrived - it is mostly + # a device to indicate that a trigger resource exists and is cached. In the + # future, we may adopt a different convention for naming trigger resources, + # at which point this will become truly useful. + self._container_name = self.name + + # Init client + default_credential = DefaultAzureCredential() + blob_service_client = BlobServiceClient(self.account_url, credential=default_credential) + + # Create container + self.logging.info(f"Creating container {self.container_name}") + try: + blob_service_client.create_container(self.container_name) + self.logging.info("Created container") + except ResourceExistsError: + self.logging.info("Container already exists, reusing...") @staticmethod def typename() -> str: @@ -124,35 +173,39 @@ def typename() -> str: def trigger_type() -> Trigger.TriggerType: return Trigger.TriggerType.STORAGE - def sync_invoke(self, payload: dict) -> ExecutionResult: + @property + def storage_account(self) -> str: + assert self._storage_account + return self._storage_account - self.logging.info(f"Invoke function {self.name}") + @property + def account_url(self) -> str: + return f"https://{self.storage_account}.blob.core.windows.net" - # Init client - account_url = f"https://{self.storage_account}.blob.core.windows.net" - default_credential = DefaultAzureCredential() - blob_service_client = BlobServiceClient(account_url, credential=default_credential) + @property + def container_name(self) -> str: + assert self._container_name + return self._container_name - # Create container - container_name = self.name - self.logging.info(f"Creating container {container_name}") - try: - blob_service_client.create_container(container_name) - self.logging.info("Created container") - except ResourceExistsError: - self.logging.info("Container already exists, reusing...") + def sync_invoke(self, payload: dict) -> ExecutionResult: + + self.logging.info(f"Invoke function {self.name}") # Prepare blob file_name = "payload.json" with open(file_name, 'w') as fp: json.dump(payload, fp) + # Init client + default_credential = DefaultAzureCredential() + blob_service_client = BlobServiceClient(self.account_url, credential=default_credential) + # Upload blob - blob_client = blob_service_client.get_blob_client(container=container_name, + blob_client = blob_service_client.get_blob_client(container=self.container_name, blob=file_name) with open(file=file_name, mode="rb") as payload: blob_client.upload_blob(payload, overwrite=True) - self.logging.info(f"Uploaded payload to container {container_name}") + self.logging.info(f"Uploaded payload to container {self.container_name}") # TODO(oana): gather metrics @@ -163,8 +216,8 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "Storage", "name": self.name, "storage_account": self.storage_account} + return {"type": "Storage", "name": self.name, "storage_account": 
self.storage_account, "container_name": self.container_name} @staticmethod def deserialize(obj: dict) -> Trigger: - return StorageTrigger(obj["name"], obj["storage_account"]) + return StorageTrigger(obj["name"], obj["storage_account"], obj["container_name"]) diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index c351d204..1f093876 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -106,22 +106,21 @@ def get_storage( """ Provide the fully qualified name of a trigger resource (queue or storage). """ + def get_trigger_resource_name(self, func_name: str) -> str: trigger = func_name.split("-")[-1] assert trigger == "queue" or trigger == "storage" - if (trigger == "queue"): - return 'projects/{project_name}/topics/{topic}'.format( - project_name=self.config.project_name, - topic=func_name + if trigger == "queue": + return "projects/{project_name}/topics/{topic}".format( + project_name=self.config.project_name, topic=func_name ) else: - return 'projects/{project_name}/buckets/{bucket}'.format( - project_name=self.config.project_name, - bucket=func_name + return "projects/{project_name}/buckets/{bucket}".format( + project_name=self.config.project_name, bucket=func_name ) - + """ Trigger resources (queue, bucket) must exist on GCP before the corresponding function is first deployed. @@ -133,23 +132,30 @@ def get_trigger_resource_name(self, func_name: str) -> str: :param func_name: the name of the function to be deployed, including its trigger + :param cached: when True, skip the creation of the actual resource + - merely create the configuration required to deploy the function. + This option is used in update_function() only. + :return: JSON/dict with the trigger configuration required by GCP on function creation/update """ - def create_trigger_resource(self, func_name: str) -> Dict: + + def create_trigger_resource(self, func_name: str, cached=False) -> Dict: trigger = func_name.split("-")[-1] - if (trigger == "queue"): - pub_sub = build("pubsub", "v1", cache_discovery=False) + if trigger == "queue": topic_name = self.get_trigger_resource_name(func_name) - - self.logging.info(f"Creating queue '{topic_name}'") - try: - pub_sub.projects().topics().create(name=topic_name).execute() - self.logging.info("Created queue") - except HttpError as http_error: - if (http_error.resp.status == 409): - self.logging.info("Queue already exists, reusing...") + + if not cached: + pub_sub = build("pubsub", "v1", cache_discovery=False) + + self.logging.info(f"Creating queue '{topic_name}'") + try: + pub_sub.projects().topics().create(name=topic_name).execute() + self.logging.info("Created queue") + except HttpError as http_error: + if http_error.resp.status == 409: + self.logging.info("Queue already exists, reusing...") return { "eventTrigger": { @@ -158,21 +164,23 @@ def create_trigger_resource(self, func_name: str) -> Dict: }, "entryPoint": "handler_queue", } - elif (trigger == "storage"): - storage = build("storage", "v1", cache_discovery=False) + elif trigger == "storage": bucket_name = self.get_trigger_resource_name(func_name) - self.logging.info(f"Creating storage bucket '{bucket_name}'") - try: - storage.buckets().insert( - project=self.config.project_name, - body={ "name": func_name }, - ).execute() - self.logging.info("Created storage bucket") - except HttpError as http_error: - if (http_error.resp.status == 409): - self.logging.info("Storage bucket already exists, reusing...") - + if not cached: + storage = build("storage", "v1", cache_discovery=False) + + self.logging.info(f"Creating storage bucket 
'{bucket_name}'") + try: + storage.buckets().insert( + project=self.config.project_name, + body={"name": func_name}, + ).execute() + self.logging.info("Created storage bucket") + except HttpError as http_error: + if http_error.resp.status == 409: + self.logging.info("Storage bucket already exists, reusing...") + return { "eventTrigger": { "eventType": "google.storage.object.finalize", @@ -181,7 +189,7 @@ def create_trigger_resource(self, func_name: str) -> Dict: "entryPoint": "handler_storage", } # HTTP triggers do not require resource creation - return { "httpsTrigger": {}, "entryPoint": "handler_http" } + return {"httpsTrigger": {}, "entryPoint": "handler_http"} @staticmethod def default_function_name(code_package: Benchmark) -> str: @@ -318,7 +326,8 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti "timeout": str(timeout) + "s", "ingressSettings": "ALLOW_ALL", "sourceArchiveUrl": "gs://" + code_bucket + "/" + code_prefix, - } | trigger_info, + } + | trigger_info, ) ) create_req.execute() @@ -390,10 +399,12 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) trigger = HTTPTrigger(invoke_url) self.logging.info(f"Created HTTP trigger for {function.name} function") elif trigger_type == Trigger.TriggerType.QUEUE: - trigger = QueueTrigger(function.name, self) + trigger = QueueTrigger( + function.name, self.get_trigger_resource_name(function.name), self + ) self.logging.info(f"Created Queue trigger for {function.name} function") elif trigger_type == Trigger.TriggerType.STORAGE: - trigger = StorageTrigger(function.name) + trigger = StorageTrigger(function.name, self.get_trigger_resource_name(function.name)) self.logging.info(f"Created Storage trigger for {function.name} function") else: raise RuntimeError("Not supported!") @@ -437,7 +448,7 @@ def update_function(self, function: Function, code_package: Benchmark): # Before creating the function, ensure all trigger resources (queue, # bucket) exist on GCP. - trigger_info = self.create_trigger_resource(function.name) + trigger_info = self.create_trigger_resource(function.name, cached=True) req = ( self.function_client.projects() @@ -451,7 +462,8 @@ def update_function(self, function: Function, code_package: Benchmark): "availableMemoryMb": function.config.memory, "timeout": str(function.config.timeout) + "s", "sourceArchiveUrl": "gs://" + bucket + "/" + code_package_name, - } | trigger_info, + } + | trigger_info, ) ) res = req.execute() diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 35b46119..753113c1 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -120,15 +120,21 @@ def deserialize(obj: dict) -> Trigger: class QueueTrigger(Trigger): - def __init__(self, fname: str, deployment_client: Optional[GCP] = None): + def __init__(self, fname: str, queue_name: str, deployment_client: Optional[GCP] = None): super().__init__() self.name = fname self._deployment_client = deployment_client + self._queue_name = queue_name @staticmethod def typename() -> str: return "GCP.QueueTrigger" + @property + def queue_name(self) -> GCP: + assert self._queue_name + return self._queue_name + @property def deployment_client(self) -> GCP: assert self._deployment_client @@ -149,13 +155,13 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Init client pub_sub = build("pubsub", "v1", cache_discovery=False) - # Prep + # Prepare payload # GCP is very particular with data encoding... 
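The encoding quirk mentioned here is that the Pub/Sub REST API expects the message data field as base64-encoded text. A minimal publish against a placeholder topic path, mirroring the call used in this trigger:

import base64
import json

from googleapiclient.discovery import build

topic = "projects/my-project/topics/my-benchmark-queue"  # placeholder topic path
payload = {"key": "value"}

pub_sub = build("pubsub", "v1", cache_discovery=False)

# Pub/Sub requires the "data" field to be base64-encoded.
data = base64.b64encode(json.dumps(payload).encode("utf-8")).decode("utf-8")

pub_sub.projects().topics().publish(
    topic=topic,
    body={"messages": [{"data": data}]},
).execute()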
serialized_payload = base64.b64encode(json.dumps(payload).encode("utf-8")) # Publish payload to queue pub_sub.projects().topics().publish( - topic=self.deployment_client.get_trigger_resource_name(self.name), + topic=self.queue_name, body={ "messages": [{ "data": serialized_payload.decode("utf-8") @@ -172,17 +178,18 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "Queue", "name": self.name} + return {"type": "Queue", "name": self.name, "queue_name": self.queue_name} @staticmethod def deserialize(obj: dict) -> Trigger: - return QueueTrigger(obj["name"]) + return QueueTrigger(obj["name"], obj["queue_name"]) class StorageTrigger(Trigger): - def __init__(self, fname: str): + def __init__(self, fname: str, bucket_name: str): super().__init__() self.name = fname + self._bucket_name = bucket_name @staticmethod def typename() -> str: @@ -192,16 +199,20 @@ def typename() -> str: def trigger_type() -> Trigger.TriggerType: return Trigger.TriggerType.STORAGE + @property + def bucket_name(self) -> GCP: + assert self._bucket_name + return self._bucket_name + def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.info(f"Invoke function {self.name}") # Init clients - bucket_name = self.name client = gcp_storage.Client(); - bucket_instance = client.bucket(bucket_name) + bucket_instance = client.bucket(self.bucket_name) - # Prep + # Prepare payload file_name = "payload.json" with open(file_name, "w") as fp: json.dump(payload, fp) @@ -211,7 +222,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: blob = bucket_instance.blob(blob_name=file_name, chunk_size=4 * 1024 * 1024) blob.upload_from_filename(file_name) - self.logging.info(f"Uploaded payload to bucket {bucket_name}") + self.logging.info(f"Uploaded payload to bucket {self.bucket_name}") # TODO(oana): gather metrics @@ -222,8 +233,8 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "Storage", "name": self.name} + return {"type": "Storage", "name": self.name, "bucket_name": self.bucket_name} @staticmethod def deserialize(obj: dict) -> Trigger: - return StorageTrigger(obj["name"]) + return StorageTrigger(obj["name"], obj["bucket_name"]) From 94a675aa0a4e30fe9888308a69e8289f3843f343 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 11 Jul 2024 01:12:05 +0200 Subject: [PATCH 09/19] [aws] Linting --- sebs/aws/aws.py | 55 ++++++++-------------- sebs/aws/config.py | 24 ++++------ sebs/aws/function.py | 17 ++++--- sebs/aws/s3.py | 27 ++++------- sebs/aws/triggers.py | 107 +++++++++++++++++++++---------------------- 5 files changed, 97 insertions(+), 133 deletions(-) diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 73c56286..92c65dcc 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -150,13 +150,11 @@ def package_code( # FIXME: use zipfile # create zip with hidden directory but without parent directory - execute("zip -qu -r9 {}.zip * .".format(benchmark), - shell=True, cwd=directory) + execute("zip -qu -r9 {}.zip * .".format(benchmark), shell=True, cwd=directory) benchmark_archive = "{}.zip".format(os.path.join(directory, benchmark)) self.logging.info("Created {} archive".format(benchmark_archive)) - bytes_size = os.path.getsize( - os.path.join(directory, benchmark_archive)) + bytes_size = os.path.getsize(os.path.join(directory, benchmark_archive)) mbytes = bytes_size / 1024.0 / 1024.0 self.logging.info("Zip archive size {:2f} MB".format(mbytes)) @@ -189,8 +187,7 @@ def 
create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun try: ret = self.client.get_function(FunctionName=func_name) self.logging.info( - "Function {} exists on AWS, retrieve configuration.".format( - func_name) + "Function {} exists on AWS, retrieve configuration.".format(func_name) ) # Here we assume a single Lambda role lambda_function = LambdaFunction( @@ -206,8 +203,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun lambda_function.updated_code = True # TODO: get configuration of REST API except self.client.exceptions.ResourceNotFoundException: - self.logging.info( - "Creating function {} from {}".format(func_name, package)) + self.logging.info("Creating function {} from {}".format(func_name, package)) # AWS Lambda limit on zip deployment size # Limit to 50 MB @@ -221,19 +217,16 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun else: code_package_name = cast(str, os.path.basename(package)) - code_bucket = storage_client.get_bucket( - Resources.StorageBucketType.DEPLOYMENT) + code_bucket = storage_client.get_bucket(Resources.StorageBucketType.DEPLOYMENT) code_prefix = os.path.join(benchmark, code_package_name) storage_client.upload(code_bucket, package, code_prefix) - self.logging.info( - "Uploading function {} code to {}".format(func_name, code_bucket)) + self.logging.info("Uploading function {} code to {}".format(func_name, code_bucket)) code_config = {"S3Bucket": code_bucket, "S3Key": code_prefix} ret = self.client.create_function( FunctionName=func_name, Runtime="{}{}".format( - language, self._map_language_runtime( - language, language_runtime) + language, self._map_language_runtime(language, language_runtime) ), Handler="handler.handler", Role=self.config.resources.lambda_role(self.session), @@ -301,8 +294,7 @@ def update_function(self, function: Function, code_package: Benchmark): # AWS Lambda limit on zip deployment if code_size < 50 * 1024 * 1024: with open(package, "rb") as code_body: - self.client.update_function_code( - FunctionName=name, ZipFile=code_body.read()) + self.client.update_function_code(FunctionName=name, ZipFile=code_body.read()) # Upload code package to S3, then update else: code_package_name = os.path.basename(package) @@ -331,8 +323,7 @@ def update_function_configuration(self, function: Function, benchmark: Benchmark MemorySize=function.config.memory, ) self.wait_function_updated(function) - self.logging.info( - f"Updated configuration of {function.name} function. ") + self.logging.info(f"Updated configuration of {function.name} function. 
") @staticmethod def default_function_name(code_package: Benchmark) -> str: @@ -401,12 +392,10 @@ def parse_aws_report( return request_id output = requests[request_id] output.request_id = request_id - output.provider_times.execution = int( - float(aws_vals["Duration"]) * 1000) + output.provider_times.execution = int(float(aws_vals["Duration"]) * 1000) output.stats.memory_used = float(aws_vals["Max Memory Used"]) if "Init Duration" in aws_vals: - output.provider_times.initialization = int( - float(aws_vals["Init Duration"]) * 1000) + output.provider_times.initialization = int(float(aws_vals["Init Duration"]) * 1000) output.billing.billed_time = int(aws_vals["Billed Duration"]) output.billing.memory = int(aws_vals["Memory Size"]) output.billing.gb_seconds = output.billing.billed_time * output.billing.memory @@ -440,14 +429,12 @@ def get_invocation_error(self, function_name: str, start_time: int, end_time: in time.sleep(5) response = self.logs_client.get_query_results(queryId=query_id) if len(response["results"]) == 0: - self.logging.info( - "AWS logs are not yet available, repeat after 15s...") + self.logging.info("AWS logs are not yet available, repeat after 15s...") time.sleep(15) response = None else: break - self.logging.error( - f"Invocation error for AWS Lambda function {function_name}") + self.logging.error(f"Invocation error for AWS Lambda function {function_name}") for message in response["results"]: for value in message: if value["field"] == "@message": @@ -494,8 +481,7 @@ def download_metrics( for val in results: for result_part in val: if result_part["field"] == "@message": - request_id = AWS.parse_aws_report( - result_part["value"], requests) + request_id = AWS.parse_aws_report(result_part["value"], requests) if request_id in requests: results_processed += 1 requests_ids.remove(request_id) @@ -509,11 +495,11 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T function = cast(LambdaFunction, func) + trigger: Trigger if trigger_type == Trigger.TriggerType.HTTP: api_name = "{}-http-api".format(function.name) - http_api = self.config.resources.http_api( - api_name, function, self.session) + http_api = self.config.resources.http_api(api_name, function, self.session) # https://aws.amazon.com/blogs/compute/announcing-http-apis-for-amazon-api-gateway/ # but this is wrong - source arn must be {api-arn}/*/* self.get_lambda_client().add_permission( @@ -536,13 +522,11 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T elif trigger_type == Trigger.TriggerType.QUEUE: trigger = QueueTrigger(func.name, self) trigger.logging_handlers = self.logging_handlers - self.logging.info( - f"Created Queue trigger for {func.name} function.") + self.logging.info(f"Created Queue trigger for {func.name} function.") elif trigger_type == Trigger.TriggerType.STORAGE: trigger = StorageTrigger(func.name, self) trigger.logging_handlers = self.logging_handlers - self.logging.info( - f"Created Storage trigger for {func.name} function.") + self.logging.info(f"Created Storage trigger for {func.name} function.") else: raise RuntimeError("Not supported!") @@ -556,8 +540,7 @@ def _enforce_cold_start(self, function: Function): FunctionName=func.name, Timeout=func.config.timeout, MemorySize=func.config.memory, - Environment={"Variables": { - "ForceColdStart": str(self.cold_start_counter)}}, + Environment={"Variables": {"ForceColdStart": str(self.cold_start_counter)}}, ) def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): diff 
--git a/sebs/aws/config.py b/sebs/aws/config.py index 6de965d4..44c9a490 100644 --- a/sebs/aws/config.py +++ b/sebs/aws/config.py @@ -85,8 +85,7 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden return ret def update_cache(self, cache: Cache): - cache.update_config(val=self.account_id, keys=[ - "aws", "credentials", "account_id"]) + cache.update_config(val=self.account_id, keys=["aws", "credentials", "account_id"]) def serialize(self) -> dict: out = {"account_id": self._account_id} @@ -146,8 +145,7 @@ def lambda_role(self, boto3_session: boto3.session.Session) -> str: try: out = iam_client.get_role(RoleName=role_name) self._lambda_role = out["Role"]["Arn"] - self.logging.info( - f"AWS: Selected {self._lambda_role} IAM role") + self.logging.info(f"AWS: Selected {self._lambda_role} IAM role") except iam_client.exceptions.NoSuchEntityException: out = iam_client.create_role( RoleName=role_name, @@ -161,8 +159,7 @@ def lambda_role(self, boto3_session: boto3.session.Session) -> str: time.sleep(10) # Attach basic AWS Lambda and S3 policies. for policy in attached_policies: - iam_client.attach_role_policy( - RoleName=role_name, PolicyArn=policy) + iam_client.attach_role_policy(RoleName=role_name, PolicyArn=policy) return self._lambda_role def http_api( @@ -224,11 +221,9 @@ def serialize(self) -> dict: def update_cache(self, cache: Cache): super().update_cache(cache) - cache.update_config(val=self._lambda_role, keys=[ - "aws", "resources", "lambda-role"]) + cache.update_config(val=self._lambda_role, keys=["aws", "resources", "lambda-role"]) for name, api in self._http_apis.items(): - cache.update_config(val=api.serialize(), keys=[ - "aws", "resources", "http-apis", name]) + cache.update_config(val=api.serialize(), keys=["aws", "resources", "http-apis", name]) @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: @@ -245,8 +240,7 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour if "resources" in config: AWSResources.initialize(ret, config["resources"]) ret.logging_handlers = handlers - ret.logging.info( - "No cached resources for AWS found, using user configuration.") + ret.logging.info("No cached resources for AWS found, using user configuration.") else: AWSResources.initialize(ret, {}) ret.logging_handlers = handlers @@ -284,10 +278,8 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config cached_config = cache.get_config("aws") # FIXME: use future annotations (see sebs/faas/system) - credentials = cast( - AWSCredentials, AWSCredentials.deserialize(config, cache, handlers)) - resources = cast(AWSResources, AWSResources.deserialize( - config, cache, handlers)) + credentials = cast(AWSCredentials, AWSCredentials.deserialize(config, cache, handlers)) + resources = cast(AWSResources, AWSResources.deserialize(config, cache, handlers)) config_obj = AWSConfig(credentials, resources) config_obj.logging_handlers = handlers # Load cached values diff --git a/sebs/aws/function.py b/sebs/aws/function.py index fbdb6d6f..24ce4a8d 100644 --- a/sebs/aws/function.py +++ b/sebs/aws/function.py @@ -55,18 +55,17 @@ def deserialize(cached_config: dict) -> "LambdaFunction": for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": LibraryTrigger, - "HTTP": HTTPTrigger, - "Queue": QueueTrigger, - "Storage": StorageTrigger - }.get(trigger["type"]), + { + "Library": LibraryTrigger, + "HTTP": HTTPTrigger, + "Queue": QueueTrigger, + "Storage": 
StorageTrigger, + }.get(trigger["type"]), ) - assert trigger_type, "Unknown trigger type {}".format( - trigger["type"]) + assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) return ret def code_bucket(self, benchmark: str, storage_client: S3): - self.bucket = storage_client.get_bucket( - Resources.StorageBucketType.DEPLOYMENT) + self.bucket = storage_client.get_bucket(Resources.StorageBucketType.DEPLOYMENT) return self.bucket diff --git a/sebs/aws/s3.py b/sebs/aws/s3.py index bd550c52..79ca8905 100644 --- a/sebs/aws/s3.py +++ b/sebs/aws/s3.py @@ -54,8 +54,7 @@ def _create_bucket( for bucket_name in buckets: if name in bucket_name: self.logging.info( - "Bucket {} for {} already exists, skipping.".format( - bucket_name, name) + "Bucket {} for {} already exists, skipping.".format(bucket_name, name) ) return bucket_name @@ -71,8 +70,7 @@ def _create_bucket( if self.region != "us-east-1": self.client.create_bucket( Bucket=bucket_name, - CreateBucketConfiguration={ - "LocationConstraint": self.region}, + CreateBucketConfiguration={"LocationConstraint": self.region}, ) else: # This is incredible x2 - boto3 will not throw exception if you recreate @@ -88,8 +86,7 @@ def _create_bucket( self.logging.info("Created bucket {}".format(bucket_name)) except self.client.exceptions.BucketAlreadyExists as e: - self.logging.error( - f"The bucket {bucket_name} exists already in region {self.region}!") + self.logging.error(f"The bucket {bucket_name} exists already in region {self.region}!") raise e except self.client.exceptions.ClientError as e: self.logging.error( @@ -113,8 +110,7 @@ def uploader_func(self, path_idx, key, filepath): for f in self.input_prefixes_files[path_idx]: f_name = f if key == f_name: - self.logging.info( - "Skipping upload of {} to {}".format(filepath, bucket_name)) + self.logging.info("Skipping upload of {} to {}".format(filepath, bucket_name)) return self.upload(bucket_name, filepath, key) @@ -124,10 +120,8 @@ def upload(self, bucket_name: str, filepath: str, key: str): self.client.upload_file(Filename=filepath, Bucket=bucket_name, Key=key) def download(self, bucket_name: str, key: str, filepath: str): - self.logging.info("Download {}:{} to {}".format( - bucket_name, key, filepath)) - self.client.download_file( - Bucket=bucket_name, Key=key, Filename=filepath) + self.logging.info("Download {}:{} to {}".format(bucket_name, key, filepath)) + self.client.download_file(Bucket=bucket_name, Key=key, Filename=filepath) def exists_bucket(self, bucket_name: str) -> bool: try: @@ -137,8 +131,7 @@ def exists_bucket(self, bucket_name: str) -> bool: return False def list_bucket(self, bucket_name: str, prefix: str = ""): - objects_list = self.client.list_objects_v2( - Bucket=bucket_name, Prefix=prefix) + objects_list = self.client.list_objects_v2(Bucket=bucket_name, Prefix=prefix) objects: List[str] if "Contents" in objects_list: objects = [obj["Key"] for obj in objects_list["Contents"]] @@ -156,10 +149,8 @@ def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: def clean_bucket(self, bucket: str): objects = self.client.list_objects_v2(Bucket=bucket) if "Contents" in objects: - objects = [{"Key": obj["Key"]} - for obj in objects["Contents"]] # type: ignore - self.client.delete_objects(Bucket=bucket, Delete={ - "Objects": objects}) # type: ignore + objects = [{"Key": obj["Key"]} for obj in objects["Contents"]] # type: ignore + self.client.delete_objects(Bucket=bucket, Delete={"Objects": objects}) # type: ignore def 
remove_bucket(self, bucket: str): self.client.delete_bucket(Bucket=bucket) diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index 2c62ef76..c1a47e4f 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -41,22 +41,19 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: serialized_payload = json.dumps(payload).encode("utf-8") client = self.deployment_client.get_lambda_client() begin = datetime.datetime.now() - ret = client.invoke(FunctionName=self.name, - Payload=serialized_payload, LogType="Tail") + ret = client.invoke(FunctionName=self.name, Payload=serialized_payload, LogType="Tail") end = datetime.datetime.now() aws_result = ExecutionResult.from_times(begin, end) aws_result.request_id = ret["ResponseMetadata"]["RequestId"] if ret["StatusCode"] != 200: self.logging.error("Invocation of {} failed!".format(self.name)) - self.logging.error("Input: {}".format( - serialized_payload.decode("utf-8"))) + self.logging.error("Input: {}".format(serialized_payload.decode("utf-8"))) aws_result.stats.failure = True return aws_result if "FunctionError" in ret: self.logging.error("Invocation of {} failed!".format(self.name)) - self.logging.error("Input: {}".format( - serialized_payload.decode("utf-8"))) + self.logging.error("Input: {}".format(serialized_payload.decode("utf-8"))) aws_result.stats.failure = True return aws_result self.logging.debug(f"Invoke of function {self.name} was successful") @@ -70,8 +67,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: if isinstance(function_output["body"], dict): aws_result.parse_benchmark_output(function_output["body"]) else: - aws_result.parse_benchmark_output( - json.loads(function_output["body"])) + aws_result.parse_benchmark_output(json.loads(function_output["body"])) return aws_result def async_invoke(self, payload: dict): @@ -86,10 +82,8 @@ def async_invoke(self, payload: dict): LogType="Tail", ) if ret["StatusCode"] != 202: - self.logging.error( - "Async invocation of {} failed!".format(self.name)) - self.logging.error("Input: {}".format( - serialized_payload.decode("utf-8"))) + self.logging.error("Async invocation of {} failed!".format(self.name)) + self.logging.error("Input: {}".format(serialized_payload.decode("utf-8"))) raise RuntimeError() return ret @@ -135,7 +129,13 @@ def deserialize(obj: dict) -> Trigger: class QueueTrigger(Trigger): - def __init__(self, fname: str, deployment_client: Optional[AWS] = None, queue_arn: Optional[str] = None, queue_url: Optional[str] = None): + def __init__( + self, + fname: str, + deployment_client: Optional[AWS] = None, + queue_arn: Optional[str] = None, + queue_url: Optional[str] = None, + ): super().__init__() self.name = fname @@ -143,40 +143,40 @@ def __init__(self, fname: str, deployment_client: Optional[AWS] = None, queue_ar self._queue_arn = None self._queue_url = None - if (deployment_client): + if deployment_client: self._deployment_client = deployment_client - if (queue_arn): + if queue_arn: self._queue_arn = queue_arn - if (queue_url): + if queue_url: self._queue_url = queue_url # When creating the trigger for the first time, also create and store # queue information. 
- if (not self.queue_arn and not self.queue_url): + if not self.queue_arn and not self.queue_url: # Init clients lambda_client = self.deployment_client.get_lambda_client() - sqs_client = boto3.client( - 'sqs', region_name=self.deployment_client.config.region) - + sqs_client = boto3.client("sqs", region_name=self.deployment_client.config.region) + # Create queue self.logging.debug(f"Creating queue {self.name}") self._queue_url = sqs_client.create_queue(QueueName=self.name)["QueueUrl"] self._queue_arn = sqs_client.get_queue_attributes( - QueueUrl=self.queue_url, - AttributeNames=["QueueArn"] + QueueUrl=self.queue_url, AttributeNames=["QueueArn"] )["Attributes"]["QueueArn"] self.logging.debug("Created queue") # Add queue trigger - if (not len(lambda_client.list_event_source_mappings(EventSourceArn=self.queue_arn, - FunctionName=self.name) - ["EventSourceMappings"])): + if not len( + lambda_client.list_event_source_mappings( + EventSourceArn=self.queue_arn, FunctionName=self.name + )["EventSourceMappings"] + ): lambda_client.create_event_source_mapping( EventSourceArn=self.queue_arn, FunctionName=self.name, - MaximumBatchingWindowInSeconds=1 + MaximumBatchingWindowInSeconds=1, ) @staticmethod @@ -210,13 +210,11 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.debug(f"Invoke function {self.name}") - sqs_client = boto3.client( - 'sqs', region_name=self.deployment_client.config.region) + sqs_client = boto3.client("sqs", region_name=self.deployment_client.config.region) # Publish payload to queue serialized_payload = json.dumps(payload) - sqs_client.send_message( - QueueUrl=self.queue_url, MessageBody=serialized_payload) + sqs_client.send_message(QueueUrl=self.queue_url, MessageBody=serialized_payload) self.logging.info(f"Sent message to queue {self.name}") # TODO(oana): gather metrics @@ -232,7 +230,7 @@ def serialize(self) -> dict: "type": "Queue", "name": self.name, "queue_arn": self.queue_arn, - "queue_url": self.queue_url + "queue_url": self.queue_url, } @staticmethod @@ -241,42 +239,43 @@ def deserialize(obj: dict) -> Trigger: class StorageTrigger(Trigger): - def __init__(self, fname: str, deployment_client: Optional[AWS] = None, bucket_name: Optional[str] = None): + def __init__( + self, fname: str, deployment_client: Optional[AWS] = None, bucket_name: Optional[str] = None + ): super().__init__() self.name = fname self._deployment_client = None self._bucket_name = None - if (deployment_client): + if deployment_client: self._deployment_client = deployment_client - if (bucket_name): + if bucket_name: self._bucket_name = bucket_name # When creating the trigger for the first time, also create and store # storage bucket information. 
- if (not self.bucket_name): + if not self.bucket_name: # Init clients - s3 = boto3.resource('s3') + s3 = boto3.resource("s3") lambda_client = self.deployment_client.get_lambda_client() # AWS disallows underscores in bucket names - self._bucket_name = self.name.replace('_', '-') - function_arn = lambda_client.get_function(FunctionName=self.name)[ - "Configuration"]["FunctionArn"] + self._bucket_name = self.name.replace("_", "-") + function_arn = lambda_client.get_function(FunctionName=self.name)["Configuration"][ + "FunctionArn" + ] # Create bucket self.logging.info(f"Creating bucket {self.bucket_name}") region = self.deployment_client.config.region - if (region == "us-east-1"): + if region == "us-east-1": s3.create_bucket(Bucket=self.bucket_name) else: s3.create_bucket( Bucket=self.bucket_name, - CreateBucketConfiguration={ - "LocationConstraint": region - } + CreateBucketConfiguration={"LocationConstraint": region}, ) self.logging.info("Created bucket") @@ -292,15 +291,15 @@ def __init__(self, fname: str, deployment_client: Optional[AWS] = None, bucket_n # Add bucket trigger bucket_notification = s3.BucketNotification(self.bucket_name) bucket_notification.put( - NotificationConfiguration={'LambdaFunctionConfigurations': [ - { - 'LambdaFunctionArn': function_arn, - 'Events': [ - 's3:ObjectCreated:*' - ], - - }, - ]}) + NotificationConfiguration={ + "LambdaFunctionConfigurations": [ + { + "LambdaFunctionArn": function_arn, + "Events": ["s3:ObjectCreated:*"], + }, + ] + } + ) @staticmethod def typename() -> str: @@ -331,8 +330,8 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: serialized_payload = json.dumps(payload) # Put object - s3 = boto3.resource('s3') - s3.Object(self.bucket_name, 'payload.json').put(Body=serialized_payload) + s3 = boto3.resource("s3") + s3.Object(self.bucket_name, "payload.json").put(Body=serialized_payload) self.logging.info(f"Uploaded payload to bucket {self.bucket_name}") # TODO(oana): gather metrics From bb0ade599792da87e5c9777868db4480c9f83082 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 11 Jul 2024 01:13:31 +0200 Subject: [PATCH 10/19] [azure] Linting --- sebs/azure/azure.py | 43 +++++++++++++++++++++++----------------- sebs/azure/function.py | 7 +++---- sebs/azure/triggers.py | 45 ++++++++++++++++++++++++++---------------- 3 files changed, 56 insertions(+), 39 deletions(-) diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index 2946ed58..03945274 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -157,10 +157,11 @@ def get_storage(self, replace_existing: bool = False) -> PersistentStorage: :param exec_files: the files which define and implement the function to be executed :return: JSON dictionary containing the function configuration """ + def create_function_json(self, benchmark, exec_files) -> Dict: trigger = benchmark.split("-")[-1] - if (trigger == "queue"): + if trigger == "queue": return { "scriptFile": exec_files, "entryPoint": "handler_queue", @@ -170,11 +171,11 @@ def create_function_json(self, benchmark, exec_files) -> Dict: "type": "queueTrigger", "direction": "in", "queueName": benchmark, - "connection": "AzureWebJobsStorage" + "connection": "AzureWebJobsStorage", } - ] + ], } - elif (trigger == "storage"): + elif trigger == "storage": return { "scriptFile": exec_files, "entryPoint": "handler_storage", @@ -184,9 +185,9 @@ def create_function_json(self, benchmark, exec_files) -> Dict: "type": "blobTrigger", "direction": "in", "path": benchmark, - "connection": "AzureWebJobsStorage" + "connection": 
"AzureWebJobsStorage", } - ] + ], } return { # HTTP "scriptFile": exec_files, @@ -202,7 +203,7 @@ def create_function_json(self, benchmark, exec_files) -> Dict: {"type": "http", "direction": "out", "name": "$return"}, ], } - + # Directory structure # handler # - source files @@ -244,7 +245,7 @@ def package_code( language_name, language_version, self.config.resources.resources_id, - trigger + trigger, ) .replace(".", "-") .replace("_", "-") @@ -254,7 +255,8 @@ def package_code( json_out = os.path.join(directory, "handler", "function.json") json.dump( self.create_function_json(func_name, EXEC_FILES[language_name]), - open(json_out, "w"), indent=2 + open(json_out, "w"), + indent=2, ) # generate host.json @@ -350,8 +352,10 @@ def update_function(self, function: Function, code_package: Benchmark): url = self.publish_function(function, code_package, True) # TODO(oana): this might need refactoring - if (function.name.endswith("http")): - trigger = HTTPTrigger(url, self.config.resources.data_storage_account(self.cli_instance)) + if function.name.endswith("http"): + trigger = HTTPTrigger( + url, self.config.resources.data_storage_account(self.cli_instance) + ) trigger.logging_handlers = self.logging_handlers function.add_trigger(trigger) @@ -580,21 +584,23 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) resource_group = self.config.resources.resource_group(self.cli_instance) storage_account = azure_function.function_storage.account_name - user_principal_name = self.cli_instance.execute('az ad user list') + user_principal_name = self.cli_instance.execute("az ad user list") storage_account_scope = self.cli_instance.execute( - ('az storage account show --resource-group {} --name {} --query id') - .format(resource_group, storage_account) + ("az storage account show --resource-group {} --name {} --query id").format( + resource_group, storage_account + ) ) self.cli_instance.execute( - ('az role assignment create --assignee "{}" \ + ( + 'az role assignment create --assignee "{}" \ --role "Storage {} Data Contributor" \ - --scope {}') - .format( + --scope {}' + ).format( json.loads(user_principal_name.decode("utf-8"))[0]["userPrincipalName"], "Queue" if trigger_type == Trigger.TriggerType.QUEUE else "Blob", - storage_account_scope.decode("utf-8") + storage_account_scope.decode("utf-8"), ) ) @@ -612,6 +618,7 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) self.cache_client.update_function(function) return trigger + # # def create_azure_function(self, fname, config): # diff --git a/sebs/azure/function.py b/sebs/azure/function.py index 8970d90d..375c0b79 100644 --- a/sebs/azure/function.py +++ b/sebs/azure/function.py @@ -42,10 +42,9 @@ def deserialize(cached_config: dict) -> Function: for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"HTTP": HTTPTrigger, - "Queue": QueueTrigger, - "Storage": StorageTrigger - }.get(trigger["type"]) + {"HTTP": HTTPTrigger, "Queue": QueueTrigger, "Storage": StorageTrigger}.get( + trigger["type"] + ), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index e74a3bcd..213215aa 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -59,9 +59,9 @@ def __init__(self, fname: str, storage_account: str, queue_name: Optional[str] = super().__init__() self.name = fname self._storage_account = storage_account - self._queue_name = None + 
self._queue_name = None - if (queue_name): + if queue_name: self._queue_name = queue_name else: # Having a queue name field is currently a bit contrived - it is mostly a @@ -69,12 +69,12 @@ def __init__(self, fname: str, storage_account: str, queue_name: Optional[str] = # future, we may adopt a different convention for naming trigger resources, # at which point this will become truly useful. self._queue_name = self.name - + # Init client default_credential = DefaultAzureCredential() - queue_client = QueueClient(self.account_url, - queue_name=self.queue_name, - credential=default_credential) + queue_client = QueueClient( + self.account_url, queue_name=self.queue_name, credential=default_credential + ) # Create queue self.logging.info(f"Creating queue {self.queue_name}") @@ -97,7 +97,7 @@ def trigger_type() -> Trigger.TriggerType: def storage_account(self) -> str: assert self._storage_account return self._storage_account - + @property def account_url(self) -> str: return f"https://{self.storage_account}.queue.core.windows.net" @@ -113,12 +113,12 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Prepare queue client default_credential = DefaultAzureCredential() - queue_client = QueueClient(self.account_url, - queue_name=self.queue_name, - credential=default_credential) + queue_client = QueueClient( + self.account_url, queue_name=self.queue_name, credential=default_credential + ) # Publish payload to queue - serialized_payload = base64.b64encode(json.dumps(payload).encode('utf-8')).decode('utf-8') + serialized_payload = base64.b64encode(json.dumps(payload).encode("utf-8")).decode("utf-8") queue_client.send_message(serialized_payload) self.logging.info(f"Sent message to queue {self.queue_name}") @@ -131,7 +131,12 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "Queue", "name": self.name, "storage_account": self.storage_account, "queue_name": self.queue_name} + return { + "type": "Queue", + "name": self.name, + "storage_account": self.storage_account, + "queue_name": self.queue_name, + } @staticmethod def deserialize(obj: dict) -> Trigger: @@ -144,7 +149,7 @@ def __init__(self, fname: str, storage_account: str, container_name: Optional[st self.name = fname self._storage_account = storage_account - if (container_name): + if container_name: self._container_name = container_name else: # Having a container name field is currently a bit contrived - it is mostly @@ -193,7 +198,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Prepare blob file_name = "payload.json" - with open(file_name, 'w') as fp: + with open(file_name, "w") as fp: json.dump(payload, fp) # Init client @@ -201,8 +206,9 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: blob_service_client = BlobServiceClient(self.account_url, credential=default_credential) # Upload blob - blob_client = blob_service_client.get_blob_client(container=self.container_name, - blob=file_name) + blob_client = blob_service_client.get_blob_client( + container=self.container_name, blob=file_name + ) with open(file=file_name, mode="rb") as payload: blob_client.upload_blob(payload, overwrite=True) self.logging.info(f"Uploaded payload to container {self.container_name}") @@ -216,7 +222,12 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "Storage", "name": self.name, "storage_account": self.storage_account, "container_name": self.container_name} + return { + "type": 
"Storage", + "name": self.name, + "storage_account": self.storage_account, + "container_name": self.container_name, + } @staticmethod def deserialize(obj: dict) -> Trigger: From 97d63450026c533a3cd50aa1abd5668ebae22f35 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 11 Jul 2024 01:13:56 +0200 Subject: [PATCH 11/19] [gcp] Linting --- sebs/benchmark.py | 2 +- sebs/gcp/function.py | 12 ++++++------ sebs/gcp/triggers.py | 14 ++++++-------- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/sebs/benchmark.py b/sebs/benchmark.py index e18e0e20..1114a296 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -505,7 +505,7 @@ def build( self.language_version, self.benchmark, self.is_cached_valid, - self._experiment_config.trigger + self._experiment_config.trigger, ) self.logging.info( ( diff --git a/sebs/gcp/function.py b/sebs/gcp/function.py index f2fb5ca4..09cab242 100644 --- a/sebs/gcp/function.py +++ b/sebs/gcp/function.py @@ -30,8 +30,7 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> "GCPFunction": from sebs.faas.function import Trigger - from sebs.gcp.triggers import LibraryTrigger, HTTPTrigger, \ - QueueTrigger, StorageTrigger + from sebs.gcp.triggers import LibraryTrigger, HTTPTrigger, QueueTrigger, StorageTrigger cfg = FunctionConfig.deserialize(cached_config["config"]) ret = GCPFunction( @@ -44,10 +43,11 @@ def deserialize(cached_config: dict) -> "GCPFunction": for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": LibraryTrigger, - "HTTP": HTTPTrigger, - "Queue": QueueTrigger, - "Storage": StorageTrigger + { + "Library": LibraryTrigger, + "HTTP": HTTPTrigger, + "Queue": QueueTrigger, + "Storage": StorageTrigger, }.get(trigger["type"]), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 753113c1..b20708fb 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -161,13 +161,11 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Publish payload to queue pub_sub.projects().topics().publish( - topic=self.queue_name, - body={ - "messages": [{ - "data": serialized_payload.decode("utf-8") - }], - } - ).execute() + topic=self.queue_name, + body={ + "messages": [{"data": serialized_payload.decode("utf-8")}], + }, + ).execute() # TODO(oana): gather metrics @@ -209,7 +207,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.info(f"Invoke function {self.name}") # Init clients - client = gcp_storage.Client(); + client = gcp_storage.Client() bucket_instance = client.bucket(self.bucket_name) # Prepare payload From be4e4f9b7f97500f5371dd4310397fe129d6b38d Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 11 Jul 2024 01:18:46 +0200 Subject: [PATCH 12/19] [system] Fix incorrect callback type --- sebs/benchmark.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sebs/benchmark.py b/sebs/benchmark.py index 1114a296..8e2a5a86 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -5,7 +5,7 @@ import shutil import subprocess from abc import abstractmethod -from typing import Any, Callable, Dict, List, Tuple +from typing import Any, Callable, Dict, List, Optional, Tuple import docker @@ -13,6 +13,7 @@ from sebs.cache import Cache from sebs.faas.config import Resources from sebs.utils import find_benchmark, project_absolute_path, LoggingBase +from sebs.faas.function import Trigger from sebs.faas.storage import PersistentStorage from typing import TYPE_CHECKING @@ 
-470,7 +471,10 @@ def recalculate_code_size(self): return self._code_size def build( - self, deployment_build_step: Callable[[str, str, str, str, bool], Tuple[str, int]] + self, + deployment_build_step: Callable[ + [str, str, str, str, bool, Optional[Trigger.TriggerType]], Tuple[str, int] + ], ) -> Tuple[bool, str]: # Skip build if files are up to date and user didn't enforce rebuild From debbda0e5ac4d6c6ba8fc755a018b4cbbaf1467f Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 11 Jul 2024 01:39:00 +0200 Subject: [PATCH 13/19] [gcp] Help mypy determine the type --- sebs/gcp/gcp.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 1f093876..9f031887 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -394,6 +394,7 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) time.sleep(3) self.logging.info(f"Function {function.name} - deployed!") + trigger: Trigger if trigger_type == Trigger.TriggerType.HTTP: invoke_url = status_res["httpsTrigger"]["url"] trigger = HTTPTrigger(invoke_url) From 3e52f3ab905cd8fa5834fb96bb8421813e206e46 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 11 Jul 2024 01:40:51 +0200 Subject: [PATCH 14/19] [aws] Fix return type --- sebs/aws/triggers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index c1a47e4f..39f022ee 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -306,7 +306,7 @@ def typename() -> str: return "AWS.StorageTrigger" @property - def bucket_name(self) -> AWS: + def bucket_name(self) -> str: assert self._bucket_name return self._bucket_name From 4bd7a20e8a5e0ff13fb1a762a3d490da2530de9b Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 11 Jul 2024 01:43:34 +0200 Subject: [PATCH 15/19] [azure] fix var name confusing mypy --- sebs/azure/triggers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index 213215aa..fc5c1e36 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -209,8 +209,8 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: blob_client = blob_service_client.get_blob_client( container=self.container_name, blob=file_name ) - with open(file=file_name, mode="rb") as payload: - blob_client.upload_blob(payload, overwrite=True) + with open(file=file_name, mode="rb") as payload_data: + blob_client.upload_blob(payload_data, overwrite=True) self.logging.info(f"Uploaded payload to container {self.container_name}") # TODO(oana): gather metrics From a466aab1d84206a28661308420bc2fdbca0a64a6 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 11 Jul 2024 01:49:52 +0200 Subject: [PATCH 16/19] [gcp] [azure] Fix linting issues --- sebs/azure/azure.py | 1 + sebs/gcp/gcp.py | 2 +- sebs/gcp/triggers.py | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index 03945274..bae91f38 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -604,6 +604,7 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) ) ) + trigger: Trigger if trigger_type == Trigger.TriggerType.QUEUE: trigger = QueueTrigger(function.name, storage_account) self.logging.info(f"Created Queue trigger for {function.name} function") diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 9f031887..6691f1b5 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -420,6 +420,7 @@ def cached_function(self, function: Function): from sebs.faas.function import 
Trigger from sebs.gcp.triggers import LibraryTrigger, QueueTrigger, StorageTrigger + gcp_trigger: Trigger for trigger in function.triggers(Trigger.TriggerType.LIBRARY): gcp_trigger = cast(LibraryTrigger, trigger) gcp_trigger.logging_handlers = self.logging_handlers @@ -431,7 +432,6 @@ def cached_function(self, function: Function): for trigger in function.triggers(Trigger.TriggerType.STORAGE): gcp_trigger = cast(StorageTrigger, trigger) gcp_trigger.logging_handlers = self.logging_handlers - gcp_trigger.deployment_client = self def update_function(self, function: Function, code_package: Benchmark): diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index b20708fb..556b46a9 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -131,7 +131,7 @@ def typename() -> str: return "GCP.QueueTrigger" @property - def queue_name(self) -> GCP: + def queue_name(self) -> str: assert self._queue_name return self._queue_name @@ -198,7 +198,7 @@ def trigger_type() -> Trigger.TriggerType: return Trigger.TriggerType.STORAGE @property - def bucket_name(self) -> GCP: + def bucket_name(self) -> str: assert self._bucket_name return self._bucket_name From 0b310c195519c6359ad3132aff687d07c0019974 Mon Sep 17 00:00:00 2001 From: orosca Date: Mon, 9 Sep 2024 00:24:47 +0200 Subject: [PATCH 17/19] Measurements infrastructure with queues --- benchmarks/wrappers/aws/python/handler.py | 47 ++++-- benchmarks/wrappers/aws/python/queue.py | 14 ++ benchmarks/wrappers/azure/python/handler.py | 81 +++++++--- benchmarks/wrappers/azure/python/queue.py | 15 ++ benchmarks/wrappers/gcp/python/handler.py | 89 +++++++---- benchmarks/wrappers/gcp/python/queue.py | 14 ++ config/systems.json | 13 +- docs/modularity.md | 3 +- requirements.gcp.txt | 1 + scripts/run_experiments.py | 1 + sebs/aws/aws.py | 1 + sebs/aws/triggers.py | 150 ++++++++++++------ sebs/azure/azure.py | 40 ++++- sebs/azure/triggers.py | 165 +++++++++++++++----- sebs/gcp/gcp.py | 13 +- sebs/gcp/triggers.py | 122 +++++++++++++-- tests/aws/create_function.py | 4 +- 17 files changed, 582 insertions(+), 191 deletions(-) create mode 100644 benchmarks/wrappers/aws/python/queue.py create mode 100644 benchmarks/wrappers/azure/python/queue.py create mode 100644 benchmarks/wrappers/gcp/python/queue.py diff --git a/benchmarks/wrappers/aws/python/handler.py b/benchmarks/wrappers/aws/python/handler.py index 2601dddf..a100393a 100644 --- a/benchmarks/wrappers/aws/python/handler.py +++ b/benchmarks/wrappers/aws/python/handler.py @@ -7,10 +7,16 @@ def handler(event, context): income_timestamp = datetime.datetime.now().timestamp() + # Flag to indicate whether the measurements should be returned as an HTTP + # response or via a result queue. 
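+    # Each trigger type delivers a different event shape, e.g.:
+    #   SQS:  {"Records": [{"eventSource": "aws:sqs", "body": "<json payload>"}]}
+    #   S3:   {"Records": [{"s3": {"bucket": {...}, "object": {...}}}]}
+    #   HTTP: {"body": "<json payload>", ...}
+    # The checks below unwrap the benchmark payload accordingly.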
+ return_http = True + # Queue trigger if ("Records" in event and event["Records"][0]["eventSource"] == 'aws:sqs'): event = json.loads(event["Records"][0]["body"]) + return_http = False + # Storage trigger if ("Records" in event and "s3" in event["Records"][0]): bucket_name = event["Records"][0]["s3"]["bucket"]["name"] @@ -22,6 +28,8 @@ def handler(event, context): obj = storage_inst.get_object(bucket_name, file_name) event = json.loads(obj['Body'].read()) + return_http = False + # HTTP trigger with API Gateaway if 'body' in event: event = json.loads(event['body']) @@ -68,17 +76,30 @@ def handler(event, context): if "cold_start" in os.environ: cold_start_var = os.environ["cold_start"] - return { - 'statusCode': 200, - 'body': json.dumps({ - 'begin': begin.strftime('%s.%f'), - 'end': end.strftime('%s.%f'), - 'results_time': results_time, - 'is_cold': is_cold, - 'result': log_data, - 'request_id': context.aws_request_id, - 'cold_start_var': cold_start_var, - 'container_id': container_id, - }) - } + stats = json.dumps({ + 'begin': begin.strftime('%s.%f'), + 'end': end.strftime('%s.%f'), + 'results_time': results_time, + 'is_cold': is_cold, + 'result': log_data, + 'request_id': context.aws_request_id, + 'cold_start_var': cold_start_var, + 'container_id': container_id, + }) + + # HTTP or library trigger: return an HTTP response. + if (return_http): + return { + 'statusCode': 200, + 'body': stats + } + + # Queue or storage trigger: return via a result queue. + arn = context.invoked_function_arn.split(":") + region = arn[3] + account_id = arn[4] + queue_name = f"{arn[6]}-result" + from function import queue + queue_client = queue.queue(queue_name, account_id, region) + queue_client.send_message(stats) diff --git a/benchmarks/wrappers/aws/python/queue.py b/benchmarks/wrappers/aws/python/queue.py new file mode 100644 index 00000000..95cde8a7 --- /dev/null +++ b/benchmarks/wrappers/aws/python/queue.py @@ -0,0 +1,14 @@ +import boto3 + +class queue: + client = None + + def __init__(self, queue_name: str, account_id: str, region: str): + self.client = boto3.client('sqs', region_name=region) + self.queue_url = f"https://sqs.{region}.amazonaws.com/{account_id}/{queue_name}" + + def send_message(self, message: str): + self.client.send_message( + QueueUrl=self.queue_url, + MessageBody=message, + ) diff --git a/benchmarks/wrappers/azure/python/handler.py b/benchmarks/wrappers/azure/python/handler.py index 6375de39..e64b17c1 100644 --- a/benchmarks/wrappers/azure/python/handler.py +++ b/benchmarks/wrappers/azure/python/handler.py @@ -2,16 +2,69 @@ import base64 import datetime, io, json, logging, os, uuid +from azure.identity import ManagedIdentityCredential +from azure.storage.queue import QueueClient + import azure.functions as func def handler_http(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: income_timestamp = datetime.datetime.now().timestamp() + req_json = req.get_json() if 'connection_string' in req_json: os.environ['STORAGE_CONNECTION_STRING'] = req_json['connection_string'] + req_json['request-id'] = context.invocation_id req_json['income-timestamp'] = income_timestamp + + return func.HttpResponse(measure(req_json), mimetype="application/json") + +def handler_queue(msg: func.QueueMessage, context: func.Context): + income_timestamp = datetime.datetime.now().timestamp() + + logging.info('Python queue trigger function processed a queue item.') + payload = msg.get_json() + + payload['request-id'] = context.invocation_id + payload['income-timestamp'] = income_timestamp + + stats = 
measure(payload) + + queue_name = f"{os.getenv('WEBSITE_SITE_NAME')}-result" + storage_account = os.getenv('STORAGE_ACCOUNT') + logging.info(queue_name) + logging.info(storage_account) + + from . import queue + queue_client = queue.queue(queue_name, storage_account) + queue_client.send_message(stats) + +def handler_storage(blob: func.InputStream, context: func.Context): + income_timestamp = datetime.datetime.now().timestamp() + + logging.info('Python Blob trigger function processed %s', blob.name) + payload = json.loads(blob.readline().decode('utf-8')) + + payload['request-id'] = context.invocation_id + payload['income-timestamp'] = income_timestamp + + stats = measure(payload) + + queue_name = f"{os.getenv('WEBSITE_SITE_NAME')}-result" + storage_account = os.getenv('STORAGE_ACCOUNT') + logging.info(queue_name) + logging.info(storage_account) + + from . import queue + queue_client = queue.queue(queue_name, storage_account) + queue_client.send_message(stats) + +def measure(req_json) -> str: + # logging.info("TIPU") TODO(oana) remove + # logging.info(type(req_json)) + req_id = req_json['request-id'] + begin = datetime.datetime.now() # We are deployed in the same directory from . import function @@ -29,7 +82,6 @@ def handler_http(req: func.HttpRequest, context: func.Context) -> func.HttpRespo from . import storage storage_inst = storage.storage.get_instance() b = req_json.get('logs').get('bucket') - req_id = context.invocation_id storage_inst.upload_stream(b, '{}.json'.format(req_id), io.BytesIO(json.dumps(log_data).encode('utf-8'))) results_end = datetime.datetime.now() @@ -57,8 +109,7 @@ def handler_http(req: func.HttpRequest, context: func.Context) -> func.HttpRespo cold_marker = True is_cold_worker = True - return func.HttpResponse( - json.dumps({ + return json.dumps({ 'begin': begin.strftime('%s.%f'), 'end': end.strftime('%s.%f'), 'results_time': results_time, @@ -67,25 +118,5 @@ def handler_http(req: func.HttpRequest, context: func.Context) -> func.HttpRespo 'is_cold_worker': is_cold_worker, 'container_id': container_id, 'environ_container_id': os.environ['CONTAINER_NAME'], - 'request_id': context.invocation_id - }), - mimetype="application/json" - ) - -def handler_queue(msg: func.QueueMessage): - logging.info('Python queue trigger function processed a queue item.') - payload = msg.get_body().decode('utf-8') - - from . import function - ret = function.handler(payload) - - # TODO(oana) - -def handler_storage(blob: func.InputStream): - logging.info('Python Blob trigger function processed %s', blob.name) - payload = blob.readline().decode('utf-8') # TODO(oana) - - from . 
import function - ret = function.handler(payload) - - # TODO(oana) + 'request_id': req_id + }) \ No newline at end of file diff --git a/benchmarks/wrappers/azure/python/queue.py b/benchmarks/wrappers/azure/python/queue.py new file mode 100644 index 00000000..93824181 --- /dev/null +++ b/benchmarks/wrappers/azure/python/queue.py @@ -0,0 +1,15 @@ +from azure.identity import ManagedIdentityCredential +from azure.storage.queue import QueueClient + +class queue: + client = None + + def __init__(self, queue_name: str, storage_account: str): + account_url = f"https://{storage_account}.queue.core.windows.net" + managed_credential = ManagedIdentityCredential() + self.client = QueueClient(account_url, + queue_name=queue_name, + credential=managed_credential) + + def send_message(self, message: str): + self.client.send_message(message) diff --git a/benchmarks/wrappers/gcp/python/handler.py b/benchmarks/wrappers/gcp/python/handler.py index 6a1284e5..c547c52c 100644 --- a/benchmarks/wrappers/gcp/python/handler.py +++ b/benchmarks/wrappers/gcp/python/handler.py @@ -8,10 +8,67 @@ def handler_http(req): income_timestamp = datetime.datetime.now().timestamp() req_id = req.headers.get('Function-Execution-Id') - req_json = req.get_json() req_json['request-id'] = req_id req_json['income-timestamp'] = income_timestamp + + return measure(req_json), 200, {'ContentType': 'application/json'} + +def handler_queue(data, context): + income_timestamp = datetime.datetime.now().timestamp() + + serialized_payload = data.get('data') + payload = json.loads(base64.b64decode(serialized_payload).decode("utf-8")) + + payload['request-id'] = context.event_id + payload['income-timestamp'] = income_timestamp + + stats = measure(payload) + + # Retrieve the project id and construct the result queue name. + project_id = context.resource.split("/")[1] + topic_name = f"{context.resource.split('/')[3]}-result" + + from function import queue + queue_client = queue.queue(topic_name, project_id) + queue_client.send_message(stats) + +def handler_storage(data, context): + income_timestamp = datetime.datetime.now().timestamp() + + bucket_name = data.get('bucket') + name = data.get('name') + filepath = '/tmp/bucket_contents' + + from function import storage + storage_inst = storage.storage.get_instance() + storage_inst.download(bucket_name, name, filepath) + + payload = {} + + with open(filepath, 'r') as fp: + payload = json.load(fp) + + payload['request-id'] = context.event_id + payload['income-timestamp'] = income_timestamp + + stats = measure(payload) + + # Retrieve the project id and construct the result queue name. + from google.auth import default + # Used to be an env var, now we need an additional request to the metadata + # server to retrieve it. 
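+    # google.auth.default() returns a (credentials, project_id) tuple; only
+    # the project id is needed here to address the Pub/Sub result topic.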
+ _, project_id = default() + topic_name = f"{context.resource['name'].split('/')[3]}-result" + + from function import queue + queue_client = queue.queue(topic_name, project_id) + queue_client.send_message(stats) + +# TODO(oana) comment +def measure(req_json) -> str: + req_id = req_json['request-id'] + begin = datetime.datetime.now() # We are deployed in the same directorygit status from function import function @@ -62,32 +119,4 @@ def handler_http(req): 'request_id': req_id, 'cold_start_var': cold_start_var, 'container_id': container_id, - }), 200, {'ContentType': 'application/json'} - -def handler_queue(data, context): - serialized_payload = data.get('data') - payload = json.loads(base64.b64decode(serialized_payload).decode("utf-8")) - - from function import function - ret = function.handler(payload) - - # TODO(oana) - -def handler_storage(data, context): - bucket_name = data.get('bucket') - name = data.get('name') - filepath = '/tmp/bucket_contents' - - from function import storage - storage_inst = storage.storage.get_instance() - storage_inst.download(bucket_name, name, filepath) - - payload = {} - - with open(filepath, 'r') as fp: - payload = json.load(fp) - - from function import function - ret = function.handler(payload) - - # TODO(oana) + }) diff --git a/benchmarks/wrappers/gcp/python/queue.py b/benchmarks/wrappers/gcp/python/queue.py new file mode 100644 index 00000000..b6e009e7 --- /dev/null +++ b/benchmarks/wrappers/gcp/python/queue.py @@ -0,0 +1,14 @@ +from google.cloud import pubsub_v1 + +class queue: + client = None + + def __init__(self, topic_name: str, project_id: str): + self.client = pubsub_v1.PublisherClient() + self.topic_name = 'projects/{project_id}/topics/{topic}'.format( + project_id=project_id, + topic=topic_name, + ) + + def send_message(self, message: str): + self.client.publish(self.topic_name, message.encode("utf-8")) diff --git a/config/systems.json b/config/systems.json index 6e9ddd0c..88358b60 100644 --- a/config/systems.json +++ b/config/systems.json @@ -71,7 +71,8 @@ "deployment": { "files": [ "handler.py", - "storage.py" + "storage.py", + "queue.py" ], "packages": [] } @@ -114,10 +115,13 @@ "deployment": { "files": [ "handler.py", - "storage.py" + "storage.py", + "queue.py" ], "packages": [ - "azure-storage-blob" + "azure-storage-blob", + "\nazure-storage-queue", + "\nazure-identity" ] } }, @@ -162,7 +166,8 @@ "deployment": { "files": [ "handler.py", - "storage.py" + "storage.py", + "queue.py" ], "packages": [ "google-cloud-storage" diff --git a/docs/modularity.md b/docs/modularity.md index f2614655..f6015b8e 100644 --- a/docs/modularity.md +++ b/docs/modularity.md @@ -267,7 +267,8 @@ Check other platforms to see how configuration is defined, for example, for AWS: "deployment": { "files": [ "handler.py", - "storage.py" + "storage.py", + "queue.py" ], "packages": [] } diff --git a/requirements.gcp.txt b/requirements.gcp.txt index 9cb90916..3d1aea35 100644 --- a/requirements.gcp.txt +++ b/requirements.gcp.txt @@ -4,3 +4,4 @@ google-api-python-client==1.12.5 google-cloud-monitoring==2.0.0 google-api-python-client-stubs google-cloud-logging==2.0.0 +google-cloud-pubsub=2.23.0 \ No newline at end of file diff --git a/scripts/run_experiments.py b/scripts/run_experiments.py index c18b96c0..c9167553 100755 --- a/scripts/run_experiments.py +++ b/scripts/run_experiments.py @@ -445,6 +445,7 @@ def __init__(self, cache_client, config, docker_client, language): function - function.py - storage.py + - queue.py - resources handler.py diff --git a/sebs/aws/aws.py 
b/sebs/aws/aws.py index 92c65dcc..9bcb52e6 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -119,6 +119,7 @@ def get_storage(self, replace_existing: bool = False) -> PersistentStorage: function - function.py - storage.py + - queue.py - resources handler.py diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index 39f022ee..5c296c90 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -8,7 +8,9 @@ import boto3 from sebs.aws.aws import AWS +from sebs.aws.queue import SQS from sebs.faas.function import ExecutionResult, Trigger +from sebs.faas.queue import QueueType class LibraryTrigger(Trigger): @@ -133,65 +135,59 @@ def __init__( self, fname: str, deployment_client: Optional[AWS] = None, - queue_arn: Optional[str] = None, - queue_url: Optional[str] = None, + queue: Optional[SQS] = None, + result_queue: Optional[SQS] = None ): super().__init__() self.name = fname + self._queue = queue + self._result_queue = result_queue + self._deployment_client = deployment_client - self._deployment_client = None - self._queue_arn = None - self._queue_url = None - - if deployment_client: - self._deployment_client = deployment_client - if queue_arn: - self._queue_arn = queue_arn - if queue_url: - self._queue_url = queue_url - - # When creating the trigger for the first time, also create and store - # queue information. - if not self.queue_arn and not self.queue_url: - # Init clients - lambda_client = self.deployment_client.get_lambda_client() - sqs_client = boto3.client("sqs", region_name=self.deployment_client.config.region) - - # Create queue - self.logging.debug(f"Creating queue {self.name}") - - self._queue_url = sqs_client.create_queue(QueueName=self.name)["QueueUrl"] - self._queue_arn = sqs_client.get_queue_attributes( - QueueUrl=self.queue_url, AttributeNames=["QueueArn"] - )["Attributes"]["QueueArn"] - - self.logging.debug("Created queue") + if (not self._queue): + self._queue = SQS( + self.name, + QueueType.TRIGGER, + self.deployment_client.config.region + ) + self.queue.create_queue() # Add queue trigger + lambda_client = self.deployment_client.get_lambda_client() if not len( lambda_client.list_event_source_mappings( - EventSourceArn=self.queue_arn, FunctionName=self.name + EventSourceArn=self.queue.queue_arn, FunctionName=self.name )["EventSourceMappings"] ): lambda_client.create_event_source_mapping( - EventSourceArn=self.queue_arn, + EventSourceArn=self.queue.queue_arn, FunctionName=self.name, MaximumBatchingWindowInSeconds=1, ) + # Create result queue for communicating benchmark results back to the + # client. 
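+        # The function wrapper publishes its measurements to a queue named
+        # "<function-name>-result" (see benchmarks/wrappers/aws/python/handler.py);
+        # sync_invoke() polls this result queue to collect them.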
+ if (not self._result_queue): + self._result_queue = SQS( + fname, + QueueType.RESULT, + self.deployment_client.config.region + ) + self._result_queue.create_queue() + @staticmethod def typename() -> str: return "AWS.QueueTrigger" @property - def queue_arn(self) -> str: - assert self._queue_arn - return self._queue_arn + def queue(self) -> SQS: + assert self._queue + return self._queue @property - def queue_url(self) -> str: - assert self._queue_url - return self._queue_url + def result_queue(self) -> SQS: + assert self._result_queue + return self._result_queue @property def deployment_client(self) -> AWS: @@ -210,14 +206,21 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.debug(f"Invoke function {self.name}") - sqs_client = boto3.client("sqs", region_name=self.deployment_client.config.region) - # Publish payload to queue serialized_payload = json.dumps(payload) - sqs_client.send_message(QueueUrl=self.queue_url, MessageBody=serialized_payload) - self.logging.info(f"Sent message to queue {self.name}") + begin = datetime.datetime.now() + self.queue.send_message(serialized_payload) - # TODO(oana): gather metrics + response = "" + while (response == ""): + response = self.result_queue.receive_message() + + end = datetime.datetime.now() + + # TODO(oana) error handling + result = ExecutionResult.from_times(begin, end) + result.parse_benchmark_output(json.loads(response)) + return result def async_invoke(self, payload: dict) -> concurrent.futures.Future: @@ -229,33 +232,45 @@ def serialize(self) -> dict: return { "type": "Queue", "name": self.name, - "queue_arn": self.queue_arn, - "queue_url": self.queue_url, + "queue": self.queue.serialize(), + "result_queue": self.result_queue.serialize() } @staticmethod def deserialize(obj: dict) -> Trigger: - return QueueTrigger(obj["name"], None, obj["queue_arn"], obj["queue_url"]) + return QueueTrigger( + obj["name"], + None, + SQS.deserialize(obj["queue"]), + SQS.deserialize(obj["result_queue"]) + ) class StorageTrigger(Trigger): def __init__( - self, fname: str, deployment_client: Optional[AWS] = None, bucket_name: Optional[str] = None + self, + fname: str, + deployment_client: Optional[AWS] = None, + bucket_name: Optional[str] = None, + result_queue: Optional[SQS] = None ): super().__init__() self.name = fname self._deployment_client = None self._bucket_name = None + self._result_queue = None if deployment_client: self._deployment_client = deployment_client if bucket_name: self._bucket_name = bucket_name + if result_queue: + self._result_queue = result_queue # When creating the trigger for the first time, also create and store # storage bucket information. - if not self.bucket_name: + if not self._bucket_name: # Init clients s3 = boto3.resource("s3") lambda_client = self.deployment_client.get_lambda_client() @@ -301,6 +316,16 @@ def __init__( } ) + # Create result queue for communicating benchmark results back to the + # client. 
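+        # Same mechanism as in QueueTrigger: the wrapper reports its
+        # measurements via the result queue instead of an HTTP response.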
+ if (not self._result_queue): + self._result_queue = SQS( + fname, + QueueType.RESULT, + self.deployment_client.config.region + ) + self._result_queue.create_queue() + @staticmethod def typename() -> str: return "AWS.StorageTrigger" @@ -315,6 +340,11 @@ def deployment_client(self) -> AWS: assert self._deployment_client return self._deployment_client + @property + def result_queue(self) -> SQS: + assert self._result_queue + return self._result_queue + @deployment_client.setter def deployment_client(self, deployment_client: AWS): self._deployment_client = deployment_client @@ -331,10 +361,20 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Put object s3 = boto3.resource("s3") + begin = datetime.datetime.now() s3.Object(self.bucket_name, "payload.json").put(Body=serialized_payload) self.logging.info(f"Uploaded payload to bucket {self.bucket_name}") - # TODO(oana): gather metrics + response = "" + while (response == ""): + response = self.result_queue.receive_message() + + end = datetime.datetime.now() + + # TODO(oana) error handling + result = ExecutionResult.from_times(begin, end) + result.parse_benchmark_output(json.loads(response)) + return result def async_invoke(self, payload: dict) -> concurrent.futures.Future: @@ -343,8 +383,18 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "Storage", "name": self.name, "bucket_name": self.bucket_name} + return { + "type": "Storage", + "name": self.name, + "bucket_name": self.bucket_name, + "result_queue": self.result_queue.serialize() + } @staticmethod def deserialize(obj: dict) -> Trigger: - return StorageTrigger(obj["name"], None, obj["bucket_name"]) + return StorageTrigger( + obj["name"], + None, + obj["bucket_name"], + SQS.deserialize(obj["result_queue"]) + ) diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index bae91f38..638b3b25 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -573,8 +573,8 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) time.sleep(20) """ - The only implemented trigger at the moment is HTTPTrigger. - It is automatically created for each function. + Supports HTTP, queue and storage triggers, as specified by + the user when SeBS is run. """ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: @@ -605,12 +605,36 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) ) trigger: Trigger - if trigger_type == Trigger.TriggerType.QUEUE: - trigger = QueueTrigger(function.name, storage_account) - self.logging.info(f"Created Queue trigger for {function.name} function") - elif trigger_type == Trigger.TriggerType.STORAGE: - trigger = StorageTrigger(function.name, storage_account) - self.logging.info(f"Created Storage trigger for {function.name} function") + if trigger_type == Trigger.TriggerType.QUEUE or trigger_type == Trigger.TriggerType.STORAGE: + resource_group = self.config.resources.resource_group(self.cli_instance) + + # Set the storage account as an env var on the function. + ret = self.cli_instance.execute( + f"az functionapp config appsettings set --name {function.name} " + f" --resource-group {resource_group} " + f" --settings STORAGE_ACCOUNT={storage_account}" + ) + print(ret.decode()) + + # Connect the function app to the result queue via Service + # Connector. 
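+            # The connection assigns a system-managed identity to the function
+            # app and grants it access to the storage queue, which the wrapper's
+            # ManagedIdentityCredential relies on at runtime
+            # (see benchmarks/wrappers/azure/python/queue.py).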
+ ret = self.cli_instance.execute( + f"az webapp connection create storage-queue " + f" --resource-group {resource_group} " + f" --target-resource-group {resource_group} " + f" --account {storage_account} " + f" --name {function.name} " + f" --client-type python " # TODO(oana) does this work for nodejs + f" --system-identity " + ) + print(ret.decode()) + + if trigger_type == Trigger.TriggerType.QUEUE: + trigger = QueueTrigger(function.name, storage_account, self.config.region) + self.logging.info(f"Created Queue trigger for {function.name} function") + elif trigger_type == Trigger.TriggerType.STORAGE: + trigger = StorageTrigger(function.name, storage_account, self.config.region) + self.logging.info(f"Created Storage trigger for {function.name} function") else: raise RuntimeError("Not supported!") diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index fc5c1e36..96edff9a 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -1,6 +1,8 @@ import base64 import concurrent.futures +import datetime import json +import time from typing import Any, Dict, Optional # noqa from azure.core.exceptions import ResourceExistsError @@ -9,7 +11,9 @@ from azure.storage.queue import QueueClient from sebs.azure.config import AzureResources +from sebs.azure.queue import AzureQueue from sebs.faas.function import ExecutionResult, Trigger +from sebs.faas.queue import QueueType class AzureTrigger(Trigger): @@ -55,35 +59,38 @@ def deserialize(obj: dict) -> Trigger: class QueueTrigger(Trigger): - def __init__(self, fname: str, storage_account: str, queue_name: Optional[str] = None): + def __init__( + self, + fname: str, + storage_account: str, + region: str, + queue: Optional[AzureQueue] = None, + result_queue: Optional[AzureQueue] = None + ): super().__init__() self.name = fname self._storage_account = storage_account - self._queue_name = None - - if queue_name: - self._queue_name = queue_name - else: - # Having a queue name field is currently a bit contrived - it is mostly a - # device to indicate that a trigger resource exists and is cached. In the - # future, we may adopt a different convention for naming trigger resources, - # at which point this will become truly useful. 
- self._queue_name = self.name - - # Init client - default_credential = DefaultAzureCredential() - queue_client = QueueClient( - self.account_url, queue_name=self.queue_name, credential=default_credential + self._region = region + self._queue = queue + self._result_queue = result_queue + + if (not self._queue): + self._queue = AzureQueue( + self.name, + QueueType.TRIGGER, + self.storage_account, + self.region ) - - # Create queue - self.logging.info(f"Creating queue {self.queue_name}") - - try: - queue_client.create_queue() - self.logging.info("Created queue") - except ResourceExistsError: - self.logging.info("Queue already exists, reusing...") + self.queue.create_queue() + + if (not self._result_queue): + self._result_queue = AzureQueue( + fname, + QueueType.RESULT, + storage_account, + self.region + ) + self._result_queue.create_queue() @staticmethod def typename() -> str: @@ -98,6 +105,21 @@ def storage_account(self) -> str: assert self._storage_account return self._storage_account + @property + def region(self) -> str: + assert self._region + return self._region + + @property + def queue(self) -> AzureQueue: + assert self._queue + return self._queue + + @property + def result_queue(self) -> AzureQueue: + assert self._result_queue + return self._result_queue + @property def account_url(self) -> str: return f"https://{self.storage_account}.queue.core.windows.net" @@ -111,18 +133,23 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.info(f"Invoke function {self.name}") - # Prepare queue client - default_credential = DefaultAzureCredential() - queue_client = QueueClient( - self.account_url, queue_name=self.queue_name, credential=default_credential - ) - # Publish payload to queue serialized_payload = base64.b64encode(json.dumps(payload).encode("utf-8")).decode("utf-8") - queue_client.send_message(serialized_payload) - self.logging.info(f"Sent message to queue {self.queue_name}") + begin = datetime.datetime.now() + self.queue.send_message(serialized_payload) + + response = "" + while (response == ""): + response = self.result_queue.receive_message() + if (response == ""): + time.sleep(5) - # TODO(oana): gather metrics + end = datetime.datetime.now() + + # TODO(oana) error handling + result = ExecutionResult.from_times(begin, end) + result.parse_benchmark_output(json.loads(response)) + return result def async_invoke(self, payload: dict) -> concurrent.futures.Future: @@ -135,19 +162,37 @@ def serialize(self) -> dict: "type": "Queue", "name": self.name, "storage_account": self.storage_account, - "queue_name": self.queue_name, + "region": self.region, + "queue": self.queue.serialize(), + "result_queue": self.result_queue.serialize() } @staticmethod def deserialize(obj: dict) -> Trigger: - return QueueTrigger(obj["name"], obj["storage_account"], obj["queue_name"]) + return QueueTrigger( + obj["name"], + obj["storage_account"], + obj["region"], + AzureQueue.deserialize(obj["queue"]), + AzureQueue.deserialize(obj["result_queue"]) + ) class StorageTrigger(Trigger): - def __init__(self, fname: str, storage_account: str, container_name: Optional[str] = None): + def __init__( + self, + fname: str, + storage_account: str, + region: str, + result_queue: Optional[AzureQueue] = None, + container_name: Optional[str] = None + ): super().__init__() self.name = fname self._storage_account = storage_account + self._region = region + self._result_queue = result_queue + self._container_name = None if container_name: self._container_name = container_name @@ -170,6 +215,15 @@ def 
__init__(self, fname: str, storage_account: str, container_name: Optional[st except ResourceExistsError: self.logging.info("Container already exists, reusing...") + if (not self._result_queue): + self._result_queue = AzureQueue( + fname, + QueueType.RESULT, + storage_account, + self.region + ) + self._result_queue.create_queue() + @staticmethod def typename() -> str: return "Azure.StorageTrigger" @@ -183,6 +237,16 @@ def storage_account(self) -> str: assert self._storage_account return self._storage_account + @property + def region(self) -> str: + assert self._region + return self._region + + @property + def result_queue(self) -> AzureQueue: + assert self._result_queue + return self._result_queue + @property def account_url(self) -> str: return f"https://{self.storage_account}.blob.core.windows.net" @@ -209,11 +273,26 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: blob_client = blob_service_client.get_blob_client( container=self.container_name, blob=file_name ) + begin = datetime.datetime.now() with open(file=file_name, mode="rb") as payload_data: blob_client.upload_blob(payload_data, overwrite=True) self.logging.info(f"Uploaded payload to container {self.container_name}") - # TODO(oana): gather metrics + response = "" # TODO(oana) cleanup + # while (response == ""): + # response = self.result_queue.receive_message() + # if (response == ""): + # time.sleep(5) + while (response == ""): + time.sleep(5) + response = self.result_queue.receive_message() + + end = datetime.datetime.now() + + # TODO(oana) error handling + result = ExecutionResult.from_times(begin, end) + result.parse_benchmark_output(json.loads(response)) + return result def async_invoke(self, payload: dict) -> concurrent.futures.Future: @@ -226,9 +305,17 @@ def serialize(self) -> dict: "type": "Storage", "name": self.name, "storage_account": self.storage_account, + "region": self.region, + "result_queue": self.result_queue.serialize(), "container_name": self.container_name, } @staticmethod def deserialize(obj: dict) -> Trigger: - return StorageTrigger(obj["name"], obj["storage_account"], obj["container_name"]) + return StorageTrigger( + obj["name"], + obj["storage_account"], + obj["region"], + AzureQueue.deserialize(obj["result_queue"]), + obj["container_name"] + ) diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 6691f1b5..e8614cdc 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -248,7 +248,8 @@ def package_code( shutil.move(file, function_dir) requirements = open(os.path.join(directory, "requirements.txt"), "w") - requirements.write("google-cloud-storage") + requirements.write("google-cloud-storage\n") + requirements.write("google-cloud-pubsub") requirements.close() # rename handler function.py since in gcp it has to be caled main.py @@ -401,11 +402,17 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) self.logging.info(f"Created HTTP trigger for {function.name} function") elif trigger_type == Trigger.TriggerType.QUEUE: trigger = QueueTrigger( - function.name, self.get_trigger_resource_name(function.name), self + function.name, + self.get_trigger_resource_name(function.name), + self.config.region ) self.logging.info(f"Created Queue trigger for {function.name} function") elif trigger_type == Trigger.TriggerType.STORAGE: - trigger = StorageTrigger(function.name, self.get_trigger_resource_name(function.name)) + trigger = StorageTrigger( + function.name, + self.get_trigger_resource_name(function.name), + self.config.region + ) self.logging.info(f"Created Storage trigger for 
{function.name} function") else: raise RuntimeError("Not supported!") diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 556b46a9..072b9ba9 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -11,7 +11,9 @@ from google.cloud import storage as gcp_storage from sebs.gcp.gcp import GCP +from sebs.gcp.queue import GCPQueue from sebs.faas.function import ExecutionResult, Trigger +from sebs.faas.queue import QueueType class LibraryTrigger(Trigger): @@ -120,11 +122,28 @@ def deserialize(obj: dict) -> Trigger: class QueueTrigger(Trigger): - def __init__(self, fname: str, queue_name: str, deployment_client: Optional[GCP] = None): + def __init__( + self, + fname: str, + queue_name: str, + region: str, + result_queue: Optional[GCPQueue] = None + ): super().__init__() self.name = fname - self._deployment_client = deployment_client self._queue_name = queue_name + self._region = region + self._result_queue = result_queue + + # Create result queue for communicating benchmark results back to the + # client. + if (not self._result_queue): + self._result_queue = GCPQueue( + fname, + QueueType.RESULT, + self.region + ) + self._result_queue.create_queue() @staticmethod def typename() -> str: @@ -136,13 +155,14 @@ def queue_name(self) -> str: return self._queue_name @property - def deployment_client(self) -> GCP: - assert self._deployment_client - return self._deployment_client + def region(self) -> str: + assert self._region + return self._region - @deployment_client.setter - def deployment_client(self, deployment_client: GCP): - self._deployment_client = deployment_client + @property + def result_queue(self) -> GCPQueue: + assert self._result_queue + return self._result_queue @staticmethod def trigger_type() -> Trigger.TriggerType: @@ -160,6 +180,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: serialized_payload = base64.b64encode(json.dumps(payload).encode("utf-8")) # Publish payload to queue + begin = datetime.datetime.now() pub_sub.projects().topics().publish( topic=self.queue_name, body={ @@ -167,7 +188,16 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: }, ).execute() - # TODO(oana): gather metrics + response = "" + while (response == ""): + response = self.result_queue.receive_message() + + end = datetime.datetime.now() + + # TODO(oana) error handling + result = ExecutionResult.from_times(begin, end) + result.parse_benchmark_output(json.loads(response)) + return result def async_invoke(self, payload: dict) -> concurrent.futures.Future: @@ -176,18 +206,47 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "Queue", "name": self.name, "queue_name": self.queue_name} + return { + "type": "Queue", + "name": self.name, + "queue_name": self.queue_name, + "region": self.region, + "result_queue": self.result_queue.serialize() + } @staticmethod def deserialize(obj: dict) -> Trigger: - return QueueTrigger(obj["name"], obj["queue_name"]) + return QueueTrigger( + obj["name"], + obj["queue_name"], + obj["region"], + GCPQueue.deserialize(obj["result_queue"]) + ) class StorageTrigger(Trigger): - def __init__(self, fname: str, bucket_name: str): + def __init__( + self, + fname: str, + bucket_name: str, + region: str, + result_queue: Optional[GCPQueue] = None + ): super().__init__() self.name = fname self._bucket_name = bucket_name + self._region = region + self._result_queue = result_queue + + # Create result queue for communicating benchmark results back to the + # client. 
+ if (not self._result_queue): + self._result_queue = GCPQueue( + fname, + QueueType.RESULT, + self.region + ) + self._result_queue.create_queue() @staticmethod def typename() -> str: @@ -202,13 +261,23 @@ def bucket_name(self) -> str: assert self._bucket_name return self._bucket_name + @property + def region(self) -> str: + assert self._region + return self._region + + @property + def result_queue(self) -> GCPQueue: + assert self._result_queue + return self._result_queue + def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.info(f"Invoke function {self.name}") # Init clients client = gcp_storage.Client() - bucket_instance = client.bucket(self.bucket_name) + bucket_instance = client.bucket(self.name) # Prepare payload file_name = "payload.json" @@ -218,11 +287,21 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Upload object gcp_storage.blob._MAX_MULTIPART_SIZE = 5 * 1024 * 1024 blob = bucket_instance.blob(blob_name=file_name, chunk_size=4 * 1024 * 1024) + begin = datetime.datetime.now() blob.upload_from_filename(file_name) self.logging.info(f"Uploaded payload to bucket {self.bucket_name}") - # TODO(oana): gather metrics + response = "" + while (response == ""): + response = self.result_queue.receive_message() + + end = datetime.datetime.now() + + # TODO(oana) error handling + result = ExecutionResult.from_times(begin, end) + result.parse_benchmark_output(json.loads(response)) + return result def async_invoke(self, payload: dict) -> concurrent.futures.Future: @@ -231,8 +310,19 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "Storage", "name": self.name, "bucket_name": self.bucket_name} + return { + "type": "Storage", + "name": self.name, + "bucket_name": self.bucket_name, + "region": self.region, + "result_queue": self.result_queue.serialize() + } @staticmethod def deserialize(obj: dict) -> Trigger: - return StorageTrigger(obj["name"], obj["bucket_name"]) + return StorageTrigger( + obj["name"], + obj["bucket_name"], + obj["region"], + GCPQueue.deserialize(obj["result_queue"]) + ) diff --git a/tests/aws/create_function.py b/tests/aws/create_function.py index e672cc89..bb22cfb0 100644 --- a/tests/aws/create_function.py +++ b/tests/aws/create_function.py @@ -35,8 +35,8 @@ class AWSCreateFunction(unittest.TestCase): } } package_files = { - "python": ["handler.py", "function/storage.py", "requirements.txt", '.python_packages/'], - "nodejs": ["handler.js", "function/storage.js", "package.json", "node_modules/"] + "python": ["handler.py", "function/storage.py", "function/queue.py", "requirements.txt", '.python_packages/'], + "nodejs": ["handler.js", "function/storage.js", "function/queue.js", "package.json", "node_modules/"] } benchmark = "110.dynamic-html" function_name_suffixes = [] From f8f316210b9c8d67369471cdf452731d119d8d1f Mon Sep 17 00:00:00 2001 From: orosca Date: Mon, 9 Sep 2024 00:27:56 +0200 Subject: [PATCH 18/19] Fix --- sebs/benchmark.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sebs/benchmark.py b/sebs/benchmark.py index 8e2a5a86..f0911708 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -13,7 +13,7 @@ from sebs.cache import Cache from sebs.faas.config import Resources from sebs.utils import find_benchmark, project_absolute_path, LoggingBase -from sebs.faas.function import Trigger +# from sebs.faas.function import Trigger from sebs.faas.storage import PersistentStorage from typing import TYPE_CHECKING @@ -471,10 +471,12 @@ def 
recalculate_code_size(self): return self._code_size def build( - self, - deployment_build_step: Callable[ - [str, str, str, str, bool, Optional[Trigger.TriggerType]], Tuple[str, int] - ], + self, deployment_build_step: Callable[[str, str, str, str, bool], Tuple[str, int]] + # TODO(oana) fix? + # self, + # deployment_build_step: Callable[ + # [str, str, str, str, bool, Optional[Trigger.TriggerType]], Tuple[str, int] + # ], ) -> Tuple[bool, str]: # Skip build if files are up to date and user didn't enforce rebuild From dac2840e0d85d894e9122ac33630847c4a1731c1 Mon Sep 17 00:00:00 2001 From: orosca Date: Mon, 9 Sep 2024 10:20:47 +0200 Subject: [PATCH 19/19] Wrap up and clean up --- benchmarks/wrappers/azure/python/handler.py | 4 ++-- benchmarks/wrappers/gcp/python/handler.py | 3 ++- sebs/aws/triggers.py | 2 -- sebs/azure/azure.py | 2 -- sebs/azure/triggers.py | 8 +------- sebs/gcp/triggers.py | 2 -- 6 files changed, 5 insertions(+), 16 deletions(-) diff --git a/benchmarks/wrappers/azure/python/handler.py b/benchmarks/wrappers/azure/python/handler.py index e64b17c1..70843b6e 100644 --- a/benchmarks/wrappers/azure/python/handler.py +++ b/benchmarks/wrappers/azure/python/handler.py @@ -60,9 +60,9 @@ def handler_storage(blob: func.InputStream, context: func.Context): queue_client = queue.queue(queue_name, storage_account) queue_client.send_message(stats) +# Contains generic logic for gathering measurements for the function at hand, +# given a request JSON. Used by all handlers, regardless of the trigger. def measure(req_json) -> str: - # logging.info("TIPU") TODO(oana) remove - # logging.info(type(req_json)) req_id = req_json['request-id'] begin = datetime.datetime.now() diff --git a/benchmarks/wrappers/gcp/python/handler.py b/benchmarks/wrappers/gcp/python/handler.py index c547c52c..51a9d604 100644 --- a/benchmarks/wrappers/gcp/python/handler.py +++ b/benchmarks/wrappers/gcp/python/handler.py @@ -65,7 +65,8 @@ def handler_storage(data, context): queue_client = queue.queue(topic_name, project_id) queue_client.send_message(stats) -# TODO(oana) comment +# Contains generic logic for gathering measurements for the function at hand, +# given a request JSON. Used by all handlers, regardless of the trigger. 
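+# Returns the gathered measurements serialized as a JSON string, ready to be
+# pushed to the result queue by the caller.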
def measure(req_json) -> str: req_id = req_json['request-id'] diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index 5c296c90..96b9bc20 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -217,7 +217,6 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: end = datetime.datetime.now() - # TODO(oana) error handling result = ExecutionResult.from_times(begin, end) result.parse_benchmark_output(json.loads(response)) return result @@ -371,7 +370,6 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: end = datetime.datetime.now() - # TODO(oana) error handling result = ExecutionResult.from_times(begin, end) result.parse_benchmark_output(json.loads(response)) return result diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index 638b3b25..f3257d3a 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -351,7 +351,6 @@ def update_function(self, function: Function, code_package: Benchmark): self._mount_function_code(code_package) url = self.publish_function(function, code_package, True) - # TODO(oana): this might need refactoring if function.name.endswith("http"): trigger = HTTPTrigger( url, self.config.resources.data_storage_account(self.cli_instance) @@ -624,7 +623,6 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) f" --target-resource-group {resource_group} " f" --account {storage_account} " f" --name {function.name} " - f" --client-type python " # TODO(oana) does this work for nodejs f" --system-identity " ) print(ret.decode()) diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index 96edff9a..2a2e96bc 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -146,7 +146,6 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: end = datetime.datetime.now() - # TODO(oana) error handling result = ExecutionResult.from_times(begin, end) result.parse_benchmark_output(json.loads(response)) return result @@ -278,18 +277,13 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: blob_client.upload_blob(payload_data, overwrite=True) self.logging.info(f"Uploaded payload to container {self.container_name}") - response = "" # TODO(oana) cleanup - # while (response == ""): - # response = self.result_queue.receive_message() - # if (response == ""): - # time.sleep(5) + response = "" while (response == ""): time.sleep(5) response = self.result_queue.receive_message() end = datetime.datetime.now() - # TODO(oana) error handling result = ExecutionResult.from_times(begin, end) result.parse_benchmark_output(json.loads(response)) return result diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 072b9ba9..41fbe18c 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -194,7 +194,6 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: end = datetime.datetime.now() - # TODO(oana) error handling result = ExecutionResult.from_times(begin, end) result.parse_benchmark_output(json.loads(response)) return result @@ -298,7 +297,6 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: end = datetime.datetime.now() - # TODO(oana) error handling result = ExecutionResult.from_times(begin, end) result.parse_benchmark_output(json.loads(response)) return result
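
Taken together, the queue and storage triggers on all three platforms now share the same synchronous invocation flow: record a start timestamp, publish the payload, poll the per-function result queue until the deployed wrapper reports its measurements, and combine both into an ExecutionResult. The sketch below condenses that flow; invoke_and_wait is an illustrative name only (the real logic lives in each trigger's sync_invoke), trigger_queue and result_queue stand in for the provider-specific SQS/AzureQueue/GCPQueue wrappers, and the serialization step is simplified (Azure base64-encodes the payload, and the storage triggers upload payload.json to a bucket or container instead of sending a queue message).

import datetime
import json
import time

from sebs.faas.function import ExecutionResult


def invoke_and_wait(trigger_queue, result_queue, payload: dict) -> ExecutionResult:
    # Publish the serialized payload; the deployed wrapper consumes it, runs the
    # benchmark, and pushes its measurements to the result queue.
    begin = datetime.datetime.now()
    trigger_queue.send_message(json.dumps(payload))

    # Poll until the wrapper reports back; an empty string means no message yet.
    response = ""
    while response == "":
        response = result_queue.receive_message()
        if response == "":
            time.sleep(5)
    end = datetime.datetime.now()

    # Client-side round-trip time plus the measurements parsed from the wrapper's output.
    result = ExecutionResult.from_times(begin, end)
    result.parse_benchmark_output(json.loads(response))
    return result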