diff --git a/benchmarks/100.webapps/130.airline-booking/cancel_booking/config.json b/benchmarks/100.webapps/130.airline-booking/cancel_booking/config.json new file mode 100644 index 00000000..9214cfb8 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/cancel_booking/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 30, + "memory": 128, + "languages": ["python"], + "trigger": "queue" +} diff --git a/benchmarks/100.webapps/130.airline-booking/cancel_booking/python/function.py b/benchmarks/100.webapps/130.airline-booking/cancel_booking/python/function.py new file mode 100644 index 00000000..c5824d07 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/cancel_booking/python/function.py @@ -0,0 +1,93 @@ +import datetime, json, os + +from . import misc +from . import queue + +from . import nosql +nosql_client = nosql.nosql.get_instance() + +nosql_table_name = 'booking_table' + +# import boto3 +# session = boto3.Session() +# dynamodb = session.resource('dynamodb') +# table = dynamodb.Table('booking_table') + + +def handler(event): + """AWS Lambda Function entrypoint to cancel booking + + Parameters + ---------- + event: dict, required + Step Functions State Machine event + + chargeId: string + pre-authorization charge ID + + context: object, required + Lambda Context runtime methods and attributes + Context doc: https://docs.aws.amazon.com/lambda/latest/dg/python-context-object.html + + Returns + ------- + boolean + + Raises + ------ + BookingCancellationException + Booking Cancellation Exception including error message upon failure + """ + if ('booking_id' not in event): + raise ValueError('Invalid booking ID') + + booking_id = event['booking_id'] + + print(f'Cancelling booking - {booking_id}') + update_begin = datetime.datetime.now() + # TODO: rewrite with generic nosql wrapper once it is merged + # ret = table.update_item( + # Key={'id': booking_id}, + # ConditionExpression='id = :idVal', + # UpdateExpression='SET #STATUS = :cancelled', + # ExpressionAttributeNames={'#STATUS': 'status'}, + # ExpressionAttributeValues={':idVal': booking_id, ':cancelled': 'CANCELLED'}, + # ) + update_end = datetime.datetime.now() + + release_flight_input = { + 'outbound_flight_id': event['outbound_flight_id'], + 'parent_execution_id': event['request-id'] + } + + queue_begin = datetime.datetime.now() + queue_client = queue.queue( + misc.function_name( + fname='release_flight', + language='python', + version='3.9', + trigger='queue' + ) + ) + queue_client.send_message(json.dumps(release_flight_input)) + queue_end = datetime.datetime.now() + + update_time = (update_end - update_begin) / datetime.timedelta(microseconds=1) + queue_time = (queue_end - queue_begin) / datetime.timedelta(microseconds=1) + return { + 'result': 0, + 'fns_triggered': 1, + 'measurement': { + 'update_time': update_time, + 'queue_time': queue_time + } + } + + +""" +Sample input: +{ + "booking_id": "5347fc8e-46f2-434d-9d09-fa4d31f7f266", + "outbound_flight_id": "fae7c68d-2683-4968-87a2-dfe2a090c2d1" +} +""" diff --git a/benchmarks/100.webapps/130.airline-booking/cancel_booking/python/requirements.txt b/benchmarks/100.webapps/130.airline-booking/cancel_booking/python/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/100.webapps/130.airline-booking/collect_payment/config.json b/benchmarks/100.webapps/130.airline-booking/collect_payment/config.json new file mode 100644 index 00000000..9214cfb8 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/collect_payment/config.json @@ -0,0 +1,6 
@@ +{ + "timeout": 30, + "memory": 128, + "languages": ["python"], + "trigger": "queue" +} diff --git a/benchmarks/100.webapps/130.airline-booking/collect_payment/python/function.py b/benchmarks/100.webapps/130.airline-booking/collect_payment/python/function.py new file mode 100644 index 00000000..4596f605 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/collect_payment/python/function.py @@ -0,0 +1,104 @@ +import datetime, json, os + +payment_endpoint = 'dummy' + +from . import misc +from . import queue + + +def handler(event): + """AWS Lambda Function entrypoint to collect payment + + Parameters + ---------- + event: dict, required + Step Functions State Machine event + + chargeId: string + pre-authorization charge ID + + Returns + ------- + dict + receiptUrl: string + receipt URL of charge collected + + price: int + amount collected + """ + if ('charge_id' not in event): + raise ValueError('Invalid Charge ID') + + pre_authorization_token = event['charge_id'] + customer_id = event['customer_id'] + + print(f'Collecting payment from customer {customer_id} using {pre_authorization_token} token') + if (not payment_endpoint): + raise ValueError('Payment API URL is invalid -- Consider reviewing PAYMENT_API_URL env') + + # This used to be an external API call: + # + # payment_payload = {'charge_id': charge_id} + # ret = requests.post(payment_endpoint, json=payment_payload) + # ret.raise_for_status() + # payment_response = ret.json() + + if (payment_successful()): + confirm_booking_input = { + 'customer_id': event['customer_id'], + 'booking_id': event['booking_id'], + 'parent_execution_id': event['request-id'] + } + + queue_begin = datetime.datetime.now() + queue_client = queue.queue( + misc.function_name( + fname='confirm_booking', + language='python', + version='3.9', + trigger='queue' + ) + ) + queue_client.send_message(json.dumps(confirm_booking_input)) + queue_end = datetime.datetime.now() + else: + cancel_booking_input = { + 'outbound_flight_id': event['outbound_flight_id'], + 'booking_id': event['booking_id'], + 'parent_execution_id': event['request-id'] + } + + queue_begin = datetime.datetime.now() + queue_client = queue.queue( + misc.function_name( + fname='cancel_booking', + language='python', + version='3.9', + trigger='queue' + ) + ) + queue_client.send_message(json.dumps(cancel_booking_input)) + queue_end = datetime.datetime.now() + + queue_time = (queue_end - queue_begin) / datetime.timedelta(microseconds=1) + return { + 'result': 0, + 'fns_triggered': 1, + 'measurement': { + 'queue_time': queue_time + } + } + +def payment_successful(): + return True # False + + +""" +Sample input: +{ + "charge_id": "ch_1EeqlbF4aIiftV70qXHQewmn", + "customer_id": "d749f277-0950-4ad6-ab04-98988721e475", + "booking_id": "5347fc8e-46f2-434d-9d09-fa4d31f7f266", + "outbound_flight_id": "fae7c68d-2683-4968-87a2-dfe2a090c2d1" +} +""" \ No newline at end of file diff --git a/benchmarks/100.webapps/130.airline-booking/collect_payment/python/requirements.txt b/benchmarks/100.webapps/130.airline-booking/collect_payment/python/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/100.webapps/130.airline-booking/config.json b/benchmarks/100.webapps/130.airline-booking/config.json new file mode 100644 index 00000000..173009a5 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/config.json @@ -0,0 +1,4 @@ +{ + "type": "app", + "resources": [] +} \ No newline at end of file diff --git a/benchmarks/100.webapps/130.airline-booking/confirm_booking/config.json 
b/benchmarks/100.webapps/130.airline-booking/confirm_booking/config.json new file mode 100644 index 00000000..ea0abe89 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/confirm_booking/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 30, + "memory": 128, + "languages": ["python", "nodejs"], + "trigger": "queue" +} \ No newline at end of file diff --git a/benchmarks/100.webapps/130.airline-booking/confirm_booking/python/function.py b/benchmarks/100.webapps/130.airline-booking/confirm_booking/python/function.py new file mode 100755 index 00000000..99b057b1 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/confirm_booking/python/function.py @@ -0,0 +1,92 @@ +import datetime, json, os, secrets + +from . import misc +from . import queue + +from . import nosql +nosql_client = nosql.nosql.get_instance() + +nosql_table_name = 'booking_table' + +# import boto3 +# session = boto3.Session() +# dynamodb = session.resource('dynamodb') +# table = dynamodb.Table('booking_table') + + +def handler(event): + """AWS Lambda Function entrypoint to confirm booking + + Parameters + ---------- + event: dict, required + Step Functions State Machine event + + bookingId: string + Unique Booking ID of an unconfirmed booking + + Returns + ------- + string + bookingReference generated + """ + if ('booking_id' not in event): + raise ValueError('Invalid booking ID') + + booking_id = event['booking_id'] + + print(f'Confirming booking - {booking_id}') + reference = secrets.token_urlsafe(4) + update_begin = datetime.datetime.now() + # TODO: rewrite with generic nosql wrapper once it is merged + # ret = table.update_item( + # Key={'id': booking_id}, + # ConditionExpression='id = :idVal', + # UpdateExpression='SET bookingReference = :br, #STATUS = :confirmed', + # ExpressionAttributeNames={'#STATUS': 'status'}, + # ExpressionAttributeValues={ + # ':br': reference, + # ':idVal': booking_id, + # ':confirmed': 'CONFIRMED', + # }, + # ReturnValues='UPDATED_NEW', + # ) + update_end = datetime.datetime.now() + + notify_booking_input = { + 'customer_id': event['customer_id'], + 'reference': reference, + 'parent_execution_id': event['request-id'] + } + + queue_begin = datetime.datetime.now() + queue_client = queue.queue( + misc.function_name( + fname='notify_booking', + language='python', + version='3.9', + trigger='queue' + ) + ) + queue_client.send_message(json.dumps(notify_booking_input)) + queue_end = datetime.datetime.now() + + update_time = (update_end - update_begin) / datetime.timedelta(microseconds=1) + queue_time = (queue_end - queue_begin) / datetime.timedelta(microseconds=1) + return { + 'result': 0, + 'fns_triggered': 1, + 'measurement': { + 'update_time': update_time, + 'queue_time': queue_time + } + } + + +""" +Sample input: +{ + "customer_id": "d749f277-0950-4ad6-ab04-98988721e475", + "booking_id": "5347fc8e-46f2-434d-9d09-fa4d31f7f266" +} +""" \ No newline at end of file diff --git a/benchmarks/100.webapps/130.airline-booking/confirm_booking/python/requirements.txt b/benchmarks/100.webapps/130.airline-booking/confirm_booking/python/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/100.webapps/130.airline-booking/input.py b/benchmarks/100.webapps/130.airline-booking/input.py new file mode 100644 index 00000000..2f13bca6 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/input.py @@ -0,0 +1,19 @@ +import glob, os + +def buckets_count(): + return (1, 0) + +def upload_files(data_root, data_dir, upload_func): + for root, dirs, files in 
os.walk(data_dir): + prefix = os.path.relpath(root, data_root) + for file in files: + file_name = prefix + '/' + file + filepath = os.path.join(root, file) + upload_func(0, file_name, filepath) + +def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func): + input_config = {} + input_config['charge_id'] = 'ch_1EeqlbF4aIiftV70qXHQewmn' + input_config['customer_id'] = 'd749f277-0950-4ad6-ab04-98988721e475' + input_config['outbound_flight_id'] = 'fae7c68d-2683-4968-87a2-dfe2a090c2d1' + return input_config diff --git a/benchmarks/100.webapps/130.airline-booking/notify_booking/config.json b/benchmarks/100.webapps/130.airline-booking/notify_booking/config.json new file mode 100644 index 00000000..9214cfb8 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/notify_booking/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 30, + "memory": 128, + "languages": ["python"], + "trigger": "queue" +} diff --git a/benchmarks/100.webapps/130.airline-booking/notify_booking/python/function.py b/benchmarks/100.webapps/130.airline-booking/notify_booking/python/function.py new file mode 100644 index 00000000..2ab8d4bd --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/notify_booking/python/function.py @@ -0,0 +1,63 @@ +import datetime, json, os + + +def handler(event): + """AWS Lambda Function entrypoint to notify booking + + Parameters + ---------- + event: dict, required + Step Functions State Machine event + + customer_id: string + Unique Customer ID + + price: string + Flight price + + bookingReference: string + Confirmed booking reference + + context: object, required + Lambda Context runtime methods and attributes + Context doc: https://docs.aws.amazon.com/lambda/latest/dg/python-context-object.html + + Returns + ------- + string + notificationId + Unique ID confirming notification delivery + + Raises + ------ + BookingNotificationException + Booking Notification Exception including error message upon failure + """ + if ('customer_id' not in event): + raise ValueError('Invalid customer ID') + + customer_id = event['customer_id'] + booking_reference = event['reference'] + + successful_subject = f'Booking confirmation for {booking_reference}' + unsuccessful_subject = f'Unable to process booking' + + subject = successful_subject if booking_reference else unsuccessful_subject + booking_status = 'confirmed' if booking_reference else 'cancelled' + + # Should we plan to support SNS-like cloud components in SeBS: + # + # payload = {'customerId': customer_id} + # ret = sns.publish( + # TopicArn=booking_sns_topic, + # Message=json.dumps(payload), + # Subject=subject, + # MessageAttributes={ + # 'Booking.Status': {'DataType': 'String', 'StringValue': booking_status} + # }, + # ) + + return { + 'result': 0, + 'measurement': {} + } diff --git a/benchmarks/100.webapps/130.airline-booking/notify_booking/python/requirements.txt b/benchmarks/100.webapps/130.airline-booking/notify_booking/python/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/100.webapps/130.airline-booking/release_flight/config.json b/benchmarks/100.webapps/130.airline-booking/release_flight/config.json new file mode 100644 index 00000000..9214cfb8 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/release_flight/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 30, + "memory": 128, + "languages": ["python"], + "trigger": "queue" +} diff --git a/benchmarks/100.webapps/130.airline-booking/release_flight/python/function.py 
b/benchmarks/100.webapps/130.airline-booking/release_flight/python/function.py new file mode 100644 index 00000000..412665f1 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/release_flight/python/function.py @@ -0,0 +1,48 @@ +import datetime, json, os + +from . import misc + +from . import nosql +nosql_client = nosql.nosql.get_instance() + +nosql_table_name = 'flight_table' + +# import boto3 +# session = boto3.Session() +# dynamodb = session.resource('dynamodb') +# table = dynamodb.Table('flight_table') + + +def handler(event): + if ('outbound_flight_id' not in event): + raise ValueError('Invalid arguments') + + outbound_flight_id = event['outbound_flight_id'] + + update_begin = datetime.datetime.now() + # TODO: rewrite with generic nosql wrapper once it is merged + # table.update_item( + # Key={'id': outbound_flight_id}, + # ConditionExpression='id = :idVal',# AND seatCapacity < maximumSeating', + # UpdateExpression='SET seatCapacity = seatCapacity + :dec', + # ExpressionAttributeValues={ + # ':idVal': outbound_flight_id, + # ':dec': 1 + # }, + # ) + update_end = datetime.datetime.now() + + update_time = (update_end - update_begin) / datetime.timedelta(microseconds=1) + return { + 'result': 0, + 'measurement': { + 'update_time': update_time + } + } + +""" +Sample input: +{ + "outbound_flight_id": "fae7c68d-2683-4968-87a2-dfe2a090c2d1" +} +""" diff --git a/benchmarks/100.webapps/130.airline-booking/release_flight/python/requirements.txt b/benchmarks/100.webapps/130.airline-booking/release_flight/python/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/100.webapps/130.airline-booking/reserve_booking/config.json b/benchmarks/100.webapps/130.airline-booking/reserve_booking/config.json new file mode 100644 index 00000000..9214cfb8 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/reserve_booking/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 30, + "memory": 128, + "languages": ["python"], + "trigger": "queue" +} diff --git a/benchmarks/100.webapps/130.airline-booking/reserve_booking/python/function.py b/benchmarks/100.webapps/130.airline-booking/reserve_booking/python/function.py new file mode 100755 index 00000000..cc6e7ce7 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/reserve_booking/python/function.py @@ -0,0 +1,106 @@ +import datetime, json, os, uuid + +from . import misc +from . import queue + +from . 
import nosql +nosql_client = nosql.nosql.get_instance() + +nosql_table_name = 'booking_table' + +# import boto3 +# session = boto3.Session() +# dynamodb = session.resource('dynamodb') +# table = dynamodb.Table('booking_table') + + +def is_booking_request_valid(booking): + return all(x in booking for x in ['outbound_flight_id', 'customer_id', 'charge_id']) + +def handler(event): + """AWS Lambda Function entrypoint to reserve a booking + + Parameters + ---------- + event: + charge_id: string + Pre-authorization payment token + + customer_id: string + Customer unique identifier + + outbound_flight_id: string + Outbound flight unique identifier + + Returns + ------- + bookingId: string + booking ID generated + """ + if (not is_booking_request_valid(event)): + raise ValueError('Invalid booking request') + + print(f"Reserving booking for customer {event['customer_id']}") + booking_id = str(uuid.uuid4()) + outbound_flight_id = event['outbound_flight_id'] + customer_id = event['customer_id'] + payment_token = event['charge_id'] + + booking_item = { + 'id': booking_id, + 'bookingOutboundFlightId': outbound_flight_id, + 'checkedIn': False, + 'customer': customer_id, + 'paymentToken': payment_token, + 'status': 'UNCONFIRMED', + 'createdAt': str(datetime.datetime.now()), + } + update_begin = datetime.datetime.now() + # table.put_item(Item=booking_item) + nosql_client.insert( + table_name=nosql_table_name, + data=booking_item, + ) + update_end = datetime.datetime.now() + + collect_payment_input = { + 'booking_id': booking_id, + 'customer_id': customer_id, + 'charge_id': payment_token, + 'outbound_flight_id': outbound_flight_id, + 'parent_execution_id': event['request-id'] + } + + queue_begin = datetime.datetime.now() + queue_client = queue.queue( + misc.function_name( + fname='collect_payment', + language='python', + version='3.9', + trigger='queue' + ) + ) + queue_client.send_message(json.dumps(collect_payment_input)) + queue_end = datetime.datetime.now() + + update_time = (update_end - update_begin) / datetime.timedelta(microseconds=1) + queue_time = (queue_end - queue_begin) / datetime.timedelta(microseconds=1) + return { + 'result': 0, + 'fns_triggered': 1, + 'measurement': { + 'update_time': update_time, + 'queue_time': queue_time + } + } + + +""" +Sample input: +{ + "charge_id": "ch_1EeqlbF4aIiftV70qXHQewmn", + "customer_id": "d749f277-0950-4ad6-ab04-98988721e475", + "booking_id": "5347fc8e-46f2-434d-9d09-fa4d31f7f266", + "outbound_flight_id": "fae7c68d-2683-4968-87a2-dfe2a090c2d1" +} +""" \ No newline at end of file diff --git a/benchmarks/100.webapps/130.airline-booking/reserve_booking/python/requirements.txt b/benchmarks/100.webapps/130.airline-booking/reserve_booking/python/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/100.webapps/130.airline-booking/reserve_flight/config.json b/benchmarks/100.webapps/130.airline-booking/reserve_flight/config.json new file mode 100644 index 00000000..5cfe2171 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/reserve_flight/config.json @@ -0,0 +1,7 @@ +{ + "timeout": 30, + "memory": 128, + "languages": ["python"], + "trigger": "queue", + "entrypoint": true +} diff --git a/benchmarks/100.webapps/130.airline-booking/reserve_flight/python/function.py b/benchmarks/100.webapps/130.airline-booking/reserve_flight/python/function.py new file mode 100644 index 00000000..ff303a08 --- /dev/null +++ b/benchmarks/100.webapps/130.airline-booking/reserve_flight/python/function.py @@ -0,0 +1,75 @@ +import datetime, 
json, os + +from . import misc +from . import queue + +from . import nosql +nosql_client = nosql.nosql.get_instance() + +nosql_table_name = 'flight_table' + +# import boto3 +# session = boto3.Session() +# dynamodb = session.resource('dynamodb') +# table = dynamodb.Table('flight_table') + + +def handler(event): + if ('outbound_flight_id' not in event): + raise ValueError('Invalid arguments') + + outbound_flight_id = event['outbound_flight_id'] + + update_begin = datetime.datetime.now() + # TODO: rewrite with generic nosql wrapper once it is merged + # table.update_item( + # Key={"id": outbound_flight_id}, + # ConditionExpression="id = :idVal AND seatCapacity > :zero", + # UpdateExpression="SET seatCapacity = seatCapacity - :dec", + # ExpressionAttributeValues={ + # ":idVal": outbound_flight_id, + # ":dec": 1, + # ":zero": 0 + # }, + # ) + update_end = datetime.datetime.now() + + reserve_booking_input = { + 'charge_id': event['charge_id'], + 'customer_id': event['customer_id'], + 'outbound_flight_id': outbound_flight_id, + 'parent_execution_id': event['request-id'] + } + + queue_begin = datetime.datetime.now() + queue_client = queue.queue( + misc.function_name( + fname='reserve_booking', + language='python', + version='3.9', + trigger='queue' + ) + ) + queue_client.send_message(json.dumps(reserve_booking_input)) + queue_end = datetime.datetime.now() + + update_time = (update_end - update_begin) / datetime.timedelta(microseconds=1) + queue_time = (queue_end - queue_begin) / datetime.timedelta(microseconds=1) + return { + 'result': 0, + 'fns_triggered': 1, + 'measurement': { + 'update_time': update_time, + 'queue_time': queue_time + } + } + + +""" +Sample input: +{ + "charge_id": "ch_1EeqlbF4aIiftV70qXHQewmn", + "customer_id": "d749f277-0950-4ad6-ab04-98988721e475", + "outbound_flight_id": "fae7c68d-2683-4968-87a2-dfe2a090c2d1" +} +""" \ No newline at end of file diff --git a/benchmarks/100.webapps/130.airline-booking/reserve_flight/python/requirements.txt b/benchmarks/100.webapps/130.airline-booking/reserve_flight/python/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/400.inference/420.prediction-reviews/config.json b/benchmarks/400.inference/420.prediction-reviews/config.json new file mode 100644 index 00000000..97c03cd8 --- /dev/null +++ b/benchmarks/400.inference/420.prediction-reviews/config.json @@ -0,0 +1,4 @@ +{ + "type": "app", + "resources": [] +} diff --git a/benchmarks/400.inference/420.prediction-reviews/input.py b/benchmarks/400.inference/420.prediction-reviews/input.py new file mode 100644 index 00000000..38f6106a --- /dev/null +++ b/benchmarks/400.inference/420.prediction-reviews/input.py @@ -0,0 +1,27 @@ +import glob, os + +def buckets_count(): + return (1, 0) + +def upload_files(data_root, data_dir, upload_func): + for root, dirs, files in os.walk(data_dir): + prefix = os.path.relpath(root, data_root) + for file in files: + file_name = prefix + '/' + file + filepath = os.path.join(root, file) + upload_func(0, file_name, filepath) + +def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func): + dataset_name = 'reviews50mb.csv' + upload_func(0, dataset_name, os.path.join(data_dir, 'dataset', dataset_name)) + + model_name = 'lr_model.pk' + # upload_func(0, model_name, os.path.join(data_dir, 'model', model_name)) + + input_config = {'dataset': {}, 'model': {}, 'bucket': {}} + input_config['dataset']['key'] = dataset_name + input_config['model']['key'] = model_name + input_config['bucket']['name'] = 
benchmarks_bucket + input_config['bucket']['path'] = input_paths[0] + input_config['input'] = 'The ambiance is magical. The food and service was nice! The lobster and cheese was to die for and our steaks were cooked perfectly.' + return input_config diff --git a/benchmarks/400.inference/420.prediction-reviews/prediction/config.json b/benchmarks/400.inference/420.prediction-reviews/prediction/config.json new file mode 100644 index 00000000..5131c929 --- /dev/null +++ b/benchmarks/400.inference/420.prediction-reviews/prediction/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 60, + "memory": 1024, + "languages": ["python"], + "trigger": "queue" +} \ No newline at end of file diff --git a/benchmarks/400.inference/420.prediction-reviews/prediction/python/function.py b/benchmarks/400.inference/420.prediction-reviews/prediction/python/function.py new file mode 100644 index 00000000..6ccd8cb3 --- /dev/null +++ b/benchmarks/400.inference/420.prediction-reviews/prediction/python/function.py @@ -0,0 +1,78 @@ +import datetime, io, joblib, os, re, sys, zipfile + +from time import time + +from . import queue +from . import storage +client = storage.storage.get_instance() + +# Extract zipped pandas - which is otherwise too large for AWS/GCP. +if os.path.exists('function/pandas.zip'): + zipfile.ZipFile('function/pandas.zip').extractall('/tmp/') + sys.path.append(os.path.join(os.path.dirname(__file__), '/tmp/.python_packages/lib/site-packages/')) + +if os.path.exists('./pandas.zip'): + zipfile.ZipFile('./pandas.zip').extractall('/tmp/') + sys.path.append(os.path.join(os.path.dirname(__file__), '/tmp/.python_packages/lib/site-packages/')) + +import pandas as pd + +from importlib.metadata import version + +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.linear_model import LogisticRegression + +cleanup_re = re.compile('[^a-z]+') +def cleanup(sentence): + sentence = sentence.lower() + sentence = cleanup_re.sub(' ', sentence).strip() + return sentence + +def handler(event): + x = event['input'] + bucket = event['bucket']['name'] + bucket_path = event['bucket']['path'] + dataset_key = event['dataset']['key'] + model_key = event['model']['key'] + + dataset_path = f'{bucket_path}/{dataset_key}' + model_path = f'{bucket_path}/{model_key}' + + dataset_local_path = '/tmp/' + dataset_key + model_local_path = '/tmp/' + model_key + + download_dataset_begin = datetime.datetime.now() + client.download(bucket, dataset_path, dataset_local_path) + download_dataset_end = datetime.datetime.now() + + download_model_begin = datetime.datetime.now() + client.download(bucket, model_path, model_local_path) + download_model_end = datetime.datetime.now() + + df = pd.read_csv(dataset_local_path) + + process_begin = datetime.datetime.now() + df_input = pd.DataFrame() + df_input['x'] = [x] + df_input['x'] = df_input['x'].apply(cleanup) + + df['train'] = df['Text'].apply(cleanup) + tfidf_vect = TfidfVectorizer(min_df=100).fit(df['train']) + X = tfidf_vect.transform(df_input['x']) + + model = joblib.load(model_local_path) + y = model.predict(X) + process_end = datetime.datetime.now() + + download_dataset_time = (download_dataset_end - download_dataset_begin) / datetime.timedelta(microseconds=1) + download_model_time = (download_model_end - download_model_begin) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + + return { + 'result': 0, + 'measurement': { + 'download_dataset_time': download_dataset_time, + 'download_model_time': 
download_model_time, + 'process_time': process_time + } + } diff --git a/benchmarks/400.inference/420.prediction-reviews/prediction/python/package.sh b/benchmarks/400.inference/420.prediction-reviews/prediction/python/package.sh new file mode 100644 index 00000000..772ec17f --- /dev/null +++ b/benchmarks/400.inference/420.prediction-reviews/prediction/python/package.sh @@ -0,0 +1,32 @@ +# Stripping package code is based on https://github.com/ryfeus/lambda-packs repo + +PACKAGE_DIR=$1 +echo "Original size $(du -sh $1 | cut -f1)" + +CUR_DIR=$(pwd) +cd $1 +# cleaning libs +rm -rf external +find . -type d -name "tests" -exec rm -rf {} + +find . -type d -name "test" -exec rm -rf {} + +find . -type d -name "bin" -not -path "*/pandas/*" -exec rm -rf {} + + +# cleaning +# stripping some of the numpy libs - libgfortran-2e0d59d6.so.5.0.0 - causes issues on Azure +find -name "*.so" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip +find -name "*.so.*" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip + +rm -r pip >/dev/null +rm -r pip-* >/dev/null +rm -r wheel >/dev/null +rm -r wheel-* >/dev/null +rm easy_install.py >/dev/null +find . -name \*.pyc -delete +cd ${CUR_DIR} +echo "Stripped size $(du -sh $1 | cut -f1)" + +if ([[ "${PLATFORM}" == "AWS" ]] || [[ "${PLATFORM}" == "GCP" ]]) && ([[ "${PYTHON_VERSION}" == "3.8" ]] || [[ "${PYTHON_VERSION}" == "3.9" ]]); then + zip -qr pandas.zip $1/pandas + rm -rf $1/pandas + echo "Pandas-zipped size $(du -sh ${CUR_DIR} | cut -f1)" +fi diff --git a/benchmarks/400.inference/420.prediction-reviews/prediction/python/requirements.txt b/benchmarks/400.inference/420.prediction-reviews/prediction/python/requirements.txt new file mode 100644 index 00000000..0bfc02c9 --- /dev/null +++ b/benchmarks/400.inference/420.prediction-reviews/prediction/python/requirements.txt @@ -0,0 +1,4 @@ +numpy<2 +pandas +scikit-learn +joblib diff --git a/benchmarks/400.inference/420.prediction-reviews/training/config.json b/benchmarks/400.inference/420.prediction-reviews/training/config.json new file mode 100644 index 00000000..2bef2ce3 --- /dev/null +++ b/benchmarks/400.inference/420.prediction-reviews/training/config.json @@ -0,0 +1,7 @@ +{ + "timeout": 540, + "memory": 1024, + "languages": ["python"], + "trigger": "queue", + "entrypoint": true +} diff --git a/benchmarks/400.inference/420.prediction-reviews/training/python/function.py b/benchmarks/400.inference/420.prediction-reviews/training/python/function.py new file mode 100644 index 00000000..ad562e78 --- /dev/null +++ b/benchmarks/400.inference/420.prediction-reviews/training/python/function.py @@ -0,0 +1,99 @@ +import datetime, io, joblib, json, os, re, sys, zipfile + +from time import time + +from . import misc +from . import queue +from . import storage +client = storage.storage.get_instance() + +# Extract zipped pandas - which is otherwise too large for AWS/GCP. 
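A note on the extraction blocks that follow: because the second argument of os.path.join is absolute, the joined path collapses to '/tmp/.python_packages/lib/site-packages/', so the two branches differ only in where the archive is looked up. A minimal equivalent sketch, assuming those are the only two locations the packaging step can leave the archive in:

# Sketch only; same behaviour as the two branches below under that assumption.
for archive in ('function/pandas.zip', './pandas.zip'):
    if os.path.exists(archive):
        zipfile.ZipFile(archive).extractall('/tmp/')
        sys.path.append('/tmp/.python_packages/lib/site-packages/')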
+if os.path.exists('function/pandas.zip'): + zipfile.ZipFile('function/pandas.zip').extractall('/tmp/') + sys.path.append(os.path.join(os.path.dirname(__file__), '/tmp/.python_packages/lib/site-packages/')) + +if os.path.exists('./pandas.zip'): + zipfile.ZipFile('./pandas.zip').extractall('/tmp/') + sys.path.append(os.path.join(os.path.dirname(__file__), '/tmp/.python_packages/lib/site-packages/')) + +import pandas as pd + +from importlib.metadata import version + +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.linear_model import LogisticRegression + +cleanup_re = re.compile('[^a-z]+') +def cleanup(sentence): + sentence = sentence.lower() + sentence = cleanup_re.sub(' ', sentence).strip() + return sentence + +def handler(event): + bucket = event['bucket']['name'] + bucket_path = event['bucket']['path'] + dataset_key = event['dataset']['key'] + model_key = event['model']['key'] + + dataset_path = f'{bucket_path}/{dataset_key}' + model_path = f'{bucket_path}/{model_key}' + + model_local_path = '/tmp/' + model_key + + download_begin = datetime.datetime.now() + dataset = client.get_object(bucket, dataset_path) + download_end = datetime.datetime.now() + + df = pd.read_csv(io.BytesIO(dataset)) + + process_begin = datetime.datetime.now() + df['train'] = df['Text'].apply(cleanup) + + tfidf_vector = TfidfVectorizer(min_df=100).fit(df['train']) + + train = tfidf_vector.transform(df['train']) + + model = LogisticRegression() + model.fit(train, df['Score']) + process_end = datetime.datetime.now() + + joblib.dump(model, model_local_path) + + upload_begin = datetime.datetime.now() + client.upload(bucket, model_path, model_local_path, True) + upload_end = datetime.datetime.now() + + prediction_input = {'dataset': {}, 'model': {}, 'bucket': {}} + prediction_input['input'] = event['input'] + prediction_input['bucket']['name'] = bucket + prediction_input['bucket']['path'] = bucket_path + prediction_input['dataset']['key'] = dataset_key + prediction_input['model']['key'] = model_key + prediction_input['parent_execution_id'] = event['request-id'] + + queue_begin = datetime.datetime.now() + queue_client = queue.queue( + misc.function_name( + fname='prediction', + language='python', + version='3.9', + trigger='queue' + ) + ) + queue_client.send_message(json.dumps(prediction_input)) + queue_end = datetime.datetime.now() + + download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1) + queue_time = (queue_end - queue_begin) / datetime.timedelta(microseconds=1) + return { + 'result': prediction_input, + 'fns_triggered': 1, + 'measurement': { + 'download_time': download_time, + 'process_time': process_time, + 'upload_time': upload_time, + 'queue_time': queue_time + } + } diff --git a/benchmarks/400.inference/420.prediction-reviews/training/python/package.sh b/benchmarks/400.inference/420.prediction-reviews/training/python/package.sh new file mode 100644 index 00000000..772ec17f --- /dev/null +++ b/benchmarks/400.inference/420.prediction-reviews/training/python/package.sh @@ -0,0 +1,32 @@ +# Stripping package code is based on https://github.com/ryfeus/lambda-packs repo + +PACKAGE_DIR=$1 +echo "Original size $(du -sh $1 | cut -f1)" + +CUR_DIR=$(pwd) +cd $1 +# cleaning libs +rm -rf external +find . -type d -name "tests" -exec rm -rf {} + +find . -type d -name "test" -exec rm -rf {} + +find . 
-type d -name "bin" -not -path "*/pandas/*" -exec rm -rf {} + + +# cleaning +# stripping some of the numpy libs - libgfortran-2e0d59d6.so.5.0.0 - causes issues on Azure +find -name "*.so" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip +find -name "*.so.*" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip + +rm -r pip >/dev/null +rm -r pip-* >/dev/null +rm -r wheel >/dev/null +rm -r wheel-* >/dev/null +rm easy_install.py >/dev/null +find . -name \*.pyc -delete +cd ${CUR_DIR} +echo "Stripped size $(du -sh $1 | cut -f1)" + +if ([[ "${PLATFORM}" == "AWS" ]] || [[ "${PLATFORM}" == "GCP" ]]) && ([[ "${PYTHON_VERSION}" == "3.8" ]] || [[ "${PYTHON_VERSION}" == "3.9" ]]); then + zip -qr pandas.zip $1/pandas + rm -rf $1/pandas + echo "Pandas-zipped size $(du -sh ${CUR_DIR} | cut -f1)" +fi diff --git a/benchmarks/400.inference/420.prediction-reviews/training/python/requirements.txt b/benchmarks/400.inference/420.prediction-reviews/training/python/requirements.txt new file mode 100644 index 00000000..0bfc02c9 --- /dev/null +++ b/benchmarks/400.inference/420.prediction-reviews/training/python/requirements.txt @@ -0,0 +1,4 @@ +numpy<2 +pandas +scikit-learn +joblib diff --git a/benchmarks/400.inference/430.feature-gen/config.json b/benchmarks/400.inference/430.feature-gen/config.json new file mode 100644 index 00000000..9f6b2d1e --- /dev/null +++ b/benchmarks/400.inference/430.feature-gen/config.json @@ -0,0 +1,5 @@ +{ + "type": "app", + "resources": [] + } + \ No newline at end of file diff --git a/benchmarks/400.inference/430.feature-gen/extractor/config.json b/benchmarks/400.inference/430.feature-gen/extractor/config.json new file mode 100644 index 00000000..70fe5fe1 --- /dev/null +++ b/benchmarks/400.inference/430.feature-gen/extractor/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 300, + "memory": 512, + "languages": ["python"], + "trigger": "storage" +} diff --git a/benchmarks/400.inference/430.feature-gen/extractor/python/function.py b/benchmarks/400.inference/430.feature-gen/extractor/python/function.py new file mode 100644 index 00000000..6fde1bee --- /dev/null +++ b/benchmarks/400.inference/430.feature-gen/extractor/python/function.py @@ -0,0 +1,75 @@ +import datetime, io, json, os, re, sys, uuid, zipfile + +from . import misc +from . import storage +client = storage.storage.get_instance() + +# Extract zipped pandas - which is otherwise too large for AWS/GCP. +if os.path.exists('function/pandas.zip'): + zipfile.ZipFile('function/pandas.zip').extractall('/tmp/') + sys.path.append(os.path.join(os.path.dirname(__file__), '/tmp/.python_packages/lib/site-packages/')) + +if os.path.exists('./pandas.zip'): + zipfile.ZipFile('./pandas.zip').extractall('/tmp/') + sys.path.append(os.path.join(os.path.dirname(__file__), '/tmp/.python_packages/lib/site-packages/')) + +import pandas as pd + + +cleanup_re = re.compile('[^a-z]+') + +def cleanup(sentence): + sentence = sentence.lower() + sentence = cleanup_re.sub(' ', sentence).strip() + return sentence + +def handler(event): + output_bucket = event['output_bucket']['name'] + dataset_key = event['object']['key'] + + # Cleanup the bucket between function iterations. + input_bucket = misc.function_name( + fname='extractor', + language='python', + version='3.9', + trigger='storage' + ) + delete_begin = datetime.datetime.now() + client.delete_object(input_bucket, dataset_key) + delete_end = datetime.datetime.now() + + # Do the work. 
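The code below serialises the per-shard vocabulary as a comma-separated string of quoted tokens, which is the format the reducer later parses with body.replace("'", '').split(','). A small illustration with a hypothetical two-review shard:

# Illustration only; made-up shard contents, same transformations as in the handler below.
texts = ['Great food!', 'great service']   # stands in for df['Text'] of a tiny shard
words = set(w for t in texts for w in cleanup(t).split())
serialized = str(list(words)).lstrip('[').rstrip(']').replace(' ', '')
# serialized == "'great','food','service'" (token order depends on set iteration)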
+ process_begin = datetime.datetime.now() + df = pd.read_json(event['input']) + + df['Text'] = df['Text'].apply(cleanup) + text = df['Text'].tolist() + result = set() + for item in text: + result.update(item.split()) + + feature = str(list(result)) + feature = feature.lstrip('[').rstrip(']').replace(' ', '') + process_end = datetime.datetime.now() + + key = misc.object_path('extractors_output', dataset_key.split('.')[0] + '.txt') + upload_start = datetime.datetime.now() + client.upload_stream( + output_bucket, + key, + io.BytesIO(feature.encode('utf-8')), + True + ) + upload_end = datetime.datetime.now() + + delete_time = (delete_end - delete_begin) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + upload_time = (upload_end - upload_start) / datetime.timedelta(microseconds=1) + return { + 'result': 0, + 'measurement': { + 'delete_time': delete_time, + 'process_time': process_time, + 'upload_time': upload_time + } + } diff --git a/benchmarks/400.inference/430.feature-gen/extractor/python/package.sh b/benchmarks/400.inference/430.feature-gen/extractor/python/package.sh new file mode 100644 index 00000000..772ec17f --- /dev/null +++ b/benchmarks/400.inference/430.feature-gen/extractor/python/package.sh @@ -0,0 +1,32 @@ +# Stripping package code is based on https://github.com/ryfeus/lambda-packs repo + +PACKAGE_DIR=$1 +echo "Original size $(du -sh $1 | cut -f1)" + +CUR_DIR=$(pwd) +cd $1 +# cleaning libs +rm -rf external +find . -type d -name "tests" -exec rm -rf {} + +find . -type d -name "test" -exec rm -rf {} + +find . -type d -name "bin" -not -path "*/pandas/*" -exec rm -rf {} + + +# cleaning +# stripping some of the numpy libs - libgfortran-2e0d59d6.so.5.0.0 - causes issues on Azure +find -name "*.so" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip +find -name "*.so.*" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip + +rm -r pip >/dev/null +rm -r pip-* >/dev/null +rm -r wheel >/dev/null +rm -r wheel-* >/dev/null +rm easy_install.py >/dev/null +find . 
-name \*.pyc -delete +cd ${CUR_DIR} +echo "Stripped size $(du -sh $1 | cut -f1)" + +if ([[ "${PLATFORM}" == "AWS" ]] || [[ "${PLATFORM}" == "GCP" ]]) && ([[ "${PYTHON_VERSION}" == "3.8" ]] || [[ "${PYTHON_VERSION}" == "3.9" ]]); then + zip -qr pandas.zip $1/pandas + rm -rf $1/pandas + echo "Pandas-zipped size $(du -sh ${CUR_DIR} | cut -f1)" +fi diff --git a/benchmarks/400.inference/430.feature-gen/extractor/python/requirements.txt b/benchmarks/400.inference/430.feature-gen/extractor/python/requirements.txt new file mode 100644 index 00000000..0bfc02c9 --- /dev/null +++ b/benchmarks/400.inference/430.feature-gen/extractor/python/requirements.txt @@ -0,0 +1,4 @@ +numpy<2 +pandas +scikit-learn +joblib diff --git a/benchmarks/400.inference/430.feature-gen/input.py b/benchmarks/400.inference/430.feature-gen/input.py new file mode 100644 index 00000000..eef61e31 --- /dev/null +++ b/benchmarks/400.inference/430.feature-gen/input.py @@ -0,0 +1,23 @@ +import glob, os + +def buckets_count(): + return (1, 0) + +def upload_files(data_root, data_dir, upload_func): + for root, dirs, files in os.walk(data_dir): + prefix = os.path.relpath(root, data_root) + for file in files: + file_name = prefix + '/' + file + filepath = os.path.join(root, file) + upload_func(0, file_name, filepath) + +def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func): + dataset_name = 'reviews10mb.csv' + upload_func(0, dataset_name, os.path.join(data_dir, 'dataset', dataset_name)) + + input_config = {'object': {}, 'bucket': {}} + input_config['object']['key'] = dataset_name + input_config['bucket']['name'] = benchmarks_bucket + input_config['bucket']['path'] = input_paths[0] + input_config['extractors'] = 5 + return input_config diff --git a/benchmarks/400.inference/430.feature-gen/job_status/config.json b/benchmarks/400.inference/430.feature-gen/job_status/config.json new file mode 100644 index 00000000..178b0bf7 --- /dev/null +++ b/benchmarks/400.inference/430.feature-gen/job_status/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 120, + "memory": 128, + "languages": ["python"], + "trigger": "queue" +} diff --git a/benchmarks/400.inference/430.feature-gen/job_status/python/function.py b/benchmarks/400.inference/430.feature-gen/job_status/python/function.py new file mode 100644 index 00000000..5f2676da --- /dev/null +++ b/benchmarks/400.inference/430.feature-gen/job_status/python/function.py @@ -0,0 +1,47 @@ +import datetime, json, time + +from . import misc +from . import queue +from . 
import storage +client = storage.storage.get_instance() + + +def handler(event): + bucket = event['output_bucket']['name'] + file_count = int(event['file_count']) + + wait_begin = datetime.datetime.now() + while (True): + objs = client.list_objects(bucket, misc.object_path('extractors_output', '')) + + if (file_count == len(objs)): + wait_end = datetime.datetime.now() + orchestrator_input = {'bucket': {}} + orchestrator_input['bucket']['name'] = bucket + orchestrator_input['start_reducer'] = True + orchestrator_input['parent_execution_id'] = event['request-id'] + + queue_begin = datetime.datetime.now() + queue_client = queue.queue( + misc.function_name( + fname='orchestrator', + language='python', + version='3.9', + trigger='queue' + ) + ) + queue_client.send_message(json.dumps(orchestrator_input)) + queue_end = datetime.datetime.now() + + wait_time = (wait_end - wait_begin) / datetime.timedelta(microseconds=1) + queue_time = (queue_end - queue_begin) / datetime.timedelta(microseconds=1) + return { + 'result': orchestrator_input, + 'fns_triggered': 1, + 'measurement': { + 'wait_time': wait_time, + 'queue_time': queue_time + } + } + else: + time.sleep(10) diff --git a/benchmarks/400.inference/430.feature-gen/job_status/python/requirements.txt b/benchmarks/400.inference/430.feature-gen/job_status/python/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/400.inference/430.feature-gen/orchestrator/config.json b/benchmarks/400.inference/430.feature-gen/orchestrator/config.json new file mode 100644 index 00000000..670c7d7f --- /dev/null +++ b/benchmarks/400.inference/430.feature-gen/orchestrator/config.json @@ -0,0 +1,7 @@ +{ + "timeout": 300, + "memory": 1024, + "languages": ["python"], + "trigger": "queue", + "entrypoint": true +} diff --git a/benchmarks/400.inference/430.feature-gen/orchestrator/python/function.py b/benchmarks/400.inference/430.feature-gen/orchestrator/python/function.py new file mode 100644 index 00000000..1521edb8 --- /dev/null +++ b/benchmarks/400.inference/430.feature-gen/orchestrator/python/function.py @@ -0,0 +1,120 @@ +import datetime, io, json, os, sys, uuid, zipfile + +from . import misc +from . import queue +from . import storage +client = storage.storage.get_instance() + +# Extract zipped pandas - which is otherwise too large for AWS/GCP. 
+if os.path.exists('function/pandas.zip'): + zipfile.ZipFile('function/pandas.zip').extractall('/tmp/') + sys.path.append(os.path.join(os.path.dirname(__file__), '/tmp/.python_packages/lib/site-packages/')) + +if os.path.exists('./pandas.zip'): + zipfile.ZipFile('./pandas.zip').extractall('/tmp/') + sys.path.append(os.path.join(os.path.dirname(__file__), '/tmp/.python_packages/lib/site-packages/')) + +import pandas as pd +import numpy as np + + +def handler(event): + bucket = event['bucket']['name'] + + if ('start_reducer' in event): + reducer_input = {'bucket': {}} + reducer_input['bucket']['name'] = bucket + reducer_input['parent_execution_id'] = event['request-id'] + + queue_begin = datetime.datetime.now() + queue_client = queue.queue( + misc.function_name( + fname='reducer', + language='python', + version='3.9', + trigger='queue' + ) + ) + queue_client.send_message(json.dumps(reducer_input)) + queue_end = datetime.datetime.now() + + queue_time = (queue_end - queue_begin) / datetime.timedelta(microseconds=1) + return { + 'result': reducer_input, + 'fns_triggered': 1, + 'measurement': { + 'queue_time': queue_time + } + } + + bucket_path = event['bucket']['path'] + dataset_key = event['object']['key'] + extractors = int(event['extractors']) + + dataset_path = f'{bucket_path}/{dataset_key}' + dataset_local_path = '/tmp/' + dataset_key + + download_start = datetime.datetime.now() + client.download(bucket, dataset_path, dataset_local_path) + download_end = datetime.datetime.now() + + process_start = datetime.datetime.now() + df = pd.read_csv(dataset_local_path) + shards = np.array_split(df, extractors) + process_end = datetime.datetime.now() + + # Prepare and send the output. Trigger 'extractors' and 'job_status'. + extractor_bucket = misc.function_name( + fname='extractor', + language='python', + version='3.9', + trigger='storage' + ) + + upload_start = datetime.datetime.now() + for shard in shards: + key = f'shard-{uuid.uuid4()}' + + extractor_input = {'object': {}, 'output_bucket': {}} + extractor_input['object']['key'] = key + extractor_input['output_bucket']['name'] = bucket + extractor_input['input'] = shard.to_json() + extractor_input['parent_execution_id'] = event['request-id'] + client.upload_stream( + extractor_bucket, + key, + io.BytesIO(json.dumps(extractor_input).encode('utf-8')), + True + ) + upload_end = datetime.datetime.now() + + job_status_input = {'output_bucket': {}} + job_status_input['output_bucket']['name'] = bucket + job_status_input['file_count'] = extractors + job_status_input['parent_execution_id'] = event['request-id'] + queue_begin = datetime.datetime.now() + queue_client = queue.queue( + misc.function_name( + fname='job_status', + language='python', + version='3.9', + trigger='queue' + ) + ) + queue_client.send_message(json.dumps(job_status_input)) + queue_end = datetime.datetime.now() + + download_time = (download_end - download_start) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_start) / datetime.timedelta(microseconds=1) + upload_time = (upload_end - upload_start) / datetime.timedelta(microseconds=1) + queue_time = (queue_end - queue_begin) / datetime.timedelta(microseconds=1) + return { + 'result': 0, + 'fns_triggered': extractors + 1, + 'measurement': { + 'download_time': download_time, + 'process_time': process_time, + 'upload_time': upload_time, + 'queue_time': queue_time + } + } diff --git a/benchmarks/400.inference/430.feature-gen/orchestrator/python/package.sh 
b/benchmarks/400.inference/430.feature-gen/orchestrator/python/package.sh new file mode 100644 index 00000000..772ec17f --- /dev/null +++ b/benchmarks/400.inference/430.feature-gen/orchestrator/python/package.sh @@ -0,0 +1,32 @@ +# Stripping package code is based on https://github.com/ryfeus/lambda-packs repo + +PACKAGE_DIR=$1 +echo "Original size $(du -sh $1 | cut -f1)" + +CUR_DIR=$(pwd) +cd $1 +# cleaning libs +rm -rf external +find . -type d -name "tests" -exec rm -rf {} + +find . -type d -name "test" -exec rm -rf {} + +find . -type d -name "bin" -not -path "*/pandas/*" -exec rm -rf {} + + +# cleaning +# stripping some of the numpy libs - libgfortran-2e0d59d6.so.5.0.0 - causes issues on Azure +find -name "*.so" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip +find -name "*.so.*" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip + +rm -r pip >/dev/null +rm -r pip-* >/dev/null +rm -r wheel >/dev/null +rm -r wheel-* >/dev/null +rm easy_install.py >/dev/null +find . -name \*.pyc -delete +cd ${CUR_DIR} +echo "Stripped size $(du -sh $1 | cut -f1)" + +if ([[ "${PLATFORM}" == "AWS" ]] || [[ "${PLATFORM}" == "GCP" ]]) && ([[ "${PYTHON_VERSION}" == "3.8" ]] || [[ "${PYTHON_VERSION}" == "3.9" ]]); then + zip -qr pandas.zip $1/pandas + rm -rf $1/pandas + echo "Pandas-zipped size $(du -sh ${CUR_DIR} | cut -f1)" +fi diff --git a/benchmarks/400.inference/430.feature-gen/orchestrator/python/requirements.txt b/benchmarks/400.inference/430.feature-gen/orchestrator/python/requirements.txt new file mode 100644 index 00000000..0bfc02c9 --- /dev/null +++ b/benchmarks/400.inference/430.feature-gen/orchestrator/python/requirements.txt @@ -0,0 +1,4 @@ +numpy<2 +pandas +scikit-learn +joblib diff --git a/benchmarks/400.inference/430.feature-gen/reducer/config.json b/benchmarks/400.inference/430.feature-gen/reducer/config.json new file mode 100644 index 00000000..b4d7b2b6 --- /dev/null +++ b/benchmarks/400.inference/430.feature-gen/reducer/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 540, + "memory": 1024, + "languages": ["python"], + "trigger": "queue" +} diff --git a/benchmarks/400.inference/430.feature-gen/reducer/python/function.py b/benchmarks/400.inference/430.feature-gen/reducer/python/function.py new file mode 100644 index 00000000..cc7062a5 --- /dev/null +++ b/benchmarks/400.inference/430.feature-gen/reducer/python/function.py @@ -0,0 +1,60 @@ +import datetime, io +from sklearn.feature_extraction.text import TfidfVectorizer + +from . import misc +from . import storage +client = storage.storage.get_instance() + + +def handler(event): + bucket = event['bucket']['name'] + + list_begin = datetime.datetime.now() + objs = client.list_objects(bucket, misc.object_path('extractors_output', '')) + list_end = datetime.datetime.now() + + result = [] + preprocess_begin = datetime.datetime.now() + for obj in objs: + body = str(client.get_object(bucket, obj)) + + word = body.replace("'", '').split(',') + result.extend(word) + preprocess_end = datetime.datetime.now() + + # Cleanup the bucket between function iterations. 
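Once the inputs are cleaned up below, the reducer fits TF-IDF on the flat token list, where every token counts as its own document, so the stored feature is essentially the merged vocabulary. A minimal sketch with made-up tokens, using the TfidfVectorizer imported at the top of this file:

# Sketch with made-up tokens; mirrors the TF-IDF step further below.
vocab = TfidfVectorizer().fit(['great', 'food', 'great', 'service']).get_feature_names_out()
# vocab -> array(['food', 'great', 'service'], dtype=object)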
+ delete_begin = datetime.datetime.now() + for obj in objs: + client.delete_object(bucket, obj) + delete_end = datetime.datetime.now() + + process_begin = datetime.datetime.now() + tfidf_vect = TfidfVectorizer().fit(result) + feature = str(tfidf_vect.get_feature_names_out()) + feature = feature.lstrip('[').rstrip(']').replace(' ' , '') + process_end = datetime.datetime.now() + + upload_begin = datetime.datetime.now() + client.upload_stream( + bucket, + misc.object_path('reducer_output', 'feature'), + io.BytesIO(feature.encode('utf-8')), + True + ) + upload_end = datetime.datetime.now() + + list_time = (list_end - list_begin) / datetime.timedelta(microseconds=1) + preprocess_time = (preprocess_end - preprocess_begin) / datetime.timedelta(microseconds=1) + delete_time = (delete_end - delete_begin) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1) + return { + 'result': 0, + 'measurement': { + 'list_time': list_time, + 'preprocess_time': preprocess_time, + 'delete_time': delete_time, + 'process_time': process_time, + 'upload_time': upload_time + } + } diff --git a/benchmarks/400.inference/430.feature-gen/reducer/python/package.sh b/benchmarks/400.inference/430.feature-gen/reducer/python/package.sh new file mode 100644 index 00000000..772ec17f --- /dev/null +++ b/benchmarks/400.inference/430.feature-gen/reducer/python/package.sh @@ -0,0 +1,32 @@ +# Stripping package code is based on https://github.com/ryfeus/lambda-packs repo + +PACKAGE_DIR=$1 +echo "Original size $(du -sh $1 | cut -f1)" + +CUR_DIR=$(pwd) +cd $1 +# cleaning libs +rm -rf external +find . -type d -name "tests" -exec rm -rf {} + +find . -type d -name "test" -exec rm -rf {} + +find . -type d -name "bin" -not -path "*/pandas/*" -exec rm -rf {} + + +# cleaning +# stripping some of the numpy libs - libgfortran-2e0d59d6.so.5.0.0 - causes issues on Azure +find -name "*.so" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip +find -name "*.so.*" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip + +rm -r pip >/dev/null +rm -r pip-* >/dev/null +rm -r wheel >/dev/null +rm -r wheel-* >/dev/null +rm easy_install.py >/dev/null +find . 
-name \*.pyc -delete +cd ${CUR_DIR} +echo "Stripped size $(du -sh $1 | cut -f1)" + +if ([[ "${PLATFORM}" == "AWS" ]] || [[ "${PLATFORM}" == "GCP" ]]) && ([[ "${PYTHON_VERSION}" == "3.8" ]] || [[ "${PYTHON_VERSION}" == "3.9" ]]); then + zip -qr pandas.zip $1/pandas + rm -rf $1/pandas + echo "Pandas-zipped size $(du -sh ${CUR_DIR} | cut -f1)" +fi diff --git a/benchmarks/400.inference/430.feature-gen/reducer/python/requirements.txt b/benchmarks/400.inference/430.feature-gen/reducer/python/requirements.txt new file mode 100644 index 00000000..0bfc02c9 --- /dev/null +++ b/benchmarks/400.inference/430.feature-gen/reducer/python/requirements.txt @@ -0,0 +1,4 @@ +numpy<2 +pandas +scikit-learn +joblib diff --git a/benchmarks/500.scientific/505.map-reduce/config.json b/benchmarks/500.scientific/505.map-reduce/config.json new file mode 100644 index 00000000..fd954c87 --- /dev/null +++ b/benchmarks/500.scientific/505.map-reduce/config.json @@ -0,0 +1,5 @@ +{ + "type": "app", + "resources": [] +} + \ No newline at end of file diff --git a/benchmarks/500.scientific/505.map-reduce/input.py b/benchmarks/500.scientific/505.map-reduce/input.py new file mode 100644 index 00000000..13a5b03d --- /dev/null +++ b/benchmarks/500.scientific/505.map-reduce/input.py @@ -0,0 +1,14 @@ +import glob, os + +def buckets_count(): + return (1, 0) + +def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func): + # Consider using a larger text file as input: + # input_text_file = '' + # upload_func(0, input_text_file, os.path.join(data_dir, 'input_text', input_text_file)) + + input_config = {} + input_config['mappers'] = 2 + input_config['text'] = 'the quick brown fox jumps jumps. over the lazy lazy lazy dog dog' + return input_config diff --git a/benchmarks/500.scientific/505.map-reduce/mapper/config.json b/benchmarks/500.scientific/505.map-reduce/mapper/config.json new file mode 100644 index 00000000..993a4481 --- /dev/null +++ b/benchmarks/500.scientific/505.map-reduce/mapper/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 30, + "memory": 128, + "languages": ["python"], + "trigger": "storage" +} diff --git a/benchmarks/500.scientific/505.map-reduce/mapper/python/function.py b/benchmarks/500.scientific/505.map-reduce/mapper/python/function.py new file mode 100644 index 00000000..43a9f850 --- /dev/null +++ b/benchmarks/500.scientific/505.map-reduce/mapper/python/function.py @@ -0,0 +1,55 @@ +import datetime, io, json, os, uuid + +from . import misc +from . 
import storage +client = storage.storage.get_instance() + + +def handler(event): + text = event['text'] + + # split by space + process_begin = datetime.datetime.now() + words = text.split(' ') + + # count for every word + counts = {} + for word in words: + if word not in counts: + counts[word] = 1 + else: + counts[word] += 1 + counts = dict(sorted(counts.items())) + process_end = datetime.datetime.now() + + sorter_input = { + 'counts': counts, + 'mappers': event['mappers'], + 'parent_execution_id': event['request-id'] + } + + file_name = f'payload{str(uuid.uuid4())}.json' + file_path = f'/tmp/{file_name}' + with open(file_path, 'w') as f: + f.write(json.dumps(sorter_input)) + + bucket = misc.function_name( + fname='sorter', + language='python', + version='3.9', + trigger='storage' + ) + upload_begin = datetime.datetime.now() + client.upload(bucket, file_name, file_path) + upload_end = datetime.datetime.now() + + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1) + return { + 'result': counts, + 'fns_triggered': 1, + 'measurement': { + 'process_time': process_time, + 'upload_time': upload_time + } + } diff --git a/benchmarks/500.scientific/505.map-reduce/mapper/python/requirements.txt b/benchmarks/500.scientific/505.map-reduce/mapper/python/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/500.scientific/505.map-reduce/reducer/config.json b/benchmarks/500.scientific/505.map-reduce/reducer/config.json new file mode 100644 index 00000000..993a4481 --- /dev/null +++ b/benchmarks/500.scientific/505.map-reduce/reducer/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 30, + "memory": 128, + "languages": ["python"], + "trigger": "storage" +} diff --git a/benchmarks/500.scientific/505.map-reduce/reducer/python/function.py b/benchmarks/500.scientific/505.map-reduce/reducer/python/function.py new file mode 100644 index 00000000..8867fcdc --- /dev/null +++ b/benchmarks/500.scientific/505.map-reduce/reducer/python/function.py @@ -0,0 +1,30 @@ +import datetime, io, json, os, uuid + +from . 
import storage
+client = storage.storage.get_instance()
+
+
+def handler(event):
+ text = event['input']
+
+ count = 0
+ word_for_this_reducer = ''
+
+ process_begin = datetime.datetime.now()
+ words = text.split('\n')[:-1]
+ for word in words:
+ splits = word.split(',')
+ word_for_this_reducer = splits[0]
+ count += int(splits[1])
+ process_end = datetime.datetime.now()
+
+ process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1)
+ return {
+ 'result': { # Could also be written to S3
+ word_for_this_reducer: count
+ },
+ 'measurement': {
+ 'process_time': process_time
+ },
+ 'fns_triggered': 0
+ }
diff --git a/benchmarks/500.scientific/505.map-reduce/reducer/python/requirements.txt b/benchmarks/500.scientific/505.map-reduce/reducer/python/requirements.txt new file mode 100644 index 00000000..e69de29b
diff --git a/benchmarks/500.scientific/505.map-reduce/sorter/config.json b/benchmarks/500.scientific/505.map-reduce/sorter/config.json new file mode 100644 index 00000000..57fb5b4a --- /dev/null +++ b/benchmarks/500.scientific/505.map-reduce/sorter/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 60, + "memory": 256, + "languages": ["python"], + "trigger": "storage" +}
diff --git a/benchmarks/500.scientific/505.map-reduce/sorter/python/function.py b/benchmarks/500.scientific/505.map-reduce/sorter/python/function.py new file mode 100644 index 00000000..5163a0a6 --- /dev/null +++ b/benchmarks/500.scientific/505.map-reduce/sorter/python/function.py @@ -0,0 +1,101 @@
+import datetime, io, json, os, uuid
+
+from . import misc
+from . import storage
+client = storage.storage.get_instance()
+
+
+def handler(event):
+ mappers = int(event['mappers'])
+
+ # check that all files from the mappers are ready
+ fn_name = misc.function_name(
+ fname='sorter',
+ language='python',
+ version='3.9',
+ trigger='storage'
+ )
+ list_begin = datetime.datetime.now()
+ objs = client.list_objects(fn_name)
+ list_end = datetime.datetime.now()
+ list_time = (list_end - list_begin) / datetime.timedelta(microseconds=1)
+
+ if (len(objs) != mappers):
+ return {
+ 'result': 0,
+ 'measurement': {
+ 'list_time': list_time
+ }
+ }
+
+ # download everything and stick it together: ['bear,1', 'pear,3', 'pear,4']
+ process_begin = datetime.datetime.now()
+ word_list = []
+ for obj in objs:
+ words = client.get_object(fn_name, obj)
+ words = json.loads(words)
+
+ for k, v in words['counts'].items():
+ word_list.append('{},{}'.format(k, str(v)))
+
+ # sort
+ word_list.sort()
+
+ # everything which is the same goes into one file, e.g.
all pears + current = [word_list[0]] + groups = [] + for i in range(0, len(word_list) - 1): + if word_list[i].split(',')[0] == word_list[i + 1].split(',')[0]: + current.append(word_list[i + 1]) + else: + groups.append(current) + current = [word_list[i + 1]] + if (len(current)): + groups.append(current) + + # flatten groups + new_group = [] + for group in groups: + flattened = '' + for word in group: + flattened += word + '\n' + new_group.append(flattened) + groups = new_group + process_end = datetime.datetime.now() + + # publish to bucket + upload_begin = datetime.datetime.now() + fns_triggered = len(groups) + for group in groups: + word = group.split(',')[0] + + reducer_input = { + 'input': group, + 'parent_execution_id': event['request-id'] + } + + local_path = f'/tmp/{word}' + with open(local_path, 'w') as f: + f.write(json.dumps(reducer_input)) + + fn_name = misc.function_name( + fname='reducer', + language='python', + version='3.9', + trigger='storage' + ) + client.upload(fn_name, word, local_path) + upload_end = datetime.datetime.now() + + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1) + return { + 'result': 0, + 'fns_triggered': fns_triggered, + 'measurement': { + 'list_time': list_time, + 'process_time': process_time, + 'upload_time': upload_time + } + } diff --git a/benchmarks/500.scientific/505.map-reduce/sorter/python/requirements.txt b/benchmarks/500.scientific/505.map-reduce/sorter/python/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/500.scientific/505.map-reduce/splitter/config.json b/benchmarks/500.scientific/505.map-reduce/splitter/config.json new file mode 100644 index 00000000..40218357 --- /dev/null +++ b/benchmarks/500.scientific/505.map-reduce/splitter/config.json @@ -0,0 +1,7 @@ +{ + "timeout": 30, + "memory": 128, + "languages": ["python"], + "trigger": "storage", + "entrypoint": true +} diff --git a/benchmarks/500.scientific/505.map-reduce/splitter/python/function.py b/benchmarks/500.scientific/505.map-reduce/splitter/python/function.py new file mode 100644 index 00000000..3bc0b0f7 --- /dev/null +++ b/benchmarks/500.scientific/505.map-reduce/splitter/python/function.py @@ -0,0 +1,44 @@ +import datetime, io, json, os + +from . import misc +from . import storage +client = storage.storage.get_instance() + + +def handler(event): + mappers = int(event['mappers']) + text = event['text'] + + # split by . 
+ sentences = text.split('.')
+
+ # size of the sentence chunk handed to each mapper
+ chunk_size = len(sentences) // mappers
+
+ # split the list according to how many mappers are declared
+ local_path = '/tmp/payload.json'
+ for i in range(mappers):
+ begin_range = i * chunk_size
+ # the last mapper also takes the remainder of the list
+ end_range = len(sentences) if i == mappers - 1 else (i + 1) * chunk_size
+ mapper_input = {
+ 'text': ' '.join(sentences[begin_range : end_range]),
+ 'mappers': mappers,
+ 'parent_execution_id': event['request-id']
+ }
+ with open(local_path, 'w') as f:
+ f.write(json.dumps(mapper_input))
+
+ # storage trigger code: for each mapper, upload to bucket
+ bucket = misc.function_name(
+ fname='mapper',
+ language='python',
+ version='3.9',
+ trigger='storage'
+ )
+ client.upload(bucket, f'payload{i}.json', local_path, True)
+
+ return {
+ 'result': 0,
+ 'fns_triggered': mappers,
+ 'measurement': {}
+ }
diff --git a/benchmarks/500.scientific/505.map-reduce/splitter/python/requirements.txt b/benchmarks/500.scientific/505.map-reduce/splitter/python/requirements.txt new file mode 100644 index 00000000..e69de29b
diff --git a/benchmarks/wrappers/aws/python/handler.py b/benchmarks/wrappers/aws/python/handler.py index 907b2c61..c228c4da 100644 --- a/benchmarks/wrappers/aws/python/handler.py +++ b/benchmarks/wrappers/aws/python/handler.py @@ -1,18 +1,42 @@ - import datetime, io, json, os, sys, uuid # Add current directory to allow location of packages sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) -# TODO: usual trigger -# implement support for S3 and others def handler(event, context): income_timestamp = datetime.datetime.now().timestamp() + populate_env_vars(context) + + # Flag to indicate whether the measurements should be returned as an HTTP + # response or via a result queue. + return_http = True + + # Queue trigger + if ("Records" in event and event["Records"][0]["eventSource"] == 'aws:sqs'): + event = json.loads(event["Records"][0]["body"]) + + return_http = False + + # Storage trigger + if ("Records" in event and "s3" in event["Records"][0]): + bucket_name = event["Records"][0]["s3"]["bucket"]["name"] + file_name = event["Records"][0]["s3"]["object"]["key"] + + from function import storage + storage_inst = storage.storage.get_instance() + + obj = storage_inst.get_object(bucket_name, file_name) + event = json.loads(obj) + + return_http = False + # HTTP trigger with API Gateway if 'body' in event: event = json.loads(event['body']) + + # Run function and measure.
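+ # At this point `event` holds the plain benchmark payload: SQS bodies, S3-referenced
+ # JSON objects and HTTP bodies were all unwrapped by the branches above. Illustrative
+ # raw shapes handled there:
+ #   SQS  -> {"Records": [{"eventSource": "aws:sqs", "body": "<json payload>"}]}
+ #   S3   -> {"Records": [{"s3": {"bucket": {"name": ...}, "object": {"key": ...}}}]}
+ #   HTTP -> {"body": "<json payload>"}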
req_id = context.aws_request_id event['request-id'] = req_id event['income-timestamp'] = income_timestamp @@ -24,6 +48,10 @@ def handler(event, context): log_data = { 'output': ret['result'] } + if 'fns_triggered' in ret and ret['fns_triggered'] > 0: + log_data['fns_triggered'] = ret['fns_triggered'] + if 'parent_execution_id' in event: + log_data['parent_execution_id'] = event['parent_execution_id'] if 'measurement' in ret: log_data['measurement'] = ret['measurement'] if 'logs' in event: @@ -55,17 +83,34 @@ def handler(event, context): if "cold_start" in os.environ: cold_start_var = os.environ["cold_start"] - return { - 'statusCode': 200, - 'body': json.dumps({ - 'begin': begin.strftime('%s.%f'), - 'end': end.strftime('%s.%f'), - 'results_time': results_time, - 'is_cold': is_cold, - 'result': log_data, - 'request_id': context.aws_request_id, - 'cold_start_var': cold_start_var, - 'container_id': container_id, - }) - } + stats = json.dumps({ + 'begin': begin.strftime('%s.%f'), + 'end': end.strftime('%s.%f'), + 'results_time': results_time, + 'is_cold': is_cold, + 'result': log_data, + 'request_id': context.aws_request_id, + 'cold_start_var': cold_start_var, + 'container_id': container_id, + }) + + # Send the results onwards. + result_queue = os.getenv('RESULT_QUEUE') + + if (return_http or result_queue is None): + # HTTP / library trigger, standalone function: return an HTTP response. + return { + 'statusCode': 200, + 'body': stats + } + else: + # Queue trigger, storage trigger, or application: write to a queue. + from function import queue + queue_client = queue.queue(result_queue) + queue_client.send_message(stats) + +def populate_env_vars(context): + arn = context.invoked_function_arn.split(":") + os.environ['REGION'] = arn[3] + os.environ['ACCOUNT_ID'] = arn[4] diff --git a/benchmarks/wrappers/aws/python/misc.py b/benchmarks/wrappers/aws/python/misc.py new file mode 100644 index 00000000..92f0d565 --- /dev/null +++ b/benchmarks/wrappers/aws/python/misc.py @@ -0,0 +1,23 @@ +import os + +def function_name( + fname: str, + language: str, + version: str, + trigger: str +): + app_name = os.getenv('APP_NAME') + full_name = f'{app_name}_{fname}_{language}_{version}' + full_name = full_name.replace('.', '_') + + if (trigger == 'storage'): + full_name = full_name.replace('_', '-') + + return full_name + +def object_path(path: str, key: str): + app_name = os.getenv('APP_NAME') + path = f'{app_name}-{path}/{key}' + path = path.replace('_', '-') + + return path diff --git a/benchmarks/wrappers/aws/python/queue.py b/benchmarks/wrappers/aws/python/queue.py new file mode 100644 index 00000000..ac13f3c0 --- /dev/null +++ b/benchmarks/wrappers/aws/python/queue.py @@ -0,0 +1,17 @@ +import boto3, os + +class queue: + client = None + + def __init__(self, queue_name: str): + account_id = os.getenv('ACCOUNT_ID') + region = os.getenv('REGION') + + self.client = boto3.client('sqs', region_name=region) + self.queue_url = f"https://sqs.{region}.amazonaws.com/{account_id}/{queue_name}" + + def send_message(self, message: str): + self.client.send_message( + QueueUrl=self.queue_url, + MessageBody=message, + ) diff --git a/benchmarks/wrappers/aws/python/storage.py b/benchmarks/wrappers/aws/python/storage.py index 4be0025e..111dd8b9 100644 --- a/benchmarks/wrappers/aws/python/storage.py +++ b/benchmarks/wrappers/aws/python/storage.py @@ -21,8 +21,10 @@ def unique_name(name): random=str(uuid.uuid4()).split('-')[0] ) - def upload(self, bucket, file, filepath): + def upload(self, bucket, file, filepath, 
overwrite=False): key_name = storage.unique_name(file) + if (overwrite): + key_name = file self.client.upload_file(filepath, bucket, key_name) return key_name @@ -37,8 +39,10 @@ def download_directory(self, bucket, prefix, path): os.makedirs(os.path.join(path, path_to_file), exist_ok=True) self.download(bucket, file_name, os.path.join(path, file_name)) - def upload_stream(self, bucket, file, data): + def upload_stream(self, bucket, file, data, overwrite=False): key_name = storage.unique_name(file) + if (overwrite): + key_name = file self.client.upload_fileobj(data, bucket, key_name) return key_name @@ -46,8 +50,26 @@ def download_stream(self, bucket, file): data = io.BytesIO() self.client.download_fileobj(bucket, file, data) return data.getbuffer() - + + def get_object(self, bucket, file): + obj = self.client.get_object(Bucket=bucket, Key=file) + return obj['Body'].read() + def get_instance(): if storage.instance is None: storage.instance = storage() return storage.instance + + def list_objects(self, bucket, prefix=None): + if (not prefix): + prefix = '' + res = self.client.list_objects(Bucket=bucket, Prefix=prefix) + + objs = [] + for obj in res['Contents']: + objs.append(obj['Key']) + + return objs + + def delete_object(self, bucket, key): + self.client.delete_object(Bucket=bucket, Key=key) diff --git a/benchmarks/wrappers/azure/python/handler.py b/benchmarks/wrappers/azure/python/handler.py index 5f7f14f2..9e025969 100644 --- a/benchmarks/wrappers/azure/python/handler.py +++ b/benchmarks/wrappers/azure/python/handler.py @@ -1,18 +1,76 @@ -import datetime, io, json, os, uuid +import base64 +import datetime, io, json, logging, os, uuid + +from azure.identity import ManagedIdentityCredential +from azure.storage.queue import QueueClient import azure.functions as func -# TODO: usual trigger -# implement support for blob and others -def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: +def handler_http(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: income_timestamp = datetime.datetime.now().timestamp() + req_json = req.get_json() + if 'connection_string' in req_json: os.environ['STORAGE_CONNECTION_STRING'] = req_json['connection_string'] + req_json['request-id'] = context.invocation_id req_json['income-timestamp'] = income_timestamp + + return func.HttpResponse(measure(req_json), mimetype="application/json") + +def handler_queue(msg: func.QueueMessage, context: func.Context): + income_timestamp = datetime.datetime.now().timestamp() + + populate_env_vars() + + payload = msg.get_json() + + payload['request-id'] = context.invocation_id + payload['income-timestamp'] = income_timestamp + + stats = measure(payload) + + # Send the results onwards. + result_queue = os.getenv('RESULT_QUEUE') + storage_account = os.getenv('DATA_STORAGE_ACCOUNT') + + if (result_queue and storage_account): + + from . import queue + queue_client = queue.queue(result_queue) + queue_client.send_message(stats) + +def handler_storage(blob: func.InputStream, context: func.Context): + income_timestamp = datetime.datetime.now().timestamp() + + populate_env_vars() + + payload = json.loads(blob.readline().decode('utf-8')) + logging.info(payload) + + payload['request-id'] = context.invocation_id + payload['income-timestamp'] = income_timestamp + + stats = measure(payload) + + # Send the results onwards. + result_queue = os.getenv('RESULT_QUEUE') + storage_account = os.getenv('DATA_STORAGE_ACCOUNT') + + if (result_queue and storage_account): + + from . 
import queue + queue_client = queue.queue(result_queue) + queue_client.send_message(stats) + +# Contains generic logic for gathering measurements for the function at hand, +# given a request JSON. Used by all handlers, regardless of the trigger. +def measure(req_json) -> str: + req_id = req_json['request-id'] + begin = datetime.datetime.now() # We are deployed in the same directory from . import function @@ -22,6 +80,10 @@ def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: log_data = { 'output': ret['result'] } + if 'fns_triggered' in ret and ret['fns_triggered'] > 0: + log_data['fns_triggered'] = ret['fns_triggered'] + if 'parent_execution_id' in req_json: + log_data['parent_execution_id'] = req_json['parent_execution_id'] if 'measurement' in ret: log_data['measurement'] = ret['measurement'] if 'logs' in req_json: @@ -30,7 +92,6 @@ def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: from . import storage storage_inst = storage.storage.get_instance() b = req_json.get('logs').get('bucket') - req_id = context.invocation_id storage_inst.upload_stream(b, '{}.json'.format(req_id), io.BytesIO(json.dumps(log_data).encode('utf-8'))) results_end = datetime.datetime.now() @@ -58,8 +119,7 @@ def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: cold_marker = True is_cold_worker = True - return func.HttpResponse( - json.dumps({ + return json.dumps({ 'begin': begin.strftime('%s.%f'), 'end': end.strftime('%s.%f'), 'results_time': results_time, @@ -68,8 +128,8 @@ def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: 'is_cold_worker': is_cold_worker, 'container_id': container_id, 'environ_container_id': os.environ['CONTAINER_NAME'], - 'request_id': context.invocation_id - }), - mimetype="application/json" - ) + 'request_id': req_id + }) +def populate_env_vars(): + os.environ['ACCOUNT_ID'] = os.getenv('DATA_STORAGE_ACCOUNT') diff --git a/benchmarks/wrappers/azure/python/misc.py b/benchmarks/wrappers/azure/python/misc.py new file mode 100644 index 00000000..714470e0 --- /dev/null +++ b/benchmarks/wrappers/azure/python/misc.py @@ -0,0 +1,26 @@ +import os + +def function_name( + fname: str, + language: str, + version: str, + trigger: str +): + app_name = os.getenv('APP_NAME') + app_name = app_name[:app_name.rfind('-')] + + storage_account = os.getenv('ACCOUNT_ID') + storage_account = storage_account[7:] + + full_name = f"{app_name}-{fname}-{language}-{version}-{storage_account}-{trigger}" + full_name = full_name.replace(".", "-") + full_name = full_name.replace("_", "-") + + return full_name + +def object_path(path: str, key: str): + app_name = os.getenv('APP_NAME') + path = f"{app_name}-{path}/{key}" + path = path.replace("_", "-") + + return path diff --git a/benchmarks/wrappers/azure/python/queue.py b/benchmarks/wrappers/azure/python/queue.py new file mode 100644 index 00000000..465ea057 --- /dev/null +++ b/benchmarks/wrappers/azure/python/queue.py @@ -0,0 +1,20 @@ +import os + +from azure.identity import ManagedIdentityCredential +from azure.storage.queue import QueueClient, BinaryBase64DecodePolicy, BinaryBase64EncodePolicy + +class queue: + client = None + + def __init__(self, queue_name: str): + storage_account = os.getenv('ACCOUNT_ID') + account_url = f"https://{storage_account}.queue.core.windows.net" + managed_credential = ManagedIdentityCredential() + self.client = QueueClient(account_url, + queue_name=queue_name, + credential=managed_credential, + message_encode_policy=BinaryBase64EncodePolicy(), + 
message_decode_policy=BinaryBase64DecodePolicy()) + + def send_message(self, message: str): + self.client.send_message(message.encode('utf-8')) diff --git a/benchmarks/wrappers/azure/python/storage.py b/benchmarks/wrappers/azure/python/storage.py index 74c08307..4257c48a 100644 --- a/benchmarks/wrappers/azure/python/storage.py +++ b/benchmarks/wrappers/azure/python/storage.py @@ -22,9 +22,9 @@ def unique_name(name): random=str(uuid.uuid4()).split('-')[0] ) - def upload(self, container, file, filepath): + def upload(self, container, file, filepath, overwrite=False): with open(filepath, 'rb') as data: - return self.upload_stream(container, file, data) + return self.upload_stream(container, file, data, overwrite) def download(self, container, file, filepath): with open(filepath, 'wb') as download_file: @@ -39,13 +39,15 @@ def download_directory(self, container, prefix, path): os.makedirs(os.path.join(path, path_to_file), exist_ok=True) self.download(container, file_name, os.path.join(path, file_name)) - def upload_stream(self, container, file, data): + def upload_stream(self, container, file, data, overwrite=False): key_name = storage.unique_name(file) + if (overwrite): + key_name = file client = self.client.get_blob_client( container=container, blob=key_name ) - client.upload_blob(data) + client.upload_blob(data, overwrite=overwrite) return key_name def download_stream(self, container, file): @@ -56,3 +58,23 @@ def get_instance(): if storage.instance is None: storage.instance = storage() return storage.instance + + def get_object(self, container, key): + blob_client = self.client.get_blob_client(container=container, blob=key) + downloader = blob_client.download_blob() + return downloader.readall() + + def list_objects(self, container, prefix=None): + client = self.client.get_container_client(container=container) + + # Azure returns an iterator. Turn it into a list. + objs = [] + res = client.list_blob_names(name_starts_with=prefix) + for obj in res: + objs.append(obj) + + return objs + + def delete_object(self, bucket, key): + blob_client = self.client.get_blob_client(container=bucket, blob=key) + blob_client.delete_blob(delete_snapshots="include") diff --git a/benchmarks/wrappers/gcp/python/handler.py b/benchmarks/wrappers/gcp/python/handler.py index b9017b52..0e1cbf03 100644 --- a/benchmarks/wrappers/gcp/python/handler.py +++ b/benchmarks/wrappers/gcp/python/handler.py @@ -1,16 +1,77 @@ -import datetime, io, json, os, uuid, sys +import base64, datetime, io, json, os, uuid, sys -sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) +from google.auth import default +from google.cloud import storage as gcp_storage +sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) -def handler(req): +def handler_http(req): income_timestamp = datetime.datetime.now().timestamp() req_id = req.headers.get('Function-Execution-Id') - req_json = req.get_json() req_json['request-id'] = req_id req_json['income-timestamp'] = income_timestamp + + return measure(req_json), 200, {'ContentType': 'application/json'} + +def handler_queue(data, context): + income_timestamp = datetime.datetime.now().timestamp() + + populate_env_vars() + + serialized_payload = data.get('data') + payload = json.loads(base64.b64decode(serialized_payload).decode("utf-8")) + + payload['request-id'] = context.event_id + payload['income-timestamp'] = income_timestamp + + stats = measure(payload) + + # Send the results onwards. 
+ result_queue = os.getenv('RESULT_QUEUE') + + if (result_queue): + from function import queue + queue_client = queue.queue(result_queue) + queue_client.send_message(stats) + +def handler_storage(data, context): + income_timestamp = datetime.datetime.now().timestamp() + + populate_env_vars() + + bucket_name = data.get('bucket') + name = data.get('name') + filepath = '/tmp/bucket_contents' + + from function import storage + storage_inst = storage.storage.get_instance() + storage_inst.download(bucket_name, name, filepath) + + payload = {} + + with open(filepath, 'r') as fp: + payload = json.load(fp) + + payload['request-id'] = context.event_id + payload['income-timestamp'] = income_timestamp + + stats = measure(payload) + + # Send the results onwards. + result_queue = os.getenv('RESULT_QUEUE') + + if (result_queue): + from function import queue + queue_client = queue.queue(result_queue) + queue_client.send_message(stats) + +# Contains generic logic for gathering measurements for the function at hand, +# given a request JSON. Used by all handlers, regardless of the trigger. +def measure(req_json) -> str: + req_id = req_json['request-id'] + begin = datetime.datetime.now() # We are deployed in the same directorygit status from function import function @@ -21,6 +82,10 @@ def handler(req): log_data = { 'output': ret['result'] } + if 'fns_triggered' in ret and ret['fns_triggered'] > 0: + log_data['fns_triggered'] = ret['fns_triggered'] + if 'parent_execution_id' in req_json: + log_data['parent_execution_id'] = req_json['parent_execution_id'] if 'measurement' in ret: log_data['measurement'] = ret['measurement'] if 'logs' in req_json: @@ -61,4 +126,8 @@ def handler(req): 'request_id': req_id, 'cold_start_var': cold_start_var, 'container_id': container_id, - }), 200, {'ContentType': 'application/json'} + }) + +def populate_env_vars(): + _, project_id = default() + os.environ['ACCOUNT_ID'] = project_id diff --git a/benchmarks/wrappers/gcp/python/misc.py b/benchmarks/wrappers/gcp/python/misc.py new file mode 100644 index 00000000..dab78bf5 --- /dev/null +++ b/benchmarks/wrappers/gcp/python/misc.py @@ -0,0 +1,20 @@ +import os + +def function_name( + fname: str, + language: str, + version: str, + trigger: str +): + app_name = os.getenv('APP_NAME') + full_name = f"{app_name}_{fname}_{language}_{version}-{trigger}" + full_name = full_name.replace(".", "_") + + return full_name + +def object_path(path: str, key: str): + app_name = os.getenv('APP_NAME') + path = f"{app_name}-{path}/{key}" + path = path.replace("_", "-") + + return path diff --git a/benchmarks/wrappers/gcp/python/queue.py b/benchmarks/wrappers/gcp/python/queue.py new file mode 100644 index 00000000..f3ba7d35 --- /dev/null +++ b/benchmarks/wrappers/gcp/python/queue.py @@ -0,0 +1,16 @@ +import os + +from google.cloud import pubsub_v1 + +class queue: + client = None + + def __init__(self, topic_name: str): + self.client = pubsub_v1.PublisherClient() + self.topic_name = 'projects/{project_id}/topics/{topic}'.format( + project_id=os.getenv('ACCOUNT_ID'), + topic=topic_name, + ) + + def send_message(self, message: str): + self.client.publish(self.topic_name, message.encode("utf-8")) diff --git a/benchmarks/wrappers/gcp/python/storage.py b/benchmarks/wrappers/gcp/python/storage.py index 81163cb3..6ce891a6 100644 --- a/benchmarks/wrappers/gcp/python/storage.py +++ b/benchmarks/wrappers/gcp/python/storage.py @@ -1,4 +1,5 @@ import io +import json import os import uuid @@ -21,8 +22,10 @@ def unique_name(name): random=str(uuid.uuid4()).split('-')[0] ) - 
def upload(self, bucket, file, filepath): + def upload(self, bucket, file, filepath, overwrite=False): key_name = storage.unique_name(file) + if (overwrite): + key_name = file bucket_instance = self.client.bucket(bucket) blob = bucket_instance.blob(key_name) blob.upload_from_filename(filepath) @@ -41,8 +44,10 @@ def download_directory(self, bucket, prefix, path): os.makedirs(os.path.join(path, path_to_file), exist_ok=True) self.download(bucket, file_name, os.path.join(path, file_name)) - def upload_stream(self, bucket, file, data): + def upload_stream(self, bucket, file, data, overwrite=False): key_name = storage.unique_name(file) + if (overwrite): + key_name = file bucket_instance = self.client.bucket(bucket) blob = bucket_instance.blob(key_name) blob.upload_from_file(data) @@ -55,7 +60,27 @@ def download_stream(self, bucket, file): blob.download_to_file(data) return data.getbuffer() + def get_object(self, bucket, key): + bucket_instance = self.client.bucket(bucket) + blob = bucket_instance.blob(key) + contents = blob.download_as_bytes() + return contents + def get_instance(): if storage.instance is None: storage.instance = storage() return storage.instance + + def list_objects(self, bucket, prefix=None): + res = self.client.list_blobs(bucket, prefix=prefix) + + objs = [] + for obj in res: + objs.append(obj.name) + + return objs + + def delete_object(self, bucket, key): + bucket = self.client.bucket(bucket) + blob = bucket.blob(key) + blob.delete() diff --git a/config/example.json b/config/example.json index dc4da9ad..f405a3be 100644 --- a/config/example.json +++ b/config/example.json @@ -6,7 +6,7 @@ "download_results": false, "runtime": { "language": "python", - "version": "3.7" + "version": "3.9" }, "type": "invocation-overhead", "perf-cost": { diff --git a/config/systems.json b/config/systems.json index bb21dcd9..143687c8 100644 --- a/config/systems.json +++ b/config/systems.json @@ -18,7 +18,8 @@ "python": { "base_images": { "3.7": "python:3.7-slim", - "3.8": "python:3.8-slim" + "3.8": "python:3.8-slim", + "3.9": "python:3.9-slim" }, "images": [ "run", @@ -70,7 +71,9 @@ "deployment": { "files": [ "handler.py", - "storage.py" + "storage.py", + "queue.py", + "misc.py" ], "packages": [] } @@ -113,10 +116,14 @@ "deployment": { "files": [ "handler.py", - "storage.py" + "storage.py", + "queue.py", + "misc.py" ], "packages": [ - "azure-storage-blob" + "azure-storage-blob", + "\nazure-storage-queue", + "\nazure-identity" ] } }, @@ -161,7 +168,9 @@ "deployment": { "files": [ "handler.py", - "storage.py" + "storage.py", + "queue.py", + "misc.py" ], "packages": [ "google-cloud-storage" diff --git a/docs/modularity.md b/docs/modularity.md index 7e3c7fcc..736c2f9d 100644 --- a/docs/modularity.md +++ b/docs/modularity.md @@ -267,7 +267,9 @@ Check other platforms to see how configuration is defined, for example, for AWS: "deployment": { "files": [ "handler.py", - "storage.py" + "storage.py", + "queue.py", + "misc.py" ], "packages": [] } @@ -303,6 +305,7 @@ Implement this step in the following function: language_version: str, benchmark: str, is_cached: bool, + trigger: Optional[Trigger.TriggerType], ) -> Tuple[str, int] ``` diff --git a/docs/platforms.md b/docs/platforms.md index 516f368b..d39c8d8e 100644 --- a/docs/platforms.md +++ b/docs/platforms.md @@ -85,9 +85,9 @@ AZURE_SECRET_PASSWORD = XXXXXXXXXXXXX You can pass the credentials either using the environment variables: ``` -export AZURE_SECRET_APPLICATION_ID = XXXXXXXXXXXXXXXX -export AZURE_SECRET_TENANT = XXXXXXXXXXXX -export 
AZURE_SECRET_PASSWORD = XXXXXXXXXXXXX +export AZURE_SECRET_APPLICATION_ID=XXXXXXXXXXXXXXXX +export AZURE_SECRET_TENANT=XXXXXXXXXXXX +export AZURE_SECRET_PASSWORD=XXXXXXXXXXXXX ``` or in the JSON input configuration: diff --git a/requirements.azure.txt b/requirements.azure.txt index f7d82499..4fed51ac 100644 --- a/requirements.azure.txt +++ b/requirements.azure.txt @@ -1 +1,3 @@ azure-storage-blob==12.10.0 +azure-storage-queue==12.9.0 +azure-identity==1.16.0 diff --git a/requirements.gcp.txt b/requirements.gcp.txt index 9cb90916..4550ac88 100644 --- a/requirements.gcp.txt +++ b/requirements.gcp.txt @@ -4,3 +4,4 @@ google-api-python-client==1.12.5 google-cloud-monitoring==2.0.0 google-api-python-client-stubs google-cloud-logging==2.0.0 +google-cloud-pubsub diff --git a/scripts/run_experiments.py b/scripts/run_experiments.py index c18b96c0..ffff5b9f 100755 --- a/scripts/run_experiments.py +++ b/scripts/run_experiments.py @@ -445,6 +445,8 @@ def __init__(self, cache_client, config, docker_client, language): function - function.py - storage.py + - queue.py + - misc.py - resources handler.py diff --git a/sebs.py b/sebs.py index 119f4439..9e7a076e 100755 --- a/sebs.py +++ b/sebs.py @@ -14,7 +14,7 @@ from sebs import SeBS from sebs.types import Storage as StorageTypes from sebs.regression import regression_suite -from sebs.utils import update_nested_dict, catch_interrupt +from sebs.utils import update_nested_dict, catch_interrupt, find_benchmark from sebs.faas import System as FaaSSystem from sebs.faas.function import Trigger @@ -173,7 +173,7 @@ def benchmark(): @click.option("--repetitions", default=5, type=int, help="Number of experimental repetitions.") @click.option( "--trigger", - type=click.Choice(["library", "http"]), + type=click.Choice(["library", "http", "queue", "storage"]), default="http", help="Function trigger to be used.", ) @@ -224,44 +224,134 @@ def invoke( if image_tag_prefix is not None: sebs_client.config.image_tag_prefix = image_tag_prefix + # Insert trigger into (experiment) config. Required by Azure when packaging. + trigger = trigger if trigger is not None else "http" + update_nested_dict(config, ["experiments", "trigger"], trigger) + experiment_config = sebs_client.get_experiment_config(config["experiments"]) update_nested_dict(config, ["experiments", "benchmark"], benchmark) - benchmark_obj = sebs_client.get_benchmark( - benchmark, - deployment_client, - experiment_config, - logging_filename=logging_filename, - ) - if memory is not None: - benchmark_obj.benchmark_config.memory = memory - if timeout is not None: - benchmark_obj.benchmark_config.timeout = timeout - - func = deployment_client.get_function( - benchmark_obj, - function_name if function_name else deployment_client.default_function_name(benchmark_obj), - ) - storage = deployment_client.get_storage(replace_existing=experiment_config.update_storage) - input_config = benchmark_obj.prepare_input(storage=storage, size=benchmark_input_size) - result = sebs.experiments.ExperimentResult(experiment_config, deployment_client.config) - result.begin() + root_benchmark_path = find_benchmark(benchmark, "benchmarks") + if not root_benchmark_path: + raise RuntimeError("Benchmark {benchmark} not found!".format(benchmark=benchmark)) + with open(os.path.join(root_benchmark_path, "config.json")) as json_file: + root_benchmark_config = json.load(json_file) + + # Application handling. 
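+ # An application benchmark is a directory whose config.json declares "type": "app";
+ # each sub-directory (e.g. splitter, mapper, sorter and reducer in 505.map-reduce)
+ # defines one function and is deployed separately below. The sub-function whose
+ # config sets "entrypoint": true is the one invoked to start the application.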
+ benchmark_objs = {} + if ("type" in root_benchmark_config and root_benchmark_config["type"] == "app"): + list_subfolders = [f.name for f in os.scandir(root_benchmark_path) if f.is_dir()] + + for function in list_subfolders: + benchmark_obj = sebs_client.get_benchmark( + benchmark, + deployment_client, + experiment_config, + app_function_name=function, + logging_filename=logging_filename + ) + + application_name = deployment_client.default_application_name(benchmark_obj) + function_name = deployment_client.default_function_name(benchmark_obj) + + benchmark_obj.application_name = application_name + + # All functions within an application need to be connected to the + # result queue. + benchmark_obj.benchmark_config.result_queue = f"{application_name}-result" + + trigger = benchmark_obj.benchmark_config.trigger + if deployment_client.name() == "gcp" or deployment_client.name() == "azure": + function_name = "{}-{}".format(function_name, trigger) + + func = deployment_client.get_function(benchmark_obj, function_name) - trigger_type = Trigger.TriggerType.get(trigger) - triggers = func.triggers(trigger_type) - if len(triggers) == 0: - trigger = deployment_client.create_trigger(func, trigger_type) + storage = deployment_client.get_storage(replace_existing=experiment_config.update_storage) + input_config = benchmark_obj.prepare_input(storage=storage, size=benchmark_input_size) + + benchmark_objs[benchmark_obj] = func + + # Start timing from just before triggers are deployed. + result = sebs.experiments.ExperimentResult(experiment_config, deployment_client.config) + result.begin() + + for benchmark_obj, func in benchmark_objs.items(): + trigger = benchmark_obj.benchmark_config.trigger + + trigger_type = Trigger.TriggerType.get(trigger) + triggers = func.triggers(trigger_type) + + if len(triggers) == 0: + if (benchmark_obj.benchmark_config.entrypoint): + trigger = deployment_client.create_trigger(func, trigger_type, with_result_queue=True) + else: + trigger = deployment_client.create_trigger(func, trigger_type) + else: + trigger = triggers[0] + + if (benchmark_obj.benchmark_config.entrypoint): + main_func = func + main_trigger = trigger + + func = main_func + trigger = main_trigger + + # Standalone function handling. else: - trigger = triggers[0] + benchmark_obj = sebs_client.get_benchmark( + benchmark, + deployment_client, + experiment_config, + logging_filename=logging_filename, + ) + if memory is not None: + benchmark_obj.benchmark_config.memory = memory + if timeout is not None: + benchmark_obj.benchmark_config.timeout = timeout + + function_name = function_name if function_name else deployment_client.default_function_name(benchmark_obj) + + # GCP and Azure only allow one trigger per function, so augment function name with + # trigger type: _http, _queue etc. + # + # Additionally, Azure requires for the trigger to be defined at deployment time. 
+ if deployment_client.name() == "gcp" or deployment_client.name() == "azure": + function_name = "{}-{}".format(function_name, trigger) + + if trigger == "queue" or trigger == "storage": + benchmark_obj.benchmark_config.result_queue = "{}-result".format(function_name) + + func = deployment_client.get_function( + benchmark_obj, + function_name, + ) + storage = deployment_client.get_storage(replace_existing=experiment_config.update_storage) + input_config = benchmark_obj.prepare_input(storage=storage, size=benchmark_input_size) + + result = sebs.experiments.ExperimentResult(experiment_config, deployment_client.config) + result.begin() + + trigger_type = Trigger.TriggerType.get(trigger) + triggers = func.triggers(trigger_type) + if len(triggers) == 0: + if (trigger_type == Trigger.TriggerType.QUEUE or trigger_type == Trigger.TriggerType.STORAGE): + trigger = deployment_client.create_trigger(func, trigger_type, with_result_queue=True) + else: + trigger = deployment_client.create_trigger(func, trigger_type) + else: + trigger = triggers[0] + + # This part is common for both apps and functions. for i in range(repetitions): sebs_client.logging.info(f"Beginning repetition {i+1}/{repetitions}") ret = trigger.sync_invoke(input_config) - if ret.stats.failure: - sebs_client.logging.info(f"Failure on repetition {i+1}/{repetitions}") - # deployment_client.get_invocation_error( - # function_name=func.name, start_time=start_time, end_time=end_time - # ) - result.add_invocation(func, ret) + for experiment in ret: + if experiment.stats.failure: + sebs_client.logging.info(f"Failure on repetition {i+1}/{repetitions}") + # deployment_client.get_invocation_error( + # function_name=func.name, start_time=start_time, end_time=end_time + # ) + result.add_invocation(func, experiment) result.end() result_file = os.path.join(output_dir, "experiments.json") diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 6dc70e52..a31f8dbc 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -119,6 +119,8 @@ def get_storage(self, replace_existing: bool = False) -> PersistentStorage: function - function.py - storage.py + - queue.py + - misc.py - resources handler.py @@ -132,6 +134,7 @@ def package_code( language_version: str, benchmark: str, is_cached: bool, + trigger: Optional[Trigger.TriggerType], ) -> Tuple[str, int]: CONFIG_FILES = { @@ -197,6 +200,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun language_runtime, self.config.resources.lambda_role(self.session), function_cfg, + code_package.application_name, ) self.update_function(lambda_function, code_package) lambda_function.updated_code = True @@ -222,6 +226,16 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun self.logging.info("Uploading function {} code to {}".format(func_name, code_bucket)) code_config = {"S3Bucket": code_bucket, "S3Key": code_prefix} + + env_vars = {} + # Result queue added as an env variable. + if (code_package.benchmark_config.result_queue): + env_vars["RESULT_QUEUE"] = code_package.benchmark_config.result_queue + + # Application name added as an env variable. 
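+ # APP_NAME is read by the wrappers' misc.function_name() and misc.object_path()
+ # helpers, so a function belonging to an application can address its sibling
+ # functions and paths.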
+ if (code_package.application_name): + env_vars["APP_NAME"] = code_package.application_name + ret = self.client.create_function( FunctionName=func_name, Runtime="{}{}".format( @@ -232,6 +246,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun MemorySize=memory, Timeout=timeout, Code=code_config, + Environment={"Variables": env_vars} ) lambda_function = LambdaFunction( @@ -243,6 +258,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun self.config.resources.lambda_role(self.session), function_cfg, code_bucket, + code_package.application_name ) self.wait_function_active(lambda_function) @@ -250,7 +266,11 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun # Add LibraryTrigger to a new function from sebs.aws.triggers import LibraryTrigger - trigger = LibraryTrigger(func_name, self) + trigger = LibraryTrigger( + func_name, + self, + application_name=code_package.application_name + ) trigger.logging_handlers = self.logging_handlers lambda_function.add_trigger(trigger) @@ -258,13 +278,19 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun def cached_function(self, function: Function): - from sebs.aws.triggers import LibraryTrigger + from sebs.aws.triggers import LibraryTrigger, QueueTrigger, StorageTrigger for trigger in function.triggers(Trigger.TriggerType.LIBRARY): trigger.logging_handlers = self.logging_handlers cast(LibraryTrigger, trigger).deployment_client = self for trigger in function.triggers(Trigger.TriggerType.HTTP): trigger.logging_handlers = self.logging_handlers + for trigger in function.triggers(Trigger.TriggerType.QUEUE): + trigger.logging_handlers = self.logging_handlers + cast(QueueTrigger, trigger).deployment_client = self + for trigger in function.triggers(Trigger.TriggerType.STORAGE): + trigger.logging_handlers = self.logging_handlers + cast(StorageTrigger, trigger).deployment_client = self """ Update function code and configuration on AWS. @@ -318,6 +344,10 @@ def update_function_configuration(self, function: Function, benchmark: Benchmark self.wait_function_updated(function) self.logging.info(f"Updated configuration of {function.name} function. ") + @staticmethod + def default_application_name(code_package: Benchmark) -> str: + return AWS.format_function_name(code_package.application_name) + @staticmethod def default_function_name(code_package: Benchmark) -> str: # Create function name @@ -483,11 +513,17 @@ def download_metrics( f"out of {results_count} invocations" ) - def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: - from sebs.aws.triggers import HTTPTrigger + def create_trigger( + self, + func: Function, + trigger_type: Trigger.TriggerType, + with_result_queue: Optional[bool] = False + ) -> Trigger: + from sebs.aws.triggers import HTTPTrigger, QueueTrigger, StorageTrigger function = cast(LambdaFunction, func) + trigger: Trigger if trigger_type == Trigger.TriggerType.HTTP: api_name = "{}-http-api".format(function.name) @@ -501,7 +537,13 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T Principal="apigateway.amazonaws.com", SourceArn=f"{http_api.arn}/*/*", ) - trigger = HTTPTrigger(http_api.endpoint, api_name) + trigger = HTTPTrigger( + func.name, + url=http_api.endpoint, + api_id=api_name, + application_name=func.application_name, + with_result_queue=with_result_queue + ) self.logging.info( f"Created HTTP trigger for {func.name} function. 
" "Sleep 5 seconds to avoid cloud errors." @@ -511,6 +553,24 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T elif trigger_type == Trigger.TriggerType.LIBRARY: # should already exist return func.triggers(Trigger.TriggerType.LIBRARY)[0] + elif trigger_type == Trigger.TriggerType.QUEUE: + trigger = QueueTrigger( + func.name, + application_name=func.application_name, + deployment_client=self, + with_result_queue=with_result_queue + ) + trigger.logging_handlers = self.logging_handlers + self.logging.info(f"Created Queue trigger for {func.name} function.") + elif trigger_type == Trigger.TriggerType.STORAGE: + trigger = StorageTrigger( + func.name, + application_name=func.application_name, + deployment_client=self, + with_result_queue=with_result_queue + ) + trigger.logging_handlers = self.logging_handlers + self.logging.info(f"Created Storage trigger for {func.name} function.") else: raise RuntimeError("Not supported!") diff --git a/sebs/aws/function.py b/sebs/aws/function.py index 27aeb240..e36bc0a3 100644 --- a/sebs/aws/function.py +++ b/sebs/aws/function.py @@ -16,8 +16,9 @@ def __init__( role: str, cfg: FunctionConfig, bucket: Optional[str] = None, + application_name: Optional[str] = None ): - super().__init__(benchmark, name, code_package_hash, cfg) + super().__init__(benchmark, name, code_package_hash, cfg, application_name) self.arn = arn self.role = role self.runtime = runtime @@ -39,7 +40,7 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> "LambdaFunction": from sebs.faas.function import Trigger - from sebs.aws.triggers import LibraryTrigger, HTTPTrigger + from sebs.aws.triggers import LibraryTrigger, HTTPTrigger, QueueTrigger, StorageTrigger cfg = FunctionConfig.deserialize(cached_config["config"]) ret = LambdaFunction( @@ -51,11 +52,17 @@ def deserialize(cached_config: dict) -> "LambdaFunction": cached_config["role"], cfg, cached_config["bucket"], + cached_config["application_name"], ) for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": LibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), + { + "Library": LibraryTrigger, + "HTTP": HTTPTrigger, + "Queue": QueueTrigger, + "Storage": StorageTrigger, + }.get(trigger["type"]), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) diff --git a/sebs/aws/queue.py b/sebs/aws/queue.py new file mode 100644 index 00000000..6a599e2a --- /dev/null +++ b/sebs/aws/queue.py @@ -0,0 +1,127 @@ +from typing import Optional, cast +from sebs.aws.aws import AWS +from sebs.cache import Cache +from sebs.faas.config import Resources +from sebs.faas.queue import Queue, QueueType + +import boto3 + + +class SQS(Queue): + @staticmethod + def typename() -> str: + return "AWS.SQS" + + @staticmethod + def deployment_name(): + return "aws" + + @property + def queue_url(self): + return self._queue_url + + @property + def queue_arn(self): + return self._queue_arn + + def __init__( + self, + benchmark: str, + queue_type: QueueType, + region: str, + queue_url: Optional[str] = None, + queue_arn: Optional[str] = None + ): + super().__init__( + benchmark, + queue_type, + region + ) + self._queue_url = queue_url + self._queue_arn = queue_arn + + self.client = boto3.session.Session().client( + 'sqs', + region_name=self.region + ) + + def create_queue(self) -> str: + self.logging.debug(f"Creating queue {self.name}") + + if (self._queue_url and self._queue_arn): + self.logging.debug("Queue already 
exists, reusing...") + return + + self._queue_url = self.client.create_queue( + QueueName=self.name, + Attributes={ + # This currently works well in all cases - however it could be + # beneficial to adjust it based on the function's timeout. + "VisibilityTimeout": "540" + } + )["QueueUrl"] + self._queue_arn = self.client.get_queue_attributes( + QueueUrl=self.queue_url, + AttributeNames=["QueueArn"], + )["Attributes"]["QueueArn"] + + self.logging.debug("Created queue") + + def remove_queue(self): + self.logging.info(f"Deleting queue {self.name}") + + self.client.delete_queue(QueueUrl=self.queue_url) + + self.logging.info("Deleted queue") + + def send_message(self, serialized_message: str): + self.client.send_message( + QueueUrl=self.queue_url, + MessageBody=serialized_message, + ) + self.logging.info(f"Sent message to queue {self.name}") + + def receive_message(self) -> str: + self.logging.info(f"Pulling a message from {self.name}") + + response = self.client.receive_message( + QueueUrl=self.queue_url, + AttributeNames=["SentTimestamp"], + MaxNumberOfMessages=1, + MessageAttributeNames=["All"], + WaitTimeSeconds=5, + ) + + if ("Messages" not in response): + self.logging.info("No messages to be received") + return "" + + self.logging.info(f"Received a message from {self.name}") + + # Delete the message from the queue - serves as an acknowledgement + # that it was received. + self.client.delete_message( + QueueUrl=self.queue_url, + ReceiptHandle=response["Messages"][0]["ReceiptHandle"], + ) + + return response["Messages"][0]["Body"] + + def serialize(self) -> dict: + return { + "name": self.name, + "type": self.queue_type, + "region": self.region, + "queue_url": self.queue_url, + "queue_arn": self.queue_arn, + } + + @staticmethod + def deserialize(obj: dict) -> "SQS": + return SQS( + obj["name"], + obj["type"], + obj["region"], + obj["queue_url"], + obj["queue_arn"] + ) diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index f1831459..cb595191 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -2,22 +2,51 @@ import concurrent.futures import datetime import json -from typing import Dict, Optional # noqa +from typing import Optional +import uuid # noqa + +import boto3 from sebs.aws.aws import AWS +from sebs.aws.queue import SQS from sebs.faas.function import ExecutionResult, Trigger +from sebs.faas.queue import QueueType class LibraryTrigger(Trigger): - def __init__(self, fname: str, deployment_client: Optional[AWS] = None): + def __init__( + self, + fname: str, + deployment_client: Optional[AWS] = None, + application_name: Optional[str] = None, + result_queue: Optional[SQS] = None, + with_result_queue: Optional[bool] = False + ): super().__init__() self.name = fname self._deployment_client = deployment_client + self._result_queue = result_queue + self.with_result_queue = with_result_queue + + # Create result queue for communicating benchmark results back to the + # client. 
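+ # The queue is named "<application_name>-result", i.e. the same value that the
+ # deployment later exports to the function as the RESULT_QUEUE variable.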
+ if (self.with_result_queue and not self._result_queue): + self._result_queue = SQS( + f'{application_name}-result', + QueueType.RESULT, + self.deployment_client.config.region + ) + self._result_queue.create_queue() @staticmethod def typename() -> str: return "AWS.LibraryTrigger" + @property + def result_queue(self) -> SQS: + assert self._result_queue + return self._result_queue + @property def deployment_client(self) -> AWS: assert self._deployment_client @@ -85,18 +114,49 @@ def async_invoke(self, payload: dict): return ret def serialize(self) -> dict: - return {"type": "Library", "name": self.name} + return { + "type": "Library", + "name": self.name, + "result_queue": self._result_queue.serialize() if self._result_queue else "", + "with_result_queue": self.with_result_queue + } @staticmethod def deserialize(obj: dict) -> Trigger: - return LibraryTrigger(obj["name"]) + return LibraryTrigger( + obj["name"], + None, + SQS.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + obj["with_result_queue"] + ) class HTTPTrigger(Trigger): - def __init__(self, url: str, api_id: str): + def __init__( + self, + fname: str, + url: str, + api_id: str, + application_name: Optional[str] = None, + result_queue: Optional[SQS] = None, + with_result_queue: Optional[bool] = False + ): super().__init__() + self.name = fname self.url = url self.api_id = api_id + self._result_queue = result_queue + self.with_result_queue = with_result_queue + + # Create result queue for communicating benchmark results back to the + # client. + if (self.with_result_queue and not self._result_queue): + self._result_queue = SQS( + f'{application_name}-result', + QueueType.RESULT, + self.deployment_client.config.region + ) + self._result_queue.create_queue() @staticmethod def typename() -> str: @@ -106,6 +166,11 @@ def typename() -> str: def trigger_type() -> Trigger.TriggerType: return Trigger.TriggerType.HTTP + @property + def result_queue(self) -> SQS: + assert self._result_queue + return self._result_queue + def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.debug(f"Invoke function {self.url}") @@ -118,8 +183,292 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "HTTP", "url": self.url, "api-id": self.api_id} + return { + "type": "HTTP", + "name": self.name, + "url": self.url, + "api-id": self.api_id, + "result_queue": self._result_queue.serialize() if self._result_queue else "", + "with_result_queue": self.with_result_queue + } + + @staticmethod + def deserialize(obj: dict) -> Trigger: + return HTTPTrigger( + obj["name"], + obj["url"], + obj["api-id"], + SQS.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + obj["with_result_queue"] + ) + + +class QueueTrigger(Trigger): + def __init__( + self, + fname: str, + deployment_client: Optional[AWS] = None, + queue: Optional[SQS] = None, + application_name: Optional[str] = None, + result_queue: Optional[SQS] = None, + with_result_queue: Optional[bool] = False + ): + super().__init__() + self.name = fname + self._queue = queue + self._result_queue = result_queue + self._deployment_client = deployment_client + self.with_result_queue = with_result_queue + + if (not self._queue): + self._queue = SQS( + self.name, + QueueType.TRIGGER, + self.deployment_client.config.region + ) + self.queue.create_queue() + + # Add queue trigger + lambda_client = self.deployment_client.get_lambda_client() + if not len( + lambda_client.list_event_source_mappings( + 
EventSourceArn=self.queue.queue_arn, FunctionName=self.name + )["EventSourceMappings"] + ): + lambda_client.create_event_source_mapping( + EventSourceArn=self.queue.queue_arn, + FunctionName=self.name, + Enabled=True, + BatchSize=1, + MaximumBatchingWindowInSeconds=1, + ) + + # Create result queue for communicating benchmark results back to the + # client. + if (self.with_result_queue and not self._result_queue): + self._result_queue = SQS( + f'{application_name}-result', + QueueType.RESULT, + self.deployment_client.config.region + ) + self._result_queue.create_queue() + + @staticmethod + def typename() -> str: + return "AWS.QueueTrigger" + + @property + def queue(self) -> SQS: + assert self._queue + return self._queue + + @property + def result_queue(self) -> SQS: + assert self._result_queue + return self._result_queue + + @property + def deployment_client(self) -> AWS: + assert self._deployment_client + return self._deployment_client + + @deployment_client.setter + def deployment_client(self, deployment_client: AWS): + self._deployment_client = deployment_client + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.QUEUE + + def sync_invoke(self, payload: dict) -> ExecutionResult: + + self.logging.debug(f"Invoke function {self.name}") + + # Publish payload to queue + serialized_payload = json.dumps(payload) + begin = datetime.datetime.now() + self.queue.send_message(serialized_payload) + + results = self.collect_async_results(self.result_queue) + + ret = [] + for recv_ts, result_data in results.items(): + result = ExecutionResult.from_times(begin, recv_ts) + result.parse_benchmark_output(result_data) + ret.append(result) + + return ret + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + + pool = concurrent.futures.ThreadPoolExecutor() + fut = pool.submit(self.sync_invoke, payload) + return fut + + def serialize(self) -> dict: + return { + "type": "Queue", + "name": self.name, + "queue": self.queue.serialize(), + "result_queue": self._result_queue.serialize() if self._result_queue else "", + "with_result_queue": self.with_result_queue + } @staticmethod def deserialize(obj: dict) -> Trigger: - return HTTPTrigger(obj["url"], obj["api-id"]) + return QueueTrigger( + fname=obj["name"], + queue=SQS.deserialize(obj["queue"]), + result_queue=SQS.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + with_result_queue=obj["with_result_queue"] + ) + + +class StorageTrigger(Trigger): + def __init__( + self, + fname: str, + deployment_client: Optional[AWS] = None, + bucket_name: Optional[str] = None, + application_name: Optional[str] = None, + result_queue: Optional[SQS] = None, + with_result_queue: Optional[bool] = False + ): + super().__init__() + self.name = fname + + self._deployment_client = deployment_client + self._bucket_name = bucket_name + self._result_queue = result_queue + self.with_result_queue = with_result_queue + + # When creating the trigger for the first time, also create and store + # storage bucket information. 
+ if not self._bucket_name: + # Init clients + s3 = boto3.resource("s3") + lambda_client = self.deployment_client.get_lambda_client() + + # AWS disallows underscores in bucket names + self._bucket_name = self.name.replace("_", "-") + function_arn = lambda_client.get_function(FunctionName=self.name)["Configuration"][ + "FunctionArn" + ] + + # Create bucket + self.logging.info(f"Creating bucket {self.bucket_name}") + + region = self.deployment_client.config.region + if region == "us-east-1": + s3.create_bucket(Bucket=self.bucket_name) + else: + s3.create_bucket( + Bucket=self.bucket_name, + CreateBucketConfiguration={"LocationConstraint": region}, + ) + + self.logging.info("Created bucket") + + lambda_client.add_permission( + FunctionName=self.name, + StatementId=str(uuid.uuid1()), + Action="lambda:InvokeFunction", + Principal="s3.amazonaws.com", + SourceArn=f"arn:aws:s3:::{self.bucket_name}", + ) + + # Add bucket trigger + bucket_notification = s3.BucketNotification(self.bucket_name) + bucket_notification.put( + NotificationConfiguration={ + "LambdaFunctionConfigurations": [ + { + "LambdaFunctionArn": function_arn, + "Events": ["s3:ObjectCreated:*"], + }, + ] + } + ) + + # Create result queue for communicating benchmark results back to the + # client. + if (self.with_result_queue and not self._result_queue): + self._result_queue = SQS( + f'{application_name}-result', + QueueType.RESULT, + self.deployment_client.config.region + ) + self._result_queue.create_queue() + + @staticmethod + def typename() -> str: + return "AWS.StorageTrigger" + + @property + def bucket_name(self) -> str: + assert self._bucket_name + return self._bucket_name + + @property + def deployment_client(self) -> AWS: + assert self._deployment_client + return self._deployment_client + + @property + def result_queue(self) -> SQS: + assert self._result_queue + return self._result_queue + + @deployment_client.setter + def deployment_client(self, deployment_client: AWS): + self._deployment_client = deployment_client + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.STORAGE + + def sync_invoke(self, payload: dict) -> ExecutionResult: + + self.logging.debug(f"Invoke function {self.name}") + + serialized_payload = json.dumps(payload) + + # Put object + s3 = boto3.resource("s3") + begin = datetime.datetime.now() + s3.Object(self.bucket_name, "payload.json").put(Body=serialized_payload) + self.logging.info(f"Uploaded payload to bucket {self.bucket_name}") + + results = self.collect_async_results(self.result_queue) + + ret = [] + for recv_ts, result_data in results.items(): + result = ExecutionResult.from_times(begin, recv_ts) + result.parse_benchmark_output(result_data) + ret.append(result) + + return ret + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + + pool = concurrent.futures.ThreadPoolExecutor() + fut = pool.submit(self.sync_invoke, payload) + return fut + + def serialize(self) -> dict: + return { + "type": "Storage", + "name": self.name, + "bucket_name": self.bucket_name, + "result_queue": self._result_queue.serialize() if self._result_queue else "", + "with_result_queue": self.with_result_queue + } + + @staticmethod + def deserialize(obj: dict) -> Trigger: + return StorageTrigger( + fname=obj["name"], + bucket_name=obj["bucket_name"], + result_queue=SQS.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + with_result_queue=obj["with_result_queue"] + ) diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index a9a54c1e..054259fd 100644 
--- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -12,7 +12,7 @@ from sebs.azure.cli import AzureCLI from sebs.azure.function import AzureFunction from sebs.azure.config import AzureConfig, AzureResources -from sebs.azure.triggers import AzureTrigger, HTTPTrigger +from sebs.azure.triggers import AzureTrigger, HTTPTrigger, QueueTrigger, StorageTrigger from sebs.faas.function import Trigger from sebs.benchmark import Benchmark from sebs.cache import Cache @@ -36,6 +36,10 @@ class Azure(System): def name(): return "azure" + @staticmethod + def typename(): + return "Azure" + @property def config(self) -> AzureConfig: return self._config @@ -145,6 +149,61 @@ def get_storage(self, replace_existing: bool = False) -> PersistentStorage: self.storage.replace_existing = replace_existing return self.storage + """ + Composes the JSON config that describes the trigger and bindings configs + for a given function to be run on Azure. + + :param benchmark: + :param exec_files: the files which define and implement the function to be executed + :return: JSON dictionary containing the function configuration + """ + + def create_function_json(self, benchmark, exec_files) -> Dict: + trigger = benchmark.split("-")[-1] + + if trigger == "queue": + return { + "scriptFile": exec_files, + "entryPoint": "handler_queue", + "bindings": [ + { + "name": "msg", + "type": "queueTrigger", + "direction": "in", + "queueName": benchmark, + "connection": "STORAGE_CONNECTION_STRING", + } + ], + } + elif trigger == "storage": + return { + "scriptFile": exec_files, + "entryPoint": "handler_storage", + "bindings": [ + { + "name": "blob", + "type": "blobTrigger", + "direction": "in", + "path": benchmark, + "connection": "STORAGE_CONNECTION_STRING", + } + ], + } + return { # HTTP + "scriptFile": exec_files, + "entryPoint": "handler_http", + "bindings": [ + { + "authLevel": "anonymous", + "type": "httpTrigger", + "direction": "in", + "name": "req", + "methods": ["get", "post"], + }, + {"type": "http", "direction": "out", "name": "$return"}, + ], + } + # Directory structure # handler # - source files @@ -160,6 +219,7 @@ def package_code( language_version: str, benchmark: str, is_cached: bool, + trigger: Optional[Trigger.TriggerType], ) -> Tuple[str, int]: # In previous step we ran a Docker container which installed packages @@ -179,23 +239,25 @@ def package_code( source_file = os.path.join(directory, f) shutil.move(source_file, handler_dir) + func_name = ( + "{}-{}-{}-{}-{}".format( + benchmark, + language_name, + language_version, + self.config.resources.resources_id, + trigger, + ) + .replace(".", "-") + .replace("_", "-") + ) + # generate function.json - # TODO: extension to other triggers than HTTP - default_function_json = { - "scriptFile": EXEC_FILES[language_name], - "bindings": [ - { - "authLevel": "anonymous", - "type": "httpTrigger", - "direction": "in", - "name": "req", - "methods": ["get", "post"], - }, - {"type": "http", "direction": "out", "name": "$return"}, - ], - } json_out = os.path.join(directory, "handler", "function.json") - json.dump(default_function_json, open(json_out, "w"), indent=2) + json.dump( + self.create_function_json(func_name, EXEC_FILES[language_name]), + open(json_out, "w"), + indent=2, + ) # generate host.json default_host_json = { @@ -287,11 +349,7 @@ def update_function(self, function: Function, code_package: Benchmark): # Mount code package in Docker instance self._mount_function_code(code_package) - url = self.publish_function(function, code_package, True) - - trigger = HTTPTrigger(url, 
self.config.resources.data_storage_account(self.cli_instance)) - trigger.logging_handlers = self.logging_handlers - function.add_trigger(trigger) + url = self.publish_function(function, code_package, True) def update_function_configuration(self, function: Function, code_package: Benchmark): # FIXME: this does nothing currently - we don't specify timeout @@ -302,6 +360,17 @@ def update_function_configuration(self, function: Function, code_package: Benchm def _mount_function_code(self, code_package: Benchmark): self.cli_instance.upload_package(code_package.code_location, "/mnt/function/") + def default_application_name(self, code_package: Benchmark) -> str: + func_name = ( + "{}-{}".format( + code_package.application_name, + self.config.resources.resources_id, + ) + .replace(".", "-") + .replace("_", "-") + ) + return func_name + def default_function_name(self, code_package: Benchmark) -> str: """ Functionapp names must be globally unique in Azure. @@ -368,6 +437,39 @@ def create_function(self, code_package: Benchmark, func_name: str) -> AzureFunct " --name {func_name} --storage-account {storage_account}" ).format(**config) ) + + # Add result queue env var. + result_queue_env = f"RESULT_QUEUE={code_package.benchmark_config.result_queue}" + self.cli_instance.execute( + f"az functionapp config appsettings set --name {func_name} " + f" --resource-group {resource_group} " + f" --settings {result_queue_env}" + ) + + # Add application name env var. + app_name_env = f"APP_NAME={code_package.application_name}" + self.cli_instance.execute( + f"az functionapp config appsettings set --name {func_name} " + f" --resource-group {resource_group} " + f" --settings {app_name_env}" + ) + + # Set the data storage account as env vars in the function. + resource_group = self.config.resources.resource_group(self.cli_instance) + data_storage_account = self.config.resources.data_storage_account(self.cli_instance) + + self.cli_instance.execute( + f"az functionapp config appsettings set --name {func_name} " + f" --resource-group {resource_group} " + f" --settings DATA_STORAGE_ACCOUNT={data_storage_account.account_name}" + ) + + self.cli_instance.execute( + f"az functionapp config appsettings set --name {func_name} " + f" --resource-group {resource_group} " + f" --settings STORAGE_CONNECTION_STRING={data_storage_account.connection_string}" + ) + self.logging.info("Azure: Created function app {}".format(func_name)) break except RuntimeError as e: @@ -385,6 +487,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> AzureFunct code_hash=code_package.hash, function_storage=function_storage_account, cfg=function_cfg, + application_name=code_package.application_name, ) # update existing function app @@ -399,7 +502,6 @@ def create_function(self, code_package: Benchmark, func_name: str) -> AzureFunct return function def cached_function(self, function: Function): - data_storage_account = self.config.resources.data_storage_account(self.cli_instance) for trigger in function.triggers_all(): azure_trigger = cast(AzureTrigger, trigger) @@ -508,12 +610,135 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) time.sleep(20) """ - The only implemented trigger at the moment is HTTPTrigger. - It is automatically created for each function. + Supports HTTP, queue and storage triggers, as specified by + the user when SeBS is run. 
""" - def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: - raise NotImplementedError() + def create_trigger( + self, + function: Function, + trigger_type: Trigger.TriggerType, + with_result_queue: Optional[bool] = False + ) -> Trigger: + + azure_function = cast(AzureFunction, function) + resource_group = self.config.resources.resource_group(self.cli_instance) + data_storage_account = self.config.resources.data_storage_account(self.cli_instance).account_name + + storage_account_scope = self.cli_instance.execute( + ("az storage account show --resource-group {} --name {} --query id").format( + resource_group, data_storage_account + ) + ).decode('utf-8') + + user_principal_name = self.cli_instance.execute("az ad user list").decode('utf-8') + + # All functions in an application need permission to write to the + # result queue. + if (function.application_name is not None): + function_principal = self.cli_instance.execute( + ( + 'az functionapp identity assign \ + --name {} \ + --resource-group {}' + ).format( + function.name, + self.config.resources.resource_group(self.cli_instance) + ) + ).decode('utf-8') + + self.cli_instance.execute( + ( + 'az role assignment create --assignee "{}" \ + --role "Storage Queue Data Contributor" \ + --scope {}' + ).format( + json.loads(function_principal)['principalId'], + storage_account_scope + ) + ) + + # Storage-triggered functions require Blob Storage access. + if (trigger_type == Trigger.TriggerType.STORAGE): + self.cli_instance.execute( + ( + 'az role assignment create --assignee "{}" \ + --role "Storage Blob Data Owner" \ + --scope {}' + ).format( + json.loads(user_principal_name)[0]["userPrincipalName"], + storage_account_scope, + ) + ) + + # Everything async needs queue access attached to the SeBS client. + if (function.application_name is not None + or trigger_type == Trigger.TriggerType.QUEUE + or trigger_type == Trigger.TriggerType.STORAGE + ): + self.cli_instance.execute( + ( + 'az role assignment create --assignee "{}" \ + --role "Storage Queue Data Contributor" \ + --scope {}' + ).format( + json.loads(user_principal_name)[0]["userPrincipalName"], + storage_account_scope, + ) + ) + + # Connect the function app to the result queue via Service + # Connector. 
+ self.cli_instance.execute( + ( + 'az webapp connection create storage-queue \ + --resource-group {} \ + --target-resource-group {} \ + --account {} \ + --name {} \ + --system-identity' + ).format( + resource_group, + resource_group, + data_storage_account, + function.name + ) + ) + + trigger: Trigger + if trigger_type == Trigger.TriggerType.HTTP: + trigger = HTTPTrigger( + function.name, + url=url, + storage_account=data_storage_account, + application_name=function.application_name, + ) + self.logging.info(f"Created HTTP trigger for {function.name} function") + elif trigger_type == Trigger.TriggerType.QUEUE: + trigger = QueueTrigger( + function.name, + storage_account=data_storage_account, + region=self.config.region, + application_name=function.application_name, + with_result_queue=with_result_queue, + ) + self.logging.info(f"Created Queue trigger for {function.name} function") + elif trigger_type == Trigger.TriggerType.STORAGE: + trigger = StorageTrigger( + function.name, + storage_account=data_storage_account, + region=self.config.region, + application_name=function.application_name, + with_result_queue=with_result_queue, + ) + self.logging.info(f"Created Storage trigger for {function.name} function") + else: + raise RuntimeError("Not supported!") + + trigger.logging_handlers = self.logging_handlers + function.add_trigger(trigger) + self.cache_client.update_function(function) + return trigger # diff --git a/sebs/azure/function.py b/sebs/azure/function.py index 61ef4c57..abff5f92 100644 --- a/sebs/azure/function.py +++ b/sebs/azure/function.py @@ -1,3 +1,5 @@ +from typing import cast, Optional + from sebs.azure.config import AzureResources from sebs.faas.function import Function, FunctionConfig @@ -10,10 +12,15 @@ def __init__( code_hash: str, function_storage: AzureResources.Storage, cfg: FunctionConfig, + application_name: Optional[str] = None ): - super().__init__(benchmark, name, code_hash, cfg) + super().__init__(benchmark, name, code_hash, cfg, application_name) self.function_storage = function_storage + @staticmethod + def typename() -> str: + return "Azure.AzureFunction" + def serialize(self) -> dict: return { **super().serialize(), @@ -22,6 +29,9 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> Function: + from sebs.faas.function import Trigger + from sebs.azure.triggers import HTTPTrigger, QueueTrigger, StorageTrigger + cfg = FunctionConfig.deserialize(cached_config["config"]) ret = AzureFunction( cached_config["name"], @@ -29,11 +39,15 @@ def deserialize(cached_config: dict) -> Function: cached_config["hash"], AzureResources.Storage.deserialize(cached_config["function_storage"]), cfg, + cached_config["application_name"], ) - from sebs.azure.triggers import HTTPTrigger - for trigger in cached_config["triggers"]: - trigger_type = {"HTTP": HTTPTrigger}.get(trigger["type"]) + trigger_type = cast( + Trigger, + {"HTTP": HTTPTrigger, "Queue": QueueTrigger, "Storage": StorageTrigger}.get( + trigger["type"] + ), + ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) return ret diff --git a/sebs/azure/queue.py b/sebs/azure/queue.py new file mode 100644 index 00000000..1dcbf922 --- /dev/null +++ b/sebs/azure/queue.py @@ -0,0 +1,101 @@ +import base64, time + +from sebs.faas.queue import Queue, QueueType + +from azure.core.exceptions import ResourceExistsError +from azure.identity import DefaultAzureCredential +from azure.storage.queue import QueueClient + + +class 
AzureQueue(Queue): + @staticmethod + def typename() -> str: + return "Azure.Queue" + + @staticmethod + def deployment_name(): + return "azure" + + @property + def storage_account(self) -> str: + assert self._storage_account + return self._storage_account + + @property + def account_url(self) -> str: + return f"https://{self.storage_account}.queue.core.windows.net" + + def __init__( + self, + benchmark: str, + queue_type: QueueType, + storage_account: str, + region: str + ): + default_credential = DefaultAzureCredential() + + super().__init__( + benchmark, + queue_type, + region + ) + self._storage_account = storage_account + self.client = QueueClient(self.account_url, + queue_name=self.name, + credential=default_credential) + + def create_queue(self): + self.logging.info(f"Creating queue {self.name}") + + try: + self.client.create_queue() + self.logging.info("Created queue") + except ResourceExistsError: + self.logging.info("Queue already exists, reusing...") + + def remove_queue(self): + self.logging.info(f"Deleting queue {self.name}") + + self.client.delete_queue() + + self.logging.info("Deleted queue") + + def send_message(self, serialized_message: str): + self.client.send_message(serialized_message) + self.logging.info(f"Sent message to queue {self.name}") + + def receive_message(self) -> str: + self.logging.info(f"Pulling a message from {self.name}") + + response = self.client.receive_messages( + max_messages=1, + timeout=5, + ) + + for msg in response: + self.logging.info(f"Received a message from {self.name}") + self.client.delete_message(msg) + msg = base64.b64decode(msg.content) + return msg + + self.logging.info("No messages to be received") + + time.sleep(5) + return "" + + def serialize(self) -> dict: + return { + "name": self.name, + "type": self.queue_type, + "storage_account": self.storage_account, + "region": self.region + } + + @staticmethod + def deserialize(obj: dict) -> "AzureQueue": + return AzureQueue( + obj["name"], + obj["type"], + obj["storage_account"], + obj["region"] + ) diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index 66be8c6d..b6c3b068 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -1,8 +1,19 @@ +import base64 import concurrent.futures +import datetime +import json +import time from typing import Any, Dict, Optional # noqa +from azure.core.exceptions import ResourceExistsError +from azure.identity import DefaultAzureCredential +from azure.storage.blob import BlobServiceClient +from azure.storage.queue import QueueClient + from sebs.azure.config import AzureResources +from sebs.azure.queue import AzureQueue from sebs.faas.function import ExecutionResult, Trigger +from sebs.faas.queue import QueueType class AzureTrigger(Trigger): @@ -21,14 +32,40 @@ def data_storage_account(self, data_storage_account: AzureResources.Storage): class HTTPTrigger(AzureTrigger): - def __init__(self, url: str, data_storage_account: Optional[AzureResources.Storage] = None): + def __init__( + self, + fname: str, + url: str, + data_storage_account: Optional[AzureResources.Storage] = None, + result_queue: Optional[AzureQueue] = None, + with_result_queue: Optional[bool] = False + ): super().__init__(data_storage_account) + self.name = fname self.url = url + self._result_queue = result_queue + self.with_result_queue = with_result_queue + + # Create result queue for communicating benchmark results back to the + # client. 
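On the function side, measurements are expected to be pushed onto this result queue. A hypothetical sketch of that reporting call, relying on the STORAGE_CONNECTION_STRING and RESULT_QUEUE app settings added in create_function; the body is base64-encoded so that AzureQueue.receive_message, which base64-decodes whatever it pulls, can parse it (the real wrapper shipped with the benchmarks is outside this diff):

    import base64
    import json
    import os

    from azure.storage.queue import QueueClient

    def report_result(measurement: dict) -> None:
        client = QueueClient.from_connection_string(
            os.environ["STORAGE_CONNECTION_STRING"],
            queue_name=os.environ["RESULT_QUEUE"],
        )
        body = base64.b64encode(json.dumps(measurement).encode("utf-8")).decode("utf-8")
        client.send_message(body)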
+ if (self.with_result_queue and not self._result_queue): + self._result_queue = AzureQueue( + self.name, + QueueType.RESULT, + data_storage_account, + self.region + ) + self._result_queue.create_queue() @staticmethod def trigger_type() -> Trigger.TriggerType: return Trigger.TriggerType.HTTP + @property + def result_queue(self) -> AzureQueue: + assert self._result_queue + return self._result_queue + def sync_invoke(self, payload: dict) -> ExecutionResult: payload["connection_string"] = self.data_storage_account.connection_string @@ -40,8 +77,287 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "HTTP", "url": self.url} + return { + "type": "HTTP", + "name": self.name, + "url": self.url, + "result_queue": self._result_queue.serialize() if self._result_queue else "", + "with_result_queue": self.with_result_queue, + } + + @staticmethod + def deserialize(obj: dict) -> Trigger: + return HTTPTrigger( + obj["name"], + obj["url"], + AzureQueue.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + obj["with_result_queue"], + ) + + +class QueueTrigger(Trigger): + def __init__( + self, + fname: str, + storage_account: str, + region: str, + queue: Optional[AzureQueue] = None, + application_name: Optional[str] = None, + result_queue: Optional[AzureQueue] = None, + with_result_queue: Optional[bool] = False + ): + super().__init__() + self.name = fname + self._storage_account = storage_account + self._region = region + self._queue = queue + self._result_queue = result_queue + self.with_result_queue = with_result_queue + + if (not self._queue): + self._queue = AzureQueue( + self.name, + QueueType.TRIGGER, + self.storage_account, + self.region + ) + self.queue.create_queue() + + # Create result queue for communicating benchmark results back to the + # client. 
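The messages travelling over this result queue follow the convention consumed by collect_async_results (added to sebs/faas/function.py further down in this patch): a function reports how many new functions it triggered, and every downstream function names its parent invocation. A hypothetical two-message trace for a two-stage application (ids invented):

    entry_report = {
        "request_id": "req-1",
        "result": {"fns_triggered": 1, "measurement": {}},
    }
    # After this message the collector still expects one completion
    # attributed to "req-1".

    child_report = {
        "request_id": "req-2",
        "result": {"parent_execution_id": "req-1", "measurement": {}},
    }
    # This message balances the fan-out announced by "req-1": the bookkeeping
    # map becomes empty and collect_async_results returns both measurements,
    # keyed by the timestamps at which they were pulled from the queue.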
+ if (self.with_result_queue and not self._result_queue): + self._result_queue = AzureQueue( + f"{application_name}-result", + QueueType.RESULT, + self.storage_account, + self.region + ) + self._result_queue.create_queue() + + @staticmethod + def typename() -> str: + return "Azure.QueueTrigger" + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.QUEUE + + @property + def storage_account(self) -> str: + assert self._storage_account + return self._storage_account + + @property + def region(self) -> str: + assert self._region + return self._region + + @property + def queue(self) -> AzureQueue: + assert self._queue + return self._queue + + @property + def result_queue(self) -> AzureQueue: + assert self._result_queue + return self._result_queue + + @property + def account_url(self) -> str: + return f"https://{self.storage_account}.queue.core.windows.net" + + @property + def queue_name(self) -> str: + assert self._queue_name + return self._queue_name + + def sync_invoke(self, payload: dict) -> ExecutionResult: + + self.logging.info(f"Invoke function {self.name}") + + # Publish payload to queue + serialized_payload = base64.b64encode(json.dumps(payload).encode("utf-8")).decode("utf-8") + begin = datetime.datetime.now() + self.queue.send_message(serialized_payload) + + results = self.collect_async_results(self.result_queue) + + ret = [] + for recv_ts, result_data in results.items(): + result = ExecutionResult.from_times(begin, recv_ts) + result.parse_benchmark_output(result_data) + ret.append(result) + + return ret + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + + pool = concurrent.futures.ThreadPoolExecutor() + fut = pool.submit(self.sync_invoke, payload) + return fut + + def serialize(self) -> dict: + return { + "type": "Queue", + "name": self.name, + "storage_account": self.storage_account, + "region": self.region, + "queue": self.queue.serialize(), + "result_queue": self._result_queue.serialize() if self._result_queue else "", + "with_result_queue": self.with_result_queue, + } + + @staticmethod + def deserialize(obj: dict) -> Trigger: + return QueueTrigger( + fname=obj["name"], + storage_account=obj["storage_account"], + region=obj["region"], + queue=AzureQueue.deserialize(obj["queue"]), + result_queue=AzureQueue.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + with_result_queue=obj["with_result_queue"], + ) + + +class StorageTrigger(Trigger): + def __init__( + self, + fname: str, + storage_account: str, + region: str, + application_name: Optional[str] = None, + result_queue: Optional[AzureQueue] = None, + with_result_queue: Optional[bool] = False, + container_name: Optional[str] = None, + ): + super().__init__() + self.name = fname + self._storage_account = storage_account + self._region = region + self._result_queue = result_queue + self.with_result_queue = with_result_queue + self._container_name = None + + if container_name: + self._container_name = container_name + else: + # Having a container name field is currently a bit contrived - it is mostly + # a device to indicate that a trigger resource exists and is cached. In the + # future, we may adopt a different convention for naming trigger resources, + # at which point this will become truly useful. 
+ self._container_name = self.name + + # Init client + default_credential = DefaultAzureCredential() + blob_service_client = BlobServiceClient(self.account_url, credential=default_credential) + + # Create container + self.logging.info(f"Creating container {self.container_name}") + try: + blob_service_client.create_container(self.container_name) + self.logging.info("Created container") + except ResourceExistsError: + self.logging.info("Container already exists, reusing...") + + # Create result queue for communicating benchmark results back to the + # client. + if (self.with_result_queue and not self._result_queue): + self._result_queue = AzureQueue( + f"{application_name}-result", + QueueType.RESULT, + self.storage_account, + self.region + ) + self._result_queue.create_queue() + + @staticmethod + def typename() -> str: + return "Azure.StorageTrigger" + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.STORAGE + + @property + def storage_account(self) -> str: + assert self._storage_account + return self._storage_account + + @property + def region(self) -> str: + assert self._region + return self._region + + @property + def result_queue(self) -> AzureQueue: + assert self._result_queue + return self._result_queue + + @property + def account_url(self) -> str: + return f"https://{self.storage_account}.blob.core.windows.net" + + @property + def container_name(self) -> str: + assert self._container_name + return self._container_name + + def sync_invoke(self, payload: dict) -> list[ExecutionResult]: + + self.logging.info(f"Invoke function {self.name}") + + # Prepare blob + file_name = "payload.json" + with open(file_name, "w") as fp: + json.dump(payload, fp) + + # Init client + default_credential = DefaultAzureCredential() + blob_service_client = BlobServiceClient(self.account_url, credential=default_credential) + + # Upload blob + blob_client = blob_service_client.get_blob_client( + container=self.container_name, blob=file_name + ) + begin = datetime.datetime.now() + with open(file=file_name, mode="rb") as payload_data: + blob_client.upload_blob(payload_data, overwrite=True) + self.logging.info(f"Uploaded payload to container {self.container_name}") + + results = self.collect_async_results(self.result_queue) + + ret = [] + for recv_ts, result_data in results.items(): + result = ExecutionResult.from_times(begin, recv_ts) + result.parse_benchmark_output(result_data) + ret.append(result) + + return ret + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + + pool = concurrent.futures.ThreadPoolExecutor() + fut = pool.submit(self.sync_invoke, payload) + return fut + + def serialize(self) -> dict: + return { + "type": "Storage", + "name": self.name, + "storage_account": self.storage_account, + "region": self.region, + "result_queue": self._result_queue.serialize() if self._result_queue else "", + "with_result_queue": self.with_result_queue, + "container_name": self.container_name, + } @staticmethod def deserialize(obj: dict) -> Trigger: - return HTTPTrigger(obj["url"]) + return StorageTrigger( + fname=obj["name"], + storage_account=obj["storage_account"], + region=obj["region"], + result_queue=AzureQueue.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + with_result_queue=obj["with_result_queue"], + container_name=obj["container_name"], + ) diff --git a/sebs/benchmark.py b/sebs/benchmark.py index 90eed6ae..b1e71935 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -5,7 +5,7 @@ import shutil import subprocess from 
abc import abstractmethod -from typing import Any, Callable, Dict, List, Tuple +from typing import Any, Callable, Dict, List, Optional, Tuple import docker @@ -13,6 +13,7 @@ from sebs.cache import Cache from sebs.faas.config import Resources from sebs.utils import find_benchmark, project_absolute_path, LoggingBase +# from sebs.faas.function import Trigger from sebs.faas.storage import PersistentStorage from typing import TYPE_CHECKING @@ -22,10 +23,21 @@ class BenchmarkConfig: - def __init__(self, timeout: int, memory: int, languages: List["Language"]): + def __init__( + self, + timeout: int, + memory: int, + languages: List["Language"], + trigger: Optional[str] = None, + entrypoint: Optional[bool] = False, + result_queue: Optional[str] = None + ): self._timeout = timeout self._memory = memory self._languages = languages + self._trigger = trigger + self._entrypoint = entrypoint + self._result_queue = result_queue @property def timeout(self) -> int: @@ -47,6 +59,26 @@ def memory(self, val: int): def languages(self) -> List["Language"]: return self._languages + @property + def trigger(self) -> str: + return self._trigger + + @trigger.setter + def trigger(self, val: str): + self._trigger = val + + @property + def entrypoint(self) -> bool: + return self._entrypoint + + @property + def result_queue(self) -> str: + return self._result_queue + + @result_queue.setter + def result_queue(self, val: str): + self._result_queue = val + # FIXME: 3.7+ python with future annotations @staticmethod def deserialize(json_object: dict) -> "BenchmarkConfig": @@ -56,6 +88,9 @@ def deserialize(json_object: dict) -> "BenchmarkConfig": json_object["timeout"], json_object["memory"], [Language.deserialize(x) for x in json_object["languages"]], + json_object["trigger"] if "trigger" in json_object else None, + json_object["entrypoint"] if "entrypoint" in json_object else None, + json_object["result_queue"] if "result_queue" in json_object else None ) @@ -136,6 +171,14 @@ def language_name(self) -> str: def language_version(self): return self._language_version + @property + def application_name(self) -> str: + return self._application_name + + @application_name.setter + def application_name(self, val: str): + self._application_name = val + @property # noqa: A003 def hash(self): path = os.path.join(self.benchmark_path, self.language_name) @@ -158,6 +201,7 @@ def __init__( output_dir: str, cache_client: Cache, docker_client: docker.client, + app_function_name: Optional[str] = None ): super().__init__() self._benchmark = benchmark @@ -165,9 +209,11 @@ def __init__( self._experiment_config = config self._language = config.runtime.language self._language_version = config.runtime.version - self._benchmark_path = find_benchmark(self.benchmark, "benchmarks") + self._application_name = benchmark if app_function_name is not None else None + self._benchmark_path = find_benchmark(self.benchmark, "benchmarks", app_function_name) if not self._benchmark_path: - raise RuntimeError("Benchmark {benchmark} not found!".format(benchmark=self._benchmark)) + benchmark = f"{self._benchmark}-{app_function_name}" if app_function_name is not None else self._benchmark + raise RuntimeError("Benchmark {benchmark} not found!".format(benchmark=benchmark)) with open(os.path.join(self.benchmark_path, "config.json")) as json_file: self._benchmark_config: BenchmarkConfig = BenchmarkConfig.deserialize( json.load(json_file) @@ -180,9 +226,15 @@ def __init__( self._docker_client = docker_client self._system_config = system_config self._hash_value = None - 
self._output_dir = os.path.join( - output_dir, f"{benchmark}_code", self._language.value, self._language_version - ) + if (self.application_name): + self._output_dir = os.path.join( + output_dir, f"{benchmark}_code", app_function_name, self._language.value, self._language_version + ) + self._benchmark = '{}.{}'.format(self._benchmark, app_function_name) + else: + self._output_dir = os.path.join( + output_dir, f"{benchmark}_code", self._language.value, self._language_version + ) # verify existence of function in cache self.query_cache() @@ -470,7 +522,7 @@ def recalculate_code_size(self): return self._code_size def build( - self, deployment_build_step: Callable[[str, str, str, str, bool], Tuple[str, int]] + self, deployment_build_step: Callable[[str, str, str, str, bool, Optional[str]], Tuple[str, int]] ) -> Tuple[bool, str]: # Skip build if files are up to date and user didn't enforce rebuild @@ -505,6 +557,7 @@ def build( self.language_version, self.benchmark, self.is_cached_valid, + self.benchmark_config.trigger, ) self.logging.info( ( @@ -538,8 +591,12 @@ def build( """ def prepare_input(self, storage: PersistentStorage, size: str): - benchmark_data_path = find_benchmark(self._benchmark, "benchmarks-data") - mod = load_benchmark_input(self._benchmark_path) + # The root benchmark name, i.e. xxx.airline-booking. + root_benchmark = '{}.{}'.format(self.benchmark.split('.')[0], self.benchmark.split('.')[1]) + benchmark_data_path = find_benchmark(root_benchmark, "benchmarks-data") + + temp_path = find_benchmark(root_benchmark, "benchmarks") + mod = load_benchmark_input(temp_path) buckets = mod.buckets_count() input, output = storage.benchmark_data(self.benchmark, buckets) @@ -558,7 +615,7 @@ def prepare_input(self, storage: PersistentStorage, size: str): self._cache_client.update_storage( storage.deployment_name(), - self._benchmark, + self.benchmark, { "buckets": { "input": storage.input_prefixes, diff --git a/sebs/cache.py b/sebs/cache.py index ed5096e6..daf50ef9 100644 --- a/sebs/cache.py +++ b/sebs/cache.py @@ -162,7 +162,9 @@ def update_storage(self, deployment: str, benchmark: str, config: dict): with open(os.path.join(benchmark_dir, "config.json"), "w") as fp: json.dump(cached_config, fp, indent=2) - def add_code_package(self, deployment_name: str, language_name: str, code_package: "Benchmark"): + def add_code_package( + self, deployment_name: str, language_name: str, code_package: "Benchmark" + ): with self._lock: language = code_package.language_name language_version = code_package.language_version diff --git a/sebs/experiments/config.py b/sebs/experiments/config.py index a5ca3f0b..51cedd52 100644 --- a/sebs/experiments/config.py +++ b/sebs/experiments/config.py @@ -1,6 +1,6 @@ from typing import Dict -from sebs.faas.function import Runtime +from sebs.faas.function import Runtime, Trigger class Config: @@ -11,6 +11,7 @@ def __init__(self): self._flags: Dict[str, bool] = {} self._experiment_configs: Dict[str, dict] = {} self._runtime = Runtime(None, None) + self._trigger: Trigger.TriggerType @property def update_code(self) -> bool: @@ -31,6 +32,10 @@ def check_flag(self, key: str) -> bool: def runtime(self) -> Runtime: return self._runtime + @property + def trigger(self) -> Trigger.TriggerType: + return self._trigger + def experiment_settings(self, name: str) -> dict: return self._experiment_configs[name] @@ -42,6 +47,7 @@ def serialize(self) -> dict: "runtime": self._runtime.serialize(), "flags": self._flags, "experiments": self._experiment_configs, + "trigger": self._trigger, } 
return out @@ -55,6 +61,7 @@ def deserialize(config: dict) -> "Config": cfg._download_results = config["download_results"] cfg._runtime = Runtime.deserialize(config["runtime"]) cfg._flags = config["flags"] if "flags" in config else {} + cfg._trigger = config["trigger"] if "trigger" in config else {} from sebs.experiments import ( NetworkPingPong, diff --git a/sebs/faas/function.py b/sebs/faas/function.py index c2226cee..55265a9d 100644 --- a/sebs/faas/function.py +++ b/sebs/faas/function.py @@ -10,6 +10,7 @@ from typing import Callable, Dict, List, Optional, Type, TypeVar # noqa from sebs.benchmark import Benchmark +from sebs.faas.queue import Queue from sebs.utils import LoggingBase """ @@ -179,6 +180,7 @@ class TriggerType(Enum): HTTP = "http" LIBRARY = "library" STORAGE = "storage" + QUEUE = "queue" @staticmethod def get(name: str) -> "Trigger.TriggerType": @@ -236,6 +238,53 @@ def _http_invoke(self, payload: dict, url: str, verify_ssl: bool = True) -> Exec self.logging.error("No output provided!") raise RuntimeError(f"Failed invocation of function! Output: {data.getvalue().decode()}") + # Common method to collect the measurement results of applications or + # queue/storage-triggered functions. + # + # :param result_queue: The result queue to read from. + # :return: dictionary from end timestamps to the actual measurement data + def collect_async_results(self, result_queue: Queue) -> dict: + # Executions map from function invocation id to the # of new functions + # invoked by that id. + executions = {} + ret = {} + message = "" + + while (True): + message = result_queue.receive_message() + if (message != ""): + end = datetime.now() + + message = json.loads(message) + ret[end] = message + + if ('fns_triggered' in message['result']): + fns_triggered = message['result']['fns_triggered'] + execution_id = message['request_id'] + + if (execution_id not in executions): + executions[execution_id] = fns_triggered + else: + executions[execution_id] += fns_triggered + if (executions[execution_id] == 0): + executions.pop(execution_id) + + if ('parent_execution_id' in message['result']): + parent_execution_id = message['result']['parent_execution_id'] + + if (parent_execution_id in executions): + executions[parent_execution_id] -= 1 + else: + executions[parent_execution_id] = -1 + if (executions[parent_execution_id] == 0): + executions.pop(parent_execution_id) + + if (not executions): + break + + message = "" + return ret + # FIXME: 3.7+, future annotations @staticmethod @abstractmethod @@ -347,7 +396,14 @@ def serialize(self) -> dict: class Function(LoggingBase): - def __init__(self, benchmark: str, name: str, code_hash: str, cfg: FunctionConfig): + def __init__( + self, + benchmark: str, + name: str, + code_hash: str, + cfg: FunctionConfig, + application_name: Optional[str] = None + ): super().__init__() self._benchmark = benchmark self._name = name @@ -355,6 +411,7 @@ def __init__(self, benchmark: str, name: str, code_hash: str, cfg: FunctionConfi self._updated_code = False self._triggers: Dict[Trigger.TriggerType, List[Trigger]] = {} self._cfg = cfg + self._application_name = application_name @property def config(self) -> FunctionConfig: @@ -368,6 +425,10 @@ def name(self): def benchmark(self): return self._benchmark + @property + def application_name(self): + return self._application_name + @property def code_package_hash(self): return self._code_package_hash @@ -408,6 +469,7 @@ def serialize(self) -> dict: "triggers": [ obj.serialize() for t_type, triggers in self._triggers.items() for obj in 
triggers ], + "application_name": self._application_name, } @staticmethod diff --git a/sebs/faas/queue.py b/sebs/faas/queue.py new file mode 100644 index 00000000..b0b5b2ca --- /dev/null +++ b/sebs/faas/queue.py @@ -0,0 +1,62 @@ +from abc import ABC +from abc import abstractmethod +from enum import Enum + +from sebs.utils import LoggingBase + +class QueueType(str, Enum): + TRIGGER = "trigger" + RESULT = "result" + + +class Queue(ABC, LoggingBase): + + @staticmethod + @abstractmethod + def deployment_name() -> str: + pass + + @property + def region(self): + return self._region + + @property + def queue_type(self): + return self._queue_type + + @property + def name(self): + return self._name + + def __init__( + self, + benchmark: str, + queue_type: QueueType, + region: str + ): + super().__init__() + self._name = benchmark + + # Convention: the trigger queue carries the name of the function. The + # result queue carries the name of the function + "-result". + if (queue_type == QueueType.RESULT and not benchmark.endswith("-result")): + self._name = "{}-{}".format(benchmark, queue_type) + + self._queue_type = queue_type + self._region = region + + @abstractmethod + def create_queue(self): + pass + + @abstractmethod + def remove_queue(self): + pass + + @abstractmethod + def send_message(self, serialized_message: str): + pass + + @abstractmethod + def receive_message(self) -> str: + pass diff --git a/sebs/faas/storage.py b/sebs/faas/storage.py index 5b93c053..890c68cc 100644 --- a/sebs/faas/storage.py +++ b/sebs/faas/storage.py @@ -143,20 +143,22 @@ def remove_bucket(self, bucket: str): def benchmark_data( self, benchmark: str, requested_buckets: Tuple[int, int] ) -> Tuple[List[str], List[str]]: + # The root benchmark name, i.e. xxx.map-reduce. + root_benchmark = '{}.{}'.format(benchmark.split('.')[0], benchmark.split('.')[1]) """ Add an input path inside benchmarks bucket. Bucket name format: name-idx-input """ for i in range(0, requested_buckets[0]): - self.input_prefixes.append("{}-{}-input".format(benchmark, i)) + self.input_prefixes.append("{}-{}-input".format(root_benchmark, i)) """ Add an input path inside benchmarks bucket. 
Bucket name format: name-idx-output """ for i in range(0, requested_buckets[1]): - self.output_prefixes.append("{}-{}-output".format(benchmark, i)) + self.output_prefixes.append("{}-{}-output".format(root_benchmark, i)) cached_storage = self.cache_client.get_storage_config(self.deployment_name(), benchmark) self.cached = True diff --git a/sebs/faas/system.py b/sebs/faas/system.py index 17116e69..8cdd1fb7 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -167,6 +167,7 @@ def package_code( language_version: str, benchmark: str, is_cached: bool, + trigger: Optional[Trigger.TriggerType], ) -> Tuple[str, int]: pass @@ -317,6 +318,10 @@ def is_configuration_changed(self, cached_function: Function, benchmark: Benchma return changed + @abstractmethod + def default_application_name(self, code_package: Benchmark) -> str: + pass + @abstractmethod def default_function_name(self, code_package: Benchmark) -> str: pass @@ -337,7 +342,12 @@ def download_metrics( pass @abstractmethod - def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: + def create_trigger( + self, + function: Function, + trigger_type: Trigger.TriggerType, + with_result_queue: Optional[bool] = False + ) -> Trigger: pass # @abstractmethod diff --git a/sebs/gcp/function.py b/sebs/gcp/function.py index 6736c1ca..21f19c9d 100644 --- a/sebs/gcp/function.py +++ b/sebs/gcp/function.py @@ -13,8 +13,9 @@ def __init__( code_package_hash: str, cfg: FunctionConfig, bucket: Optional[str] = None, + application_name: Optional[str] = None ): - super().__init__(benchmark, name, code_package_hash, cfg) + super().__init__(benchmark, name, code_package_hash, cfg, application_name) self.bucket = bucket @staticmethod @@ -30,7 +31,7 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> "GCPFunction": from sebs.faas.function import Trigger - from sebs.gcp.triggers import LibraryTrigger, HTTPTrigger + from sebs.gcp.triggers import LibraryTrigger, HTTPTrigger, QueueTrigger, StorageTrigger cfg = FunctionConfig.deserialize(cached_config["config"]) ret = GCPFunction( @@ -39,11 +40,17 @@ def deserialize(cached_config: dict) -> "GCPFunction": cached_config["hash"], cfg, cached_config["bucket"], + cached_config["application_name"], ) for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": LibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), + { + "Library": LibraryTrigger, + "HTTP": HTTPTrigger, + "Queue": QueueTrigger, + "Storage": StorageTrigger, + }.get(trigger["type"]), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 47526d33..ea3fd73f 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -103,6 +103,98 @@ def get_storage( self.storage.replace_existing = replace_existing return self.storage + """ + Provide the fully qualified name of a trigger resource (queue or storage). + """ + + def get_trigger_resource_name(self, func_name: str) -> str: + trigger = func_name.split("-")[-1] + + assert trigger == "queue" or trigger == "storage" + + if trigger == "queue": + return "projects/{project_name}/topics/{topic}".format( + project_name=self.config.project_name, topic=func_name + ) + else: + return "projects/{project_name}/buckets/{bucket}".format( + project_name=self.config.project_name, bucket=func_name + ) + + """ + Trigger resources (queue, bucket) must exist on GCP before the + corresponding function is first deployed. 
+ + This function creates the required resources and returns a dict + containing trigger information required by create_req inside of + create_function. + + :param func_name: the name of the function to be deployed, + including its trigger + + :param cached: when True, skip the creation of the actual resource + - merely create the configuration required to deploy the function. + This option is used in update_function() only. + + :return: JSON/dict with the trigger configuration required by GCP + on function creation/update + """ + + def create_trigger_resource(self, func_name: str, cached=False) -> Dict: + trigger = func_name.split("-")[-1] + + if trigger == "queue": + topic_name = self.get_trigger_resource_name(func_name) + + if not cached: + pub_sub = build("pubsub", "v1", cache_discovery=False) + + self.logging.info(f"Creating queue '{topic_name}'") + try: + pub_sub.projects().topics().create(name=topic_name).execute() + self.logging.info("Created queue") + except HttpError as http_error: + if http_error.resp.status == 409: + self.logging.info("Queue already exists, reusing...") + + return { + "eventTrigger": { + "eventType": "providers/cloud.pubsub/eventTypes/topic.publish", + "resource": topic_name, + }, + "entryPoint": "handler_queue", + } + elif trigger == "storage": + bucket_name = self.get_trigger_resource_name(func_name) + + if not cached: + storage = build("storage", "v1", cache_discovery=False) + + self.logging.info(f"Creating storage bucket '{bucket_name}'") + try: + storage.buckets().insert( + project=self.config.project_name, + body={"name": func_name}, + ).execute() + self.logging.info("Created storage bucket") + except HttpError as http_error: + if http_error.resp.status == 409: + self.logging.info("Storage bucket already exists, reusing...") + + return { + "eventTrigger": { + "eventType": "google.storage.object.finalize", + "resource": bucket_name, + }, + "entryPoint": "handler_storage", + } + # HTTP triggers do not require resource creation + return {"httpsTrigger": {}, "entryPoint": "handler_http"} + + @staticmethod + def default_application_name(code_package: Benchmark) -> str: + return GCP.format_function_name(code_package.application_name) + @staticmethod def default_function_name(code_package: Benchmark) -> str: # Create function name @@ -140,6 +232,7 @@ def package_code( language_version: str, benchmark: str, is_cached: bool, + trigger: Optional[Trigger.TriggerType], ) -> Tuple[str, int]: CONFIG_FILES = { @@ -159,7 +252,8 @@ def package_code( shutil.move(file, function_dir) requirements = open(os.path.join(directory, "requirements.txt"), "w") - requirements.write("google-cloud-storage") + requirements.write("google-cloud-storage\n") + requirements.write("google-cloud-pubsub") requirements.close() # rename handler function.py since in gcp it has to be caled main.py @@ -215,9 +309,19 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti full_func_name = GCP.get_full_function_name(project_name, location, func_name) get_req = self.function_client.projects().locations().functions().get(name=full_func_name) + # Add result queue and application name env vars. + env_vars = { + "RESULT_QUEUE": code_package.benchmark_config.result_queue, + "APP_NAME": code_package.application_name + } + try: get_req.execute() except HttpError: + # Before creating the function, ensure all trigger resources (queue, + # bucket) exist on GCP. 
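The dict returned by create_trigger_resource is merged into the request body built below via Python 3.9's `|` operator, replacing the httpsTrigger/entryPoint pair that used to be hard-coded. Roughly, for a queue-triggered function (names illustrative):

    base_body = {
        "name": "projects/my-project/locations/us-east1/functions/my-func-queue",
        "availableMemoryMb": 256,
        "timeout": "60s",
    }
    trigger_info = {
        "eventTrigger": {
            "eventType": "providers/cloud.pubsub/eventTypes/topic.publish",
            "resource": "projects/my-project/topics/my-func-queue",
        },
        "entryPoint": "handler_queue",
    }
    request_body = base_body | trigger_info  # dict union requires Python 3.9+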
+ trigger_info = self.create_trigger_resource(func_name) + create_req = ( self.function_client.projects() .locations() @@ -228,14 +332,14 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti ), body={ "name": full_func_name, - "entryPoint": "handler", "runtime": code_package.language_name + language_runtime.replace(".", ""), "availableMemoryMb": memory, "timeout": str(timeout) + "s", - "httpsTrigger": {}, "ingressSettings": "ALLOW_ALL", "sourceArchiveUrl": "gs://" + code_bucket + "/" + code_prefix, - }, + "environmentVariables": env_vars, + } + | trigger_info, ) ) create_req.execute() @@ -259,7 +363,12 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti self.logging.info(f"Function {func_name} accepts now unauthenticated invocations!") function = GCPFunction( - func_name, benchmark, code_package.hash, function_cfg, code_bucket + name=func_name, + benchmark=benchmark, + code_package_hash=code_package.hash, + cfg=function_cfg, + bucket=code_bucket, + application_name=code_package.application_name ) else: # if result is not empty, then function does exists @@ -271,6 +380,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti code_package_hash=code_package.hash, cfg=function_cfg, bucket=code_bucket, + application_name=code_package.application_name ) self.update_function(function, code_package) @@ -283,29 +393,58 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti return function - def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: - from sebs.gcp.triggers import HTTPTrigger + def create_trigger( + self, + function: Function, + trigger_type: Trigger.TriggerType, + with_result_queue: Optional[bool] = False + ) -> Trigger: + from sebs.gcp.triggers import HTTPTrigger, QueueTrigger, StorageTrigger - if trigger_type == Trigger.TriggerType.HTTP: + location = self.config.region + project_name = self.config.project_name + full_func_name = GCP.get_full_function_name(project_name, location, function.name) + self.logging.info(f"Function {function.name} - waiting for deployment...") + our_function_req = ( + self.function_client.projects().locations().functions().get(name=full_func_name) + ) + deployed = False + while not deployed: + status_res = our_function_req.execute() + if status_res["status"] == "ACTIVE": + deployed = True + else: + time.sleep(3) + self.logging.info(f"Function {function.name} - deployed!") - location = self.config.region - project_name = self.config.project_name - full_func_name = GCP.get_full_function_name(project_name, location, function.name) - self.logging.info(f"Function {function.name} - waiting for deployment...") - our_function_req = ( - self.function_client.projects().locations().functions().get(name=full_func_name) - ) - deployed = False - while not deployed: - status_res = our_function_req.execute() - if status_res["status"] == "ACTIVE": - deployed = True - else: - time.sleep(3) - self.logging.info(f"Function {function.name} - deployed!") + trigger: Trigger + if trigger_type == Trigger.TriggerType.HTTP: invoke_url = status_res["httpsTrigger"]["url"] - - trigger = HTTPTrigger(invoke_url) + trigger = HTTPTrigger( + function.name, + url=invoke_url, + application_name=function.application_name, + with_result_queue=with_result_queue + ) + self.logging.info(f"Created HTTP trigger for {function.name} function") + elif trigger_type == Trigger.TriggerType.QUEUE: + trigger = QueueTrigger( + function.name, + 
queue_name=self.get_trigger_resource_name(function.name), + region=self.config.region, + application_name=function.application_name, + with_result_queue=with_result_queue + ) + self.logging.info(f"Created Queue trigger for {function.name} function") + elif trigger_type == Trigger.TriggerType.STORAGE: + trigger = StorageTrigger( + function.name, + bucket_name=self.get_trigger_resource_name(function.name), + region=self.config.region, + application_name=function.application_name, + with_result_queue=with_result_queue + ) + self.logging.info(f"Created Storage trigger for {function.name} function") else: raise RuntimeError("Not supported!") @@ -317,12 +456,20 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) def cached_function(self, function: Function): from sebs.faas.function import Trigger - from sebs.gcp.triggers import LibraryTrigger + from sebs.gcp.triggers import LibraryTrigger, QueueTrigger, StorageTrigger + gcp_trigger: Trigger for trigger in function.triggers(Trigger.TriggerType.LIBRARY): gcp_trigger = cast(LibraryTrigger, trigger) gcp_trigger.logging_handlers = self.logging_handlers gcp_trigger.deployment_client = self + for trigger in function.triggers(Trigger.TriggerType.QUEUE): + gcp_trigger = cast(QueueTrigger, trigger) + gcp_trigger.logging_handlers = self.logging_handlers + gcp_trigger.deployment_client = self + for trigger in function.triggers(Trigger.TriggerType.STORAGE): + gcp_trigger = cast(StorageTrigger, trigger) + gcp_trigger.logging_handlers = self.logging_handlers def update_function(self, function: Function, code_package: Benchmark): @@ -337,6 +484,17 @@ def update_function(self, function: Function, code_package: Benchmark): full_func_name = GCP.get_full_function_name( self.config.project_name, self.config.region, function.name ) + + # Before creating the function, ensure all trigger resources (queue, + # bucket) exist on GCP. + trigger_info = self.create_trigger_resource(function.name, cached=True) + + # Add result queue and applcation name env vars. 
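For completeness, a hypothetical sketch of how a deployed GCP function can consume these settings: the entry point registered by create_trigger_resource receives the Pub/Sub event with a base64-encoded payload, and the measurement is published to the result topic named by RESULT_QUEUE. The real benchmark wrapper is not part of this diff, and the project id is assumed to be supplied via a separate, hypothetical PROJECT_ID variable.

    import base64
    import json
    import os

    from google.cloud import pubsub_v1

    def handler_queue(event, context):
        # First-generation background functions receive the Pub/Sub message
        # as a dict; the payload sits base64-encoded under "data".
        payload = json.loads(base64.b64decode(event["data"]).decode("utf-8"))

        # ... run the benchmark on `payload` here ...
        measurement = {"request_id": context.event_id, "result": {}}  # shape illustrative

        publisher = pubsub_v1.PublisherClient()
        topic = publisher.topic_path(
            os.environ["PROJECT_ID"],  # assumed to be provided alongside RESULT_QUEUE
            os.environ["RESULT_QUEUE"],
        )
        publisher.publish(topic, json.dumps(measurement).encode("utf-8"))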
+ env_vars = { + "RESULT_QUEUE": code_package.benchmark_config.result_queue, + "APP_NAME": code_package.application_name + } + req = ( self.function_client.projects() .locations() @@ -345,13 +503,13 @@ def update_function(self, function: Function, code_package: Benchmark): name=full_func_name, body={ "name": full_func_name, - "entryPoint": "handler", "runtime": code_package.language_name + language_runtime.replace(".", ""), "availableMemoryMb": function.config.memory, "timeout": str(function.config.timeout) + "s", - "httpsTrigger": {}, "sourceArchiveUrl": "gs://" + bucket + "/" + code_package_name, - }, + "environmentVariables": env_vars, + } + | trigger_info, ) ) res = req.execute() diff --git a/sebs/gcp/queue.py b/sebs/gcp/queue.py new file mode 100644 index 00000000..189fafef --- /dev/null +++ b/sebs/gcp/queue.py @@ -0,0 +1,124 @@ +from sebs.faas.queue import Queue, QueueType + +from google.api_core import retry +from google.api_core.exceptions import AlreadyExists +from google.cloud import pubsub_v1 + +import os + + +class GCPQueue(Queue): + @staticmethod + def typename() -> str: + return "GCP.Queue" + + @staticmethod + def deployment_name(): + return "gcp" + + @property + def topic_name(self): + return self._topic_name + + @property + def subscription_name(self): + return self._subscription_name + + @property + def subscription_client(self): + return self._subscription_client + + def __init__( + self, + benchmark: str, + queue_type: QueueType, + region: str + ): + super().__init__( + benchmark, + queue_type, + region + ) + self.client = pubsub_v1.PublisherClient() + self._subscription_client = pubsub_v1.SubscriberClient() + + self._topic_name = 'projects/{project_id}/topics/{topic}'.format( + project_id=os.getenv('GOOGLE_CLOUD_PROJECT'), + topic=self.name, + ) + self._subscription_name = 'projects/{project_id}/subscriptions/{sub}'.format( + project_id=os.getenv('GOOGLE_CLOUD_PROJECT'), + sub=self.name, + ) + + def create_queue(self): + self.logging.info(f"Creating queue {self.name}") + try: + self.client.create_topic(name=self.topic_name) + self.logging.info("Created queue") + except AlreadyExists: + self.logging.info("Queue already exists, reusing...") + + # GCP additionally needs a 'subscription' resource which is the + # actual receiver of the messages. It is constructed and destructed + # alongside the topic at all times. + self.logging.info(f"Creating queue subscription") + try: + self.subscription_client.create_subscription( + name=self.subscription_name, + topic=self.topic_name + ) + self.logging.info("Created queue subscription") + except AlreadyExists: + self.logging.info("Subscription already exists, reusing...") + + def remove_queue(self): + self.logging.info(f"Deleting queue and associated subscription{self.name}") + + self.client.delete_topic(topic=self.topic_name) + self.subscription_client.delete_subscription(subscription=self.subscription_name) + + self.logging.info("Deleted queue and associated subscription") + + def send_message(self, serialized_message: str): + self.client.publish(self.topic_name, serialized_message.decode("utf-8")) + self.logging.info(f"Sent message to queue {self.name}") + + # Receive messages through the 'pull' (sync) method. 
+ def receive_message(self) -> str: + self.logging.info(f"Pulling a message from {self.name}") + + response = self.subscription_client.pull( + subscription=self.subscription_name, + max_messages=1, + retry=retry.Retry(deadline=5), + ) + + if (len(response.received_messages) == 0): + self.logging.info("No messages to be received") + return "" + + # Acknowledge the received message so it is not sent again. + received_message = response.received_messages[0] + self.subscription_client.acknowledge( + subscription=self.subscription_name, + ack_ids=[received_message.ack_id], + ) + self.logging.info(f"Received a message from {self.name}") + + return received_message.message.data + + def serialize(self) -> dict: + return { + "name": self.name, + "type": self.queue_type, + "region": self.region, + } + + @staticmethod + def deserialize(obj: dict) -> "GCPQueue": + return GCPQueue( + obj["name"], + obj["type"], + obj["region"], + ) diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 13cc3d6c..72bdff10 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -1,18 +1,45 @@ +import base64 import concurrent.futures import datetime import json +import os import time +from googleapiclient.discovery import build +from googleapiclient.errors import HttpError from typing import Dict, Optional # noqa +from google.cloud import storage as gcp_storage + from sebs.gcp.gcp import GCP +from sebs.gcp.queue import GCPQueue from sebs.faas.function import ExecutionResult, Trigger +from sebs.faas.queue import QueueType class LibraryTrigger(Trigger): - def __init__(self, fname: str, deployment_client: Optional[GCP] = None): + def __init__( + self, + fname: str, + deployment_client: Optional[GCP] = None, + application_name: Optional[str] = None, + result_queue: Optional[GCPQueue] = None, + with_result_queue: Optional[bool] = False + ): super().__init__() self.name = fname self._deployment_client = deployment_client + self._result_queue = result_queue + self.with_result_queue = with_result_queue + + # Create result queue for communicating benchmark results back to the + # client. 
+ if (self.with_result_queue and not self._result_queue): + self._result_queue = GCPQueue( + f"{application_name}-result", + QueueType.RESULT, + self.region + ) + self._result_queue.create_queue() @staticmethod def typename() -> str: @@ -31,6 +58,11 @@ def deployment_client(self, deployment_client: GCP): def trigger_type() -> Trigger.TriggerType: return Trigger.TriggerType.LIBRARY + @property + def result_queue(self) -> GCPQueue: + assert self._result_queue + return self._result_queue + def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.info(f"Invoke function {self.name}") @@ -75,17 +107,46 @@ def async_invoke(self, payload: dict): raise NotImplementedError() def serialize(self) -> dict: - return {"type": "Library", "name": self.name} + return { + "type": "Library", + "name": self.name, + "result_queue": self._result_queue.serialize() if self._result_queue else "", + "with_result_queue": self.with_result_queue, + } @staticmethod def deserialize(obj: dict) -> Trigger: - return LibraryTrigger(obj["name"]) + return LibraryTrigger( + obj["name"], + GCPQueue.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + obj["with_result_queue"], + ) class HTTPTrigger(Trigger): - def __init__(self, url: str): + def __init__( + self, + fname: str, + url: str, + application_name: Optional[str] = None, + result_queue: Optional[GCPQueue] = None, + with_result_queue: Optional[bool] = False + ): super().__init__() + self.name = fname self.url = url + self._result_queue = result_queue + self.with_result_queue = with_result_queue + + # Create result queue for communicating benchmark results back to the + # client. + if (self.with_result_queue and not self._result_queue): + self._result_queue = GCPQueue( + f"{application_name}-result", + QueueType.RESULT, + self.region + ) + self._result_queue.create_queue() @staticmethod def typename() -> str: @@ -95,6 +156,11 @@ def typename() -> str: def trigger_type() -> Trigger.TriggerType: return Trigger.TriggerType.HTTP + @property + def result_queue(self) -> GCPQueue: + assert self._result_queue + return self._result_queue + def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.debug(f"Invoke function {self.url}") @@ -106,8 +172,234 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: return fut def serialize(self) -> dict: - return {"type": "HTTP", "url": self.url} + return { + "type": "HTTP", + "name": self.name, + "url": self.url, + "result_queue": self._result_queue.serialize() if self._result_queue else "", + "with_result_queue": self.with_result_queue, + } + + @staticmethod + def deserialize(obj: dict) -> Trigger: + return HTTPTrigger( + obj["name"], + obj["url"], + GCPQueue.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + obj["with_result_queue"], + ) + + +class QueueTrigger(Trigger): + def __init__( + self, + fname: str, + queue_name: str, + region: str, + application_name: Optional[str] = None, + result_queue: Optional[GCPQueue] = None, + with_result_queue: Optional[bool] = False + ): + super().__init__() + self.name = fname + self._queue_name = queue_name + self._region = region + self._result_queue = result_queue + self.with_result_queue = with_result_queue + + # Create result queue for communicating benchmark results back to the + # client. 
+ if (self.with_result_queue and not self._result_queue): + self._result_queue = GCPQueue( + f"{application_name}-result", + QueueType.RESULT, + self.region + ) + self._result_queue.create_queue() + + @staticmethod + def typename() -> str: + return "GCP.QueueTrigger" + + @property + def queue_name(self) -> str: + assert self._queue_name + return self._queue_name + + @property + def region(self) -> str: + assert self._region + return self._region + + @property + def result_queue(self) -> GCPQueue: + assert self._result_queue + return self._result_queue + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.QUEUE + + def sync_invoke(self, payload: dict) -> ExecutionResult: + + self.logging.info(f"Invoke function {self.name}") + + # Init client + pub_sub = build("pubsub", "v1", cache_discovery=False) + + # Prepare payload + # GCP is very particular with data encoding... + serialized_payload = base64.b64encode(json.dumps(payload).encode("utf-8")) + + # Publish payload to queue + begin = datetime.datetime.now() + pub_sub.projects().topics().publish( + topic=self.queue_name, + body={ + "messages": [{"data": serialized_payload.decode("utf-8")}], + }, + ).execute() + + results = self.collect_async_results(self.result_queue) + + ret = [] + for recv_ts, result_data in results.items(): + result = ExecutionResult.from_times(begin, recv_ts) + result.parse_benchmark_output(result_data) + ret.append(result) + + return ret + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + + pool = concurrent.futures.ThreadPoolExecutor() + fut = pool.submit(self.sync_invoke, payload) + return fut + + def serialize(self) -> dict: + return { + "type": "Queue", + "name": self.name, + "queue_name": self.queue_name, + "region": self.region, + "result_queue": self._result_queue.serialize() if self._result_queue else "", + "with_result_queue": self.with_result_queue, + } @staticmethod def deserialize(obj: dict) -> Trigger: - return HTTPTrigger(obj["url"]) + return QueueTrigger( + fname=obj["name"], + queue_name=obj["queue_name"], + region=obj["region"], + result_queue=GCPQueue.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + with_result_queue=obj["with_result_queue"], + ) + + +class StorageTrigger(Trigger): + def __init__( + self, + fname: str, + bucket_name: str, + region: str, + application_name: Optional[str] = None, + result_queue: Optional[GCPQueue] = None, + with_result_queue: Optional[bool] = False + ): + super().__init__() + self.name = fname + self._bucket_name = bucket_name + self._region = region + self._result_queue = result_queue + self.with_result_queue = with_result_queue + + # Create result queue for communicating benchmark results back to the + # client. 
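+ # Same pattern as the queue trigger: uploading the payload object starts the function asynchronously, and its measurements are read back from this result queue.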
+ if (self.with_result_queue and not self._result_queue): + self._result_queue = GCPQueue( + f"{application_name}-result", + QueueType.RESULT, + self.region + ) + self._result_queue.create_queue() + + @staticmethod + def typename() -> str: + return "GCP.StorageTrigger" + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.STORAGE + + @property + def bucket_name(self) -> str: + assert self._bucket_name + return self._bucket_name + + @property + def region(self) -> str: + assert self._region + return self._region + + @property + def result_queue(self) -> GCPQueue: + assert self._result_queue + return self._result_queue + + def sync_invoke(self, payload: dict) -> ExecutionResult: + + self.logging.info(f"Invoke function {self.name}") + + # Init clients + client = gcp_storage.Client() + bucket_instance = client.bucket(self.name) + + # Prepare payload + file_name = "payload.json" + with open(file_name, "w") as fp: + json.dump(payload, fp) + + # Upload object + gcp_storage.blob._MAX_MULTIPART_SIZE = 5 * 1024 * 1024 + blob = bucket_instance.blob(blob_name=file_name, chunk_size=4 * 1024 * 1024) + begin = datetime.datetime.now() + blob.upload_from_filename(file_name) + + self.logging.info(f"Uploaded payload to bucket {self.bucket_name}") + + results = self.collect_async_results(self.result_queue) + + ret = [] + for recv_ts, result_data in results.items(): + result = ExecutionResult.from_times(begin, recv_ts) + result.parse_benchmark_output(result_data) + ret.append(result) + + return ret + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + + pool = concurrent.futures.ThreadPoolExecutor() + fut = pool.submit(self.sync_invoke, payload) + return fut + + def serialize(self) -> dict: + return { + "type": "Storage", + "name": self.name, + "bucket_name": self.bucket_name, + "region": self.region, + "result_queue": self._result_queue.serialize() if self._result_queue else "", + "with_result_queue": self.with_result_queue, + } + + @staticmethod + def deserialize(obj: dict) -> Trigger: + return StorageTrigger( + fname=obj["name"], + bucket_name=obj["bucket_name"], + region=obj["region"], + result_queue=GCPQueue.deserialize(obj["result_queue"]) if obj["result_queue"] != "" else None, + with_result_queue=obj["with_result_queue"], + ) diff --git a/sebs/local/local.py b/sebs/local/local.py index cb1aabe2..1c975461 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -132,6 +132,7 @@ def package_code( language_version: str, benchmark: str, is_cached: bool, + trigger: Optional[Trigger.TriggerType], ) -> Tuple[str, int]: CONFIG_FILES = { diff --git a/sebs/openwhisk/openwhisk.py b/sebs/openwhisk/openwhisk.py index 00660de9..43c9cd54 100644 --- a/sebs/openwhisk/openwhisk.py +++ b/sebs/openwhisk/openwhisk.py @@ -208,6 +208,7 @@ def package_code( language_version: str, benchmark: str, is_cached: bool, + trigger: Optional[Trigger.TriggerType], ) -> Tuple[str, int]: # Regardless of Docker image status, we need to create .zip file diff --git a/sebs/sebs.py b/sebs/sebs.py index 58bc07a9..149f0090 100644 --- a/sebs/sebs.py +++ b/sebs/sebs.py @@ -162,6 +162,7 @@ def get_benchmark( name: str, deployment: FaaSSystem, config: ExperimentConfig, + app_function_name: Optional[str] = None, logging_filename: Optional[str] = None, ) -> Benchmark: benchmark = Benchmark( @@ -172,6 +173,7 @@ def get_benchmark( self._output_dir, self.cache_client, self.docker_client, + app_function_name=app_function_name ) benchmark.logging_handlers = self.generate_logging_handlers( 
logging_filename=logging_filename diff --git a/sebs/utils.py b/sebs/utils.py index 3df8ffc9..995a1354 100644 --- a/sebs/utils.py +++ b/sebs/utils.py @@ -128,13 +128,19 @@ def configure_logging(): :param benchmark: Benchmark name. :param path: Path for lookup, relative to repository. + :param function: [Optional, for apps] the particular function we are + looking for. :return: relative path to directory corresponding to benchmark """ -def find_benchmark(benchmark: str, path: str): +def find_benchmark(benchmark: str, path: str, function: Optional[str] = None): benchmarks_dir = os.path.join(PROJECT_DIR, path) benchmark_path = find(benchmark, benchmarks_dir) + + if (function): + benchmark_path = find(function, benchmark_path) + return benchmark_path diff --git a/tests/aws/create_function.py b/tests/aws/create_function.py index e672cc89..bb22cfb0 100644 --- a/tests/aws/create_function.py +++ b/tests/aws/create_function.py @@ -35,8 +35,8 @@ class AWSCreateFunction(unittest.TestCase): } } package_files = { - "python": ["handler.py", "function/storage.py", "requirements.txt", '.python_packages/'], - "nodejs": ["handler.js", "function/storage.js", "package.json", "node_modules/"] + "python": ["handler.py", "function/storage.py", "function/queue.py", "requirements.txt", '.python_packages/'], + "nodejs": ["handler.js", "function/storage.js", "function/queue.js", "package.json", "node_modules/"] } benchmark = "110.dynamic-html" function_name_suffixes = []
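For reference, the following is a minimal, hypothetical sketch (not part of the patch) of how the new queue-based round trip is driven from the client side. All names below are placeholders, and in practice the trigger and its Pub/Sub resources are provisioned by the SeBS GCP deployment code rather than by hand.

from sebs.gcp.triggers import QueueTrigger

# Placeholder identifiers -- real values come from the SeBS config and deployment.
trigger = QueueTrigger(
    fname="my-function",
    queue_name="projects/my-project/topics/my-function-trigger",
    region="us-east1",
    application_name="my-app",
    with_result_queue=True,  # provisions the "my-app-result" queue via GCPQueue.create_queue()
)

# sync_invoke() publishes the JSON payload to the trigger topic and then blocks in
# collect_async_results(), turning each message pulled from the result queue into an
# ExecutionResult.
results = trigger.sync_invoke({"key": "value"})

# Triggers are persisted through serialize()/deserialize(); a cached trigger reuses
# the serialized result queue instead of creating a new one.
restored = QueueTrigger.deserialize(trigger.serialize())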