diff --git a/benchmarks-data b/benchmarks-data
index 6a17a460..f407c248 160000
--- a/benchmarks-data
+++ b/benchmarks-data
@@ -1 +1 @@
-Subproject commit 6a17a460f289e166abb47ea6298fb939e80e8beb
+Subproject commit f407c24814f623f77dcb535d882c241909ae7588
diff --git a/benchmarks/300.utilities/321.pdf-generator/config.json b/benchmarks/300.utilities/321.pdf-generator/config.json
new file mode 100644
index 00000000..28a80c59
--- /dev/null
+++ b/benchmarks/300.utilities/321.pdf-generator/config.json
@@ -0,0 +1,5 @@
+{
+  "timeout": 60,
+  "memory": 256,
+  "languages": ["nodejs"]
+}
diff --git a/benchmarks/300.utilities/321.pdf-generator/input.py b/benchmarks/300.utilities/321.pdf-generator/input.py
new file mode 100644
index 00000000..41e33cb9
--- /dev/null
+++ b/benchmarks/300.utilities/321.pdf-generator/input.py
@@ -0,0 +1,40 @@
+import os
+import glob
+
+
+def buckets_count():
+    """Return the number of (input, output) buckets the benchmark uses."""
+    return (1, 1)
+
+
+def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func):
+    """Upload the HTML template and its images; return the invocation payload.
+
+    The payload holds the bucket name and input/output prefixes under 'bucket'
+    and the name of the HTML file to render under 'object'.
+    """
+    # The HTML file and the images directory
+    input_file_path = os.path.join(data_dir, 'template', 'demo.html')
+    images_dir = os.path.join(data_dir, 'template', 'images')
+
+    # Upload the HTML file to the input bucket
+    upload_func(0, "demo.html", input_file_path)
+
+    # Upload each image in the images directory to the input bucket.
+    # NOTE(review): the keys are relative to data_dir ('template/images/...'),
+    # while 'key' below is 'images/' - confirm which prefix is intended.
+    for file in glob.glob(os.path.join(images_dir, '*.png')):
+        img = os.path.relpath(file, data_dir)
+        upload_func(0, img, file)
+
+    return {
+        'object': {
+            'key': "images/",
+            'input_file': 'demo.html',
+        },
+        'bucket': {
+            'bucket': benchmarks_bucket,
+            'input': input_paths[0],
+            'output': output_paths[0],
+        },
+    }
diff --git a/benchmarks/300.utilities/321.pdf-generator/nodejs/function.js b/benchmarks/300.utilities/321.pdf-generator/nodejs/function.js
new file mode 100644
index 00000000..5b734e75
--- /dev/null
+++ b/benchmarks/300.utilities/321.pdf-generator/nodejs/function.js
@@ -0,0 +1,70 @@
+const puppeteer = require('puppeteer-core');
+const path = require('path');
+const storage = require('./storage');
+
+const storage_handler = new storage.storage();
+
+// Chromium is unpacked to this location inside the code package by init.sh.
+const browserPath = path.join(__dirname, 'chromium/chrome-linux64/chrome');
+
+/**
+ * Render an HTML document from storage to an A4 PDF and upload the result.
+ *
+ * @param {object} event - Invocation payload carrying `bucket.bucket`,
+ *   `bucket.input`, `bucket.output` and `object.input_file`.
+ * @returns {Promise<{bucket: string, key: string}>} Output prefix and uploaded key.
+ * @throws Rethrows any download, rendering or upload error after logging it.
+ */
+exports.handler = async function(event) {
+  const bucket = event.bucket.bucket;
+  const input_prefix = event.bucket.input;
+  const output_prefix = event.bucket.output;
+  const input_file = event.object.input_file;
+
+  try {
+    // Download the HTML file from storage. The download stream is read
+    // directly: pipe() does not forward 'error' events, so reading through an
+    // intermediate stream would never settle if the download failed.
+    const inputStream = await storage_handler.downloadStream(bucket, path.join(input_prefix, input_file));
+    const html = await streamToString(inputStream);
+
+    const pdfBuffer = await renderPdf(html);
+
+    // Open the upload only once there is data to send, so a rendering failure
+    // does not leave an unfinished upload stream behind.
+    const [writeStream, uploadPromise, uploadName] = storage_handler.uploadStream(bucket, path.join(output_prefix, 'output.pdf'));
+    writeStream.end(pdfBuffer);
+
+    // Wait for upload to complete
+    await uploadPromise;
+
+    return { bucket: output_prefix, key: uploadName };
+  } catch (error) {
+    console.error('Error generating PDF:', error);
+    throw error;
+  }
+};
+
+// Launch Chromium, render the HTML to an A4 PDF and always close the browser.
+async function renderPdf(html) {
+  const browser = await puppeteer.launch({ executablePath: browserPath });
+  try {
+    const page = await browser.newPage();
+    await page.setContent(html, { waitUntil: 'networkidle0' });
+    return await page.pdf({ format: 'A4' });
+  } finally {
+    // Without this, a failed render would leave the Chromium process running.
+    await browser.close();
+  }
+}
+
+// Utility function to convert a stream to a string
+function streamToString(stream) {
+  return new Promise((resolve, reject) => {
+    const chunks = [];
+    // Accept string chunks too, in case the stream has an encoding set.
+    stream.on('data', chunk => chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk));
+    stream.on('end', () => resolve(Buffer.concat(chunks).toString('utf8')));
+    stream.on('error', reject);
+  });
+}
diff --git a/benchmarks/300.utilities/321.pdf-generator/nodejs/init.sh b/benchmarks/300.utilities/321.pdf-generator/nodejs/init.sh
new file mode 100644
index 00000000..efb351ca
--- /dev/null
+++ b/benchmarks/300.utilities/321.pdf-generator/nodejs/init.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Abort packaging if the download or extraction fails.
+set -e
+
+DIR=$1
+VERBOSE=$2
+
+CHROMIUM_URL="https://storage.googleapis.com/chrome-for-testing-public/127.0.6533.88/linux64/chrome-linux64.zip"
+
+# Directory inside the code package where Chromium is unpacked
+DOWNLOAD_DIR="${DIR}/chromium"
+
+# Create the target directory if it doesn't exist
+mkdir -p "$DOWNLOAD_DIR"
+
+# Download Chromium; -f turns HTTP errors into a non-zero exit status
+curl -fL -o "${DOWNLOAD_DIR}/chrome-linux64.zip" "$CHROMIUM_URL"
+
+# Extract the downloaded zip file. The archive's top-level directory is
+# chrome-linux64/, which is the path function.js loads the binary from, so the
+# extracted tree is left in place.
+unzip -q "${DOWNLOAD_DIR}/chrome-linux64.zip" -d "$DOWNLOAD_DIR"
+
+# Clean up the downloaded zip file
+rm "${DOWNLOAD_DIR}/chrome-linux64.zip"
diff --git a/benchmarks/300.utilities/321.pdf-generator/nodejs/package.json b/benchmarks/300.utilities/321.pdf-generator/nodejs/package.json
new file mode 100644
index 00000000..d9a57e2f
--- /dev/null
+++ b/benchmarks/300.utilities/321.pdf-generator/nodejs/package.json
@@ -0,0 +1,8 @@
+{
+  "name": "pdf-generator",
+  "version": "1.0.0",
+  "description": "PDF Generator Benchmark using Puppeteer",
+  "dependencies": {
+    "puppeteer-core": "^22.15.0"
+  }
+}