Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add 321.pdf-generator benchmark and its data #217

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion benchmarks-data
6 changes: 6 additions & 0 deletions benchmarks/300.utilities/321.pdf-generator/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"timeout": 60,
"memory": 256,
"languages": ["nodejs"]
}

32 changes: 32 additions & 0 deletions benchmarks/300.utilities/321.pdf-generator/input.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import os
import glob

def buckets_count():
    """Return the number of storage buckets this benchmark uses.

    Returns:
        tuple: ``(input_buckets, output_buckets)``; this benchmark uses one
        of each.
    """
    input_buckets = 1
    output_buckets = 1
    return input_buckets, output_buckets

def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func):
    """Upload the HTML template and its PNG images, and build the invocation payload.

    Args:
        data_dir: Directory containing ``template/demo.html`` and
            ``template/images/*.png``.
        size: Not used by this benchmark.
        benchmarks_bucket: Stored under ``bucket.bucket`` in the result.
        input_paths: Its first element is stored under ``bucket.input``.
        output_paths: Its first element is stored under ``bucket.output``.
        upload_func: Called as ``upload_func(0, key, local_path)`` for every
            uploaded file (the leading 0 is presumably the input bucket
            index - confirm against the caller).

    Returns:
        dict: ``{'object': {...}, 'bucket': {...}}`` describing the uploaded
        input and the storage locations.
    """
    template_dir = os.path.join(data_dir, 'template')
    html_path = os.path.join(template_dir, 'demo.html')
    png_pattern = os.path.join(template_dir, 'images', '*.png')

    # The HTML document goes first, under a fixed key.
    upload_func(0, "demo.html", html_path)

    storage_config = {
        'bucket': benchmarks_bucket,
        'input': input_paths[0],
        'output': output_paths[0],
    }

    # NOTE(review): image keys are relative to data_dir, i.e.
    # "template/images/<name>.png", while the returned object key below is
    # "images/" - confirm this layout is what the function expects.
    for png_path in glob.glob(png_pattern):
        upload_func(0, os.path.relpath(png_path, data_dir), png_path)

    object_config = {'key': "images/", 'input_file': 'demo.html'}
    return {'object': object_config, 'bucket': storage_config}
octonawish-akcodes marked this conversation as resolved.
Show resolved Hide resolved
62 changes: 62 additions & 0 deletions benchmarks/300.utilities/321.pdf-generator/nodejs/function.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
const puppeteer = require('puppeteer-core');
const path = require('path');
const { PassThrough } = require('stream');
const storage = require('./storage');

// Storage client from the local ./storage module, created once at module load;
// the handler uses it for its downloadStream/uploadStream calls.
let storage_handler = new storage.storage();

// Location of the Chromium executable inside this function's directory;
// passed to puppeteer.launch() as `executablePath`.
const browserPath = path.join(__dirname, 'chromium/chrome-linux64/chrome');


exports.handler = async function(event) {
const bucket = event.bucket.bucket;
const input_prefix = event.bucket.input;
const output_prefix = event.bucket.output;
const input_file = event.object.input_file;

// Create a read stream for the input HTML file
let readStreamPromise = storage_handler.downloadStream(bucket, path.join(input_prefix, input_file));

// Create a PassThrough stream to pipe the HTML content into Puppeteer
const htmlStream = new PassThrough();

// Create a write stream for the output PDF file
let [writeStream, promise, uploadName] = storage_handler.uploadStream(bucket, path.join(output_prefix, 'output.pdf'));

try {
// Download the HTML file from storage
const inputStream = await readStreamPromise;
inputStream.pipe(htmlStream);

// Launch Puppeteer and generate the PDF
const browser = await puppeteer.launch({ executablePath: browserPath });
const page = await browser.newPage();
await page.setContent(await streamToString(htmlStream), { waitUntil: 'networkidle0' });
const pdfBuffer = await page.pdf({ format: 'A4' });

// Close Puppeteer
await browser.close();

// Pipe the PDF buffer into the write stream
writeStream.write(pdfBuffer);
writeStream.end();

// Wait for upload to complete
await promise;

return { bucket: output_prefix, key: uploadName };
} catch (error) {
console.error('Error generating PDF:', error);
throw error;
}
};

// Utility function to convert a stream to a string
/**
 * Collects a readable stream into a single UTF-8 string.
 *
 * Chunks are concatenated as bytes before decoding, so multi-byte characters
 * split across chunk boundaries are decoded correctly. String chunks (from a
 * stream with an encoding set, or an object-mode stream of strings) are
 * accepted as well as Buffer/Uint8Array chunks.
 *
 * @param {import('stream').Readable} stream - Stream to drain.
 * @returns {Promise<string>} The full stream contents; '' for an empty stream.
 *   Rejects with the stream's error if it emits 'error'.
 */
function streamToString(stream) {
  return new Promise((resolve, reject) => {
    const chunks = [];
    stream.on('data', (chunk) => {
      // Buffer.concat() only accepts Buffer/Uint8Array entries; a string chunk
      // would make it throw inside the 'end' listener and never settle.
      chunks.push(typeof chunk === 'string' ? Buffer.from(chunk, 'utf8') : chunk);
    });
    stream.on('end', () => resolve(Buffer.concat(chunks).toString('utf8')));
    stream.on('error', reject);
  });
}
29 changes: 29 additions & 0 deletions benchmarks/300.utilities/321.pdf-generator/nodejs/init.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash

DIR=$1
VERBOSE=$2
octonawish-akcodes marked this conversation as resolved.
Show resolved Hide resolved

# Abort on the first failing step so a broken download or extraction is not
# silently packaged with the function.
set -e

# Pinned Chrome for Testing build (linux64).
CHROMIUM_URL="https://storage.googleapis.com/chrome-for-testing-public/127.0.6533.88/linux64/chrome-linux64.zip"

if [ -z "${DIR}" ]; then
  echo "error: target directory (first argument) is required" >&2
  exit 1
fi

# Everything is placed under <DIR>/chromium
DOWNLOAD_DIR="${DIR}/chromium"
ARCHIVE="${DOWNLOAD_DIR}/chrome-linux64.zip"

# Create the target directory if it doesn't exist
mkdir -p "$DOWNLOAD_DIR"

# Download Chromium; --fail turns HTTP errors into a non-zero exit instead of
# saving the error page as the archive, --location follows redirects.
curl --fail --location -o "$ARCHIVE" "$CHROMIUM_URL"

# Extract the downloaded zip file. The archive unpacks into chrome-linux64/,
# which is left in place because function.js resolves the executable at
# chromium/chrome-linux64/chrome.
unzip -q "$ARCHIVE" -d "$DOWNLOAD_DIR"

# Clean up the downloaded zip file
rm "$ARCHIVE"

# Fail early if the expected executable is missing after extraction
if [ ! -x "${DOWNLOAD_DIR}/chrome-linux64/chrome" ]; then
  echo "error: ${DOWNLOAD_DIR}/chrome-linux64/chrome not found after extraction" >&2
  exit 1
fi

Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"name": "pdf-generator",
"version": "1.0.0",
"description": "PDF Generator Benchmark using Puppeteer",
"dependencies": {
"puppeteer-core": "^22.15.0"
}
}