Skip to content

Commit

Permalink
Add 601.pdf-generator benchmark and its data
Browse files Browse the repository at this point in the history
Signed-off-by: Abhishek Kumar <[email protected]>
  • Loading branch information
octonawish-akcodes committed Aug 24, 2024
1 parent 6d7b456 commit 019d571
Show file tree
Hide file tree
Showing 6 changed files with 139 additions and 1 deletion.
2 changes: 1 addition & 1 deletion benchmarks-data
6 changes: 6 additions & 0 deletions benchmarks/600.pdf/601.pdf-generator/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"timeout": 60,
"memory": 256,
"languages": ["nodejs"]
}

32 changes: 32 additions & 0 deletions benchmarks/600.pdf/601.pdf-generator/input.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import os
import glob

def buckets_count():
    """Return how many storage buckets the benchmark needs.

    Returns:
        A ``(input_buckets, output_buckets)`` tuple; this benchmark uses
        one of each.
    """
    input_buckets = 1
    output_buckets = 1
    return input_buckets, output_buckets

def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func):
    """Upload the HTML template and its PNG images and build the invocation payload.

    Args:
        data_dir: Directory holding the benchmark data; must contain
            ``template/demo.html`` and may contain ``template/images/*.png``.
        size: Requested input size; not used by this benchmark.
        benchmarks_bucket: Name of the bucket stored in the payload.
        input_paths: Input prefixes; only the first entry is used.
        output_paths: Output prefixes; only the first entry is used.
        upload_func: Callable invoked as ``upload_func(0, key, local_path)``
            for every file that has to be uploaded.

    Returns:
        A dict with an ``object`` section (image key prefix and the name of
        the HTML input file) and a ``bucket`` section (bucket name plus the
        input and output prefixes).
    """
    template_dir = os.path.join(data_dir, 'template')
    input_file_path = os.path.join(template_dir, 'demo.html')
    images_dir = os.path.join(template_dir, 'images')

    # Upload the HTML file to the input bucket
    upload_func(0, "demo.html", input_file_path)

    input_config = {
        'object': {},
        'bucket': {
            'bucket': benchmarks_bucket,
            'input': input_paths[0],
            'output': output_paths[0],
        },
    }

    # glob.glob() yields matches in arbitrary order; sorting keeps the upload
    # sequence identical between runs and machines.
    for file in sorted(glob.glob(os.path.join(images_dir, '*.png'))):
        # Images keep their path relative to data_dir as the object key.
        img = os.path.relpath(file, data_dir)
        upload_func(0, img, file)

    # Only the image key prefix and the HTML file name are passed on; the
    # individual image names are not part of the payload.
    input_config['object']['key'] = "images/"
    input_config['object']['input_file'] = 'demo.html'

    return input_config
63 changes: 63 additions & 0 deletions benchmarks/600.pdf/601.pdf-generator/nodejs/function.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
const puppeteer = require('puppeteer-core');
const path = require('path');
const fs = require('fs');
const { PassThrough } = require('stream');
const storage = require('./storage');

// Storage client from the local ./storage module, created once when this
// module is loaded and reused by every handler invocation.
let storage_handler = new storage.storage();

// Path of the Chromium executable, resolved relative to this file.
// NOTE(review): presumably unpacked into chromium/chrome-linux64/ by the
// benchmark's init.sh — confirm the layout matches the downloaded archive.
const browserPath = path.join(__dirname, 'chromium/chrome-linux64/chrome');


exports.handler = async function(event) {
const bucket = event.bucket.bucket;
const input_prefix = event.bucket.input;
const output_prefix = event.bucket.output;
const input_file = event.object.input_file;

// Create a read stream for the input HTML file
let readStreamPromise = storage_handler.downloadStream(bucket, path.join(input_prefix, input_file));

// Create a PassThrough stream to pipe the HTML content into Puppeteer
const htmlStream = new PassThrough();

// Create a write stream for the output PDF file
let [writeStream, promise, uploadName] = storage_handler.uploadStream(bucket, path.join(output_prefix, 'output.pdf'));

try {
// Download the HTML file from storage
const inputStream = await readStreamPromise;
inputStream.pipe(htmlStream);

// Launch Puppeteer and generate the PDF
const browser = await puppeteer.launch({ executablePath: browserPath });
const page = await browser.newPage();
await page.setContent(await streamToString(htmlStream), { waitUntil: 'networkidle0' });
const pdfBuffer = await page.pdf({ format: 'A4' });

// Close Puppeteer
await browser.close();

// Pipe the PDF buffer into the write stream
writeStream.write(pdfBuffer);
writeStream.end();

// Wait for upload to complete
await promise;

return { bucket: output_prefix, key: uploadName };
} catch (error) {
console.error('Error generating PDF:', error);
throw error;
}
};

/**
 * Collects a readable stream into a single UTF-8 string.
 *
 * Uses async iteration, so a stream error or a premature close rejects the
 * returned promise instead of leaving it pending. Chunks are joined as bytes
 * before decoding, which keeps multi-byte characters split across chunk
 * boundaries intact.
 *
 * @param {import('stream').Readable} stream - Stream to drain.
 * @returns {Promise<string>} The full stream content decoded as UTF-8.
 */
async function streamToString(stream) {
  const chunks = [];
  for await (const chunk of stream) {
    // Streams with an encoding set (or in object mode) emit strings, which
    // Buffer.concat() rejects; normalise them to Buffers first.
    chunks.push(typeof chunk === 'string' ? Buffer.from(chunk, 'utf8') : chunk);
  }
  return Buffer.concat(chunks).toString('utf8');
}
29 changes: 29 additions & 0 deletions benchmarks/600.pdf/601.pdf-generator/nodejs/init.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash
#
# Downloads a pinned Chrome for Testing build and unpacks it into
# <DIR>/chromium so that the function code finds the executable at
# chromium/chrome-linux64/chrome.
#
# Usage: init.sh <DIR> [VERBOSE]
#   DIR      target directory of the code package
#   VERBOSE  accepted for interface compatibility; currently unused

# Abort on the first failing step instead of carrying on with a broken or
# partially downloaded browser.
set -euo pipefail

DIR=$1
VERBOSE=${2:-}

CHROMIUM_URL="https://storage.googleapis.com/chrome-for-testing-public/127.0.6533.88/linux64/chrome-linux64.zip"

DOWNLOAD_DIR="${DIR}/chromium"
ARCHIVE="${DOWNLOAD_DIR}/chrome-linux.zip"

# Create the target directory if it doesn't exist
mkdir -p "$DOWNLOAD_DIR"

# Download Chromium: -f turns HTTP errors into a non-zero exit status rather
# than saving the error page as the archive, -L follows redirects.
curl -fL -o "$ARCHIVE" "$CHROMIUM_URL"

# Extract the downloaded zip file; it unpacks into a chrome-linux64/
# directory, which is the layout the function code expects, so the contents
# are left in place.
unzip -q "$ARCHIVE" -d "$DOWNLOAD_DIR"

# Clean up the downloaded zip file
rm "$ARCHIVE"

# Fail early if the archive layout ever changes.
if [ ! -x "${DOWNLOAD_DIR}/chrome-linux64/chrome" ]; then
  echo "Chromium executable not found at ${DOWNLOAD_DIR}/chrome-linux64/chrome" >&2
  exit 1
fi

8 changes: 8 additions & 0 deletions benchmarks/600.pdf/601.pdf-generator/nodejs/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"name": "pdf-generator",
"version": "1.0.0",
"description": "PDF Generator Benchmark using Puppeteer",
"dependencies": {
"puppeteer-core": "^22.15.0"
}
}

0 comments on commit 019d571

Please sign in to comment.