Skip to content

Commit

Permalink
Quit using 'pdfjs-dist' because it did not support SSR
Browse files (browse the repository at this point in the history)
  • Loading branch information
Rindrics committed Dec 20, 2024
1 parent 551bc7c commit 216b10b
Show file tree
Hide file tree
Showing 6 changed files with 10 additions and 18 deletions.
1 change: 0 additions & 1 deletion app/(playground)/p/[agentId]/actions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import {
withCountMeasurement,
} from "@/lib/opentelemetry";
import { type ListBlobResult, del, list, put } from "@vercel/blob";
import { getDocument } from "pdfjs-dist";
import { UnstructuredClient } from "unstructured-client";
import { Strategy } from "unstructured-client/sdk/models/shared";
import { vercelBlobFileFolder } from "./constants";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import {
withCountMeasurement,
} from "@/lib/opentelemetry";
import { put } from "@vercel/blob";
import { getDocument } from "pdfjs-dist";
import { PDFDocument } from "pdf-lib";
import { UnstructuredClient } from "unstructured-client";
import { Strategy } from "unstructured-client/sdk/models/shared";
import { elementsToMarkdown } from "../utils/unstructured";
Expand Down Expand Up @@ -99,7 +99,7 @@ export async function parseFile(args: ParseFileInput) {
startTime,
{
strategy,
pdf: await getDocument(args.blobUrl).promise,
pdf: await PDFDocument.load(await content.arrayBuffer()),
},
);
if (partitionResponse.statusCode !== 200) {
Expand Down
Binary file modified bun.lockb
Binary file not shown.
1 change: 0 additions & 1 deletion lib/opentelemetry/pdf.d.ts

This file was deleted.

19 changes: 7 additions & 12 deletions lib/opentelemetry/wrapper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@ import { getCurrentMeasurementScope, isRoute06User } from "@/app/(auth)/lib";
import { waitUntil } from "@vercel/functions";
import type { LanguageModelUsage } from "ai";
import type { LanguageModelV1 } from "ai";
import type { PDFDocumentProxy } from "pdfjs-dist";
import { getDocument } from "pdfjs-dist";
import * as pdfWorker from "pdfjs-dist/build/pdf.worker.mjs";
import { PDFDocument } from "pdf-lib";
import type { UnstructuredClient } from "unstructured-client";
import type { PartitionResponse } from "unstructured-client/sdk/models/operations/partition";
import type { Strategy } from "unstructured-client/sdk/models/shared";
Expand Down Expand Up @@ -155,13 +153,13 @@ type VercelBlobOperationType =

interface UnstructuredOptions {
strategy: Strategy;
pdf: PDFDocumentProxy;
pdf: PDFDocument;
}

type ServiceOptions = UnstructuredOptions | VercelBlobOperationType | undefined;

function getNumPages(pdf: PDFDocumentProxy) {
return pdf.numPages;
function getNumPages(pdf: PDFDocument) {
return pdf.getPages().length;
}

export function withCountMeasurement<T>(
Expand Down Expand Up @@ -278,25 +276,22 @@ export async function wrappedPartition(
}: PartitionParameters,
{ logger, startTime }: MeasureParameters,
): Promise<PartitionResponse> {
const pdf = await getDocument(blobUrl).promise;
const pdfContent = new Blob([await pdf.getData()], {
type: "application/pdf",
});
const content = await fetch(blobUrl).then((response) => response.blob());

return withCountMeasurement(
logger,
async () =>
client.general.partition({
partitionParameters: {
files: { fileName, content: pdfContent },
files: { fileName, content },
strategy,
splitPdfPage,
splitPdfConcurrencyLevel,
},
}),
ExternalServiceName.Unstructured,
startTime,
{ strategy, pdf },
{ strategy, pdf: await PDFDocument.load(await content.arrayBuffer()) },
).finally(() => waitForTelemetryExport());
}

Expand Down
3 changes: 1 addition & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
"next-auth": "^5.0.0-beta.20",
"next-themes": "0.3.0",
"openai": "4.64.0",
"pdfjs-dist": "4.9.155",
"pdf-lib": "1.17.1",
"pino": "9.5.0",
"posthog-js": "1.194.2",
"react": "19.0.0-rc-4d577fd2-20241104",
Expand All @@ -103,7 +103,6 @@
"@tailwindcss/typography": "0.5.15",
"@types/bun": "1.1.13",
"@types/node": "^22",
"@types/pdfjs-dist": "^2.10.378",
"@types/react": "npm:types-react@rc",
"@types/react-dom": "npm:types-react-dom@rc",
"drizzle-kit": "0.23.0",
Expand Down

0 comments on commit 216b10b

Please sign in to comment.