-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add file parsing functionality using Unstructured API
- Add parseFile server action to process uploaded files - Integrate with Unstructured API to extract structured content - Generate both JSON and Markdown outputs from parsed content - Update file processing flow to include structured data URLs - Store processed results in Vercel Blob storage
- Loading branch information
1 parent
25926f6
commit 68b152b
Showing
3 changed files
with
164 additions
and
3 deletions.
There are no files selected for viewing
70 changes: 70 additions & 0 deletions
70
app/(playground)/p/[agentId]/beta-proto/files/server-actions.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
import { put } from "@vercel/blob"; | ||
import { UnstructuredClient } from "unstructured-client"; | ||
import { Strategy } from "unstructured-client/sdk/models/shared"; | ||
import { elementsToMarkdown } from "../utils/unstructured"; | ||
import type { FileId } from "./types"; | ||
|
||
/** Payload for {@link uploadFile}: the file to store and the id it is filed under. */
type UploadFileInput = {
  fileId: FileId;
  file: File;
};

/**
 * Stores the given file in blob storage under `files/<fileId>/<file name>`.
 *
 * The blob is written with public access and the file's own MIME type as its
 * content type.
 *
 * @param input - The file together with the id used to namespace its path.
 * @returns Whatever `put` resolves to for the stored blob.
 */
export async function uploadFile({ input }: { input: UploadFileInput }) {
  const { fileId, file } = input;
  const pathname = `files/${fileId}/${file.name}`;
  const stored = await put(pathname, file, {
    access: "public",
    contentType: file.type,
  });
  return stored;
}
|
||
/** Describes an already-uploaded file that should be parsed. */
type ParseFileInput = {
  id: FileId;
  name: string;
  blobUrl: string;
};

/**
 * Downloads a previously uploaded file, sends it to the Unstructured API for
 * partitioning, and stores the result twice in blob storage:
 *
 * - `files/<id>/partition.json` — the partitioned elements as pretty-printed JSON
 * - `files/<id>/markdown.md` — the elements rendered through `elementsToMarkdown`
 *
 * @param args - Id, original file name and blob URL of the file to parse.
 * @returns The `put` result for the stored markdown blob.
 * @throws Error if `UNSTRUCTURED_API_KEY` is not set, if the file cannot be
 *   downloaded from `blobUrl`, or if the partition request does not return
 *   status 200.
 */
export async function parseFile(args: ParseFileInput) {
  const apiKey = process.env.UNSTRUCTURED_API_KEY;
  if (apiKey === undefined) {
    throw new Error("UNSTRUCTURED_API_KEY is not set");
  }
  const client = new UnstructuredClient({
    security: {
      apiKeyAuth: apiKey,
    },
  });

  const response = await fetch(args.blobUrl);
  // fetch() only rejects on network failure; without this check an HTTP error
  // page would be forwarded to the partition API as if it were the file.
  if (!response.ok) {
    throw new Error(`Failed to download file: ${response.status}`);
  }
  const content = await response.blob();

  const partitionResponse = await client.general.partition({
    partitionParameters: {
      files: {
        fileName: args.name,
        content,
      },
      strategy: Strategy.Fast,
      splitPdfPage: false,
      splitPdfConcurrencyLevel: 1,
    },
  });
  if (partitionResponse.statusCode !== 200) {
    console.error(partitionResponse.rawResponse);
    throw new Error(`Failed to parse file: ${partitionResponse.statusCode}`);
  }

  // Normalize once so the JSON and the markdown outputs are derived from the
  // same value; stringifying an undefined `elements` would otherwise store the
  // literal text "undefined" instead of valid JSON.
  const elements = partitionResponse.elements ?? [];

  const jsonString = JSON.stringify(elements, null, 2);
  const jsonBlob = new Blob([jsonString], { type: "application/json" });
  await put(`files/${args.id}/partition.json`, jsonBlob, {
    access: "public",
    contentType: jsonBlob.type,
  });

  const markdown = elementsToMarkdown(elements);
  const markdownBlob = new Blob([markdown], { type: "text/markdown" });
  const vercelBlob = await put(`files/${args.id}/markdown.md`, markdownBlob, {
    access: "public",
    contentType: markdownBlob.type,
  });

  return vercelBlob;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters