diff --git a/.changeset/brown-buckets-drum.md b/.changeset/brown-buckets-drum.md new file mode 100644 index 000000000..69a08b088 --- /dev/null +++ b/.changeset/brown-buckets-drum.md @@ -0,0 +1,5 @@ +--- +'@flatfile/plugin-xml-extractor': minor +--- + +Refactor XMLExtractor to use @flatfile/util-extractor package diff --git a/package-lock.json b/package-lock.json index 00c25c5ab..4c5933ad6 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13788,6 +13788,7 @@ "dependencies": { "@flatfile/api": "^1.5.13", "@flatfile/listener": "^0.3.11", + "@flatfile/util-extractor": "0.2.1", "@flatfile/util-file-buffer": "0.0.2", "xml-json-format": "^1.0.8" }, diff --git a/plugins/xml-extractor/package.json b/plugins/xml-extractor/package.json index 41f122255..bd3664d83 100644 --- a/plugins/xml-extractor/package.json +++ b/plugins/xml-extractor/package.json @@ -29,6 +29,7 @@ "dependencies": { "@flatfile/api": "^1.5.13", "@flatfile/listener": "^0.3.11", + "@flatfile/util-extractor": "0.2.1", "@flatfile/util-file-buffer": "0.0.2", "xml-json-format": "^1.0.8" } diff --git a/plugins/xml-extractor/src/index.ts b/plugins/xml-extractor/src/index.ts index 41f40af7b..5308d9591 100644 --- a/plugins/xml-extractor/src/index.ts +++ b/plugins/xml-extractor/src/index.ts @@ -1,69 +1,10 @@ -import type { FlatfileListener } from "@flatfile/listener"; -import { fileBuffer } from "@flatfile/util-file-buffer"; -import { flatToValues, schemaFromObjectList, xmlToJson } from "./parser"; -import api, { Flatfile } from "@flatfile/api"; +import { parseBuffer } from './parser' +import { Extractor } from '@flatfile/util-extractor' -export const XMLExtractor = (opts?: { - separator?: string; - attributePrefix?: string; - transform?: (row: Record) => Record; +export const XMLExtractor = (options?: { + separator?: string + attributePrefix?: string + transform?: (row: Record) => Record }) => { - return (handler: FlatfileListener) => { - handler.use( - fileBuffer(".xml", async (file, buffer, event) => { - const job = await api.jobs.create({ - type: "file", - operation: "extract", - status: "ready", - source: event.context.fileId, - }); - try { - const json = xmlToJson(buffer.toString()).map( - opts?.transform || ((x) => x) - ); - const schema = schemaFromObjectList(json); - const workbook = await createWorkbook( - event.context.environmentId, - file, - file.name, - schema - ); - await api.records.insert(workbook.sheets![0].id, flatToValues(json)); - await api.files.update(file.id, { - workbookId: workbook.id, - }); - await api.jobs.update(job.data.id, { - status: "complete", - }); - console.log(workbook); - } catch (e) { - console.log(`error ${e}`); - await api.jobs.update(job.data.id, { - status: "failed", - }); - } - }) - ); - }; -}; - -async function createWorkbook( - environmentId: string, - file: Flatfile.File_, - filename: string, - fields: Array<{ key: string; type: "string" }> -): Promise { - const workbook = await api.workbooks.create({ - name: "[file] " + filename, - sheets: [ - { - name: "Default", - fields, - }, - ], - spaceId: file.spaceId, - labels: ["file"], - environmentId, - }); - return workbook.data; + return Extractor('.xml', parseBuffer, options) } diff --git a/plugins/xml-extractor/src/parser.spec.ts b/plugins/xml-extractor/src/parser.spec.ts index e922a0d10..fbfe17c21 100644 --- a/plugins/xml-extractor/src/parser.spec.ts +++ b/plugins/xml-extractor/src/parser.spec.ts @@ -1,4 +1,4 @@ -import { findRoot, schemaFromObjectList, xmlToJson } from './parser' +import { findRoot, headersFromObjectList, xmlToJson } from './parser' const XML = ` @@ -75,33 +75,15 @@ describe('parser', function () { }) }) - test('schemaFromObjectList', function () { + test('headersFromObjectList', function () { const json = xmlToJson(XML) - expect(schemaFromObjectList(json)).toEqual([ - { - key: 'street', - type: 'string', - }, - { - key: 'country/name', - type: 'string', - }, - { - key: 'country/iso', - type: 'string', - }, - { - key: 'zip', - type: 'string', - }, - { - key: 'zip#format', - type: 'string', - }, - { - key: '#active', - type: 'string', - }, + expect(headersFromObjectList(json)).toEqual([ + 'street', + 'country/name', + 'country/iso', + 'zip', + 'zip#format', + '#active', ]) }) diff --git a/plugins/xml-extractor/src/parser.ts b/plugins/xml-extractor/src/parser.ts index 0b9e99063..dc64e17f8 100644 --- a/plugins/xml-extractor/src/parser.ts +++ b/plugins/xml-extractor/src/parser.ts @@ -1,4 +1,26 @@ import toJSON from 'xml-json-format' +import { WorkbookCapture } from '@flatfile/util-extractor' + +export function parseBuffer( + buffer: Buffer, + options?: { + separator?: string + attributePrefix?: string + transform?: (row: Record) => Record + } +): WorkbookCapture { + const transform = options?.transform || ((value) => value) + const data = xmlToJson(buffer.toString()).map(transform) + const headers = headersFromObjectList(data) + + const sheetName = 'Sheet1' + return { + [sheetName]: { + headers, + data, + }, + } as WorkbookCapture +} export function flatToValues( obj: Record[] @@ -13,32 +35,23 @@ export function xmlToJson(xml: string): Array> { return json.map((obj) => flattenObject(obj)) } -export function schemaFromObjectList( +export function headersFromObjectList( arr: Array> -): Array<{ key: string; type: 'string' }> { - const keys = arr.reduce((acc, o) => { - return { - ...acc, - ...Object.keys(o).reduce((acc, k) => ({ ...acc, [k]: true }), {}), - } - }, {}) - const obj = Object.keys(keys).reduce( - (acc, k) => ({ ...acc, [k]: { key: k, type: 'string' } }), - {} - ) - return Object.values(obj) +): Array { + const keys: Record = {} + arr.forEach((obj) => { + Object.keys(obj).forEach((key) => { + keys[key] = true + }) + }) + return Object.keys(keys) } function flattenAttributes(obj: Record): Record { if ('_attributes' in obj) { const attributes = mapObject(obj._attributes, (k, v) => [`#${k}`, v]) - const out = { - ...obj, - ...attributes, - _attributes: undefined, - } - delete out._attributes - return out + delete obj._attributes + return { ...obj, ...attributes } } return obj } @@ -46,11 +59,13 @@ function flattenAttributes(obj: Record): Record { function mapObject( obj: Record, fn: (k: string, v: any) => [string, any] -) { - return Object.keys(obj).reduce((acc, k) => { +): Record { + const result: Record = {} + Object.keys(obj).forEach((k) => { const [key, value] = fn(k, obj[k]) - return { ...acc, [key]: value } - }, {}) + result[key] = value + }) + return result } function flattenObject( @@ -58,16 +73,18 @@ function flattenObject( prefix = '' ): Record { const obj = flattenAttributes(input) - return Object.keys(obj).reduce((acc, k) => { + const result: Record = {} + Object.keys(obj).forEach((k) => { const pre = prefix ? prefix + (k.startsWith('#') || k === '_text' ? '' : '/') : '' if (typeof obj[k] === 'object') { - return { ...acc, ...flattenObject(obj[k], pre + k) } + Object.assign(result, flattenObject(obj[k], pre + k)) } else { - return { ...acc, [pre + (k === '_text' && pre ? '' : k)]: obj[k] } + result[pre + (k === '_text' && pre ? '' : k)] = obj[k] } - }, {}) + }) + return result } export function findRoot(json: Record): Array {