Skip to content

Commit

Permalink
feat:refactor XMLExtractor to use @flatfile/util-extractor package (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
carlbrugger authored Aug 11, 2023
1 parent c0b8d8d commit 49ef75c
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 121 deletions.
5 changes: 5 additions & 0 deletions .changeset/brown-buckets-drum.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@flatfile/plugin-xml-extractor': minor
---

Refactor XMLExtractor to use @flatfile/util-extractor package
1 change: 1 addition & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions plugins/xml-extractor/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"dependencies": {
"@flatfile/api": "^1.5.13",
"@flatfile/listener": "^0.3.11",
"@flatfile/util-extractor": "0.2.1",
"@flatfile/util-file-buffer": "0.0.2",
"xml-json-format": "^1.0.8"
}
Expand Down
73 changes: 7 additions & 66 deletions plugins/xml-extractor/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,69 +1,10 @@
import type { FlatfileListener } from "@flatfile/listener";
import { fileBuffer } from "@flatfile/util-file-buffer";
import { flatToValues, schemaFromObjectList, xmlToJson } from "./parser";
import api, { Flatfile } from "@flatfile/api";
import { parseBuffer } from './parser'
import { Extractor } from '@flatfile/util-extractor'

/**
 * Builds the XML extractor plugin.
 *
 * Delegates to `Extractor` from `@flatfile/util-extractor`, registering
 * `parseBuffer` as the parser for `.xml` files and forwarding `options`
 * to it unchanged.
 *
 * @param options - Optional parser settings. `transform`, when given, is
 *   applied to every parsed row by `parseBuffer`.
 * @returns The value produced by `Extractor` for the `.xml` registration.
 */
export const XMLExtractor = (options?: {
  separator?: string
  attributePrefix?: string
  transform?: (row: Record<string, any>) => Record<string, any>
}) => {
  return Extractor('.xml', parseBuffer, options)
}
36 changes: 9 additions & 27 deletions plugins/xml-extractor/src/parser.spec.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { findRoot, schemaFromObjectList, xmlToJson } from './parser'
import { findRoot, headersFromObjectList, xmlToJson } from './parser'

const XML = `<?xml version="1.0" encoding="UTF-8"?>
<root>
Expand Down Expand Up @@ -75,33 +75,15 @@ describe('parser', function () {
})
})

test('schemaFromObjectList', function () {
test('headersFromObjectList', function () {
const json = xmlToJson(XML)
expect(schemaFromObjectList(json)).toEqual([
{
key: 'street',
type: 'string',
},
{
key: 'country/name',
type: 'string',
},
{
key: 'country/iso',
type: 'string',
},
{
key: 'zip',
type: 'string',
},
{
key: 'zip#format',
type: 'string',
},
{
key: '#active',
type: 'string',
},
expect(headersFromObjectList(json)).toEqual([
'street',
'country/name',
'country/iso',
'zip',
'zip#format',
'#active',
])
})

Expand Down
73 changes: 45 additions & 28 deletions plugins/xml-extractor/src/parser.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,26 @@
import toJSON from 'xml-json-format'
import { WorkbookCapture } from '@flatfile/util-extractor'

/**
 * Turns the raw bytes of an XML file into a single-sheet workbook capture.
 *
 * The buffer is decoded to a string, converted to flat row objects with
 * `xmlToJson`, optionally passed through `options.transform` row by row,
 * and the column headers are then derived from the resulting rows.
 *
 * @param buffer - Contents of the XML file.
 * @param options - Optional settings. Only `transform` is read here;
 *   `separator` and `attributePrefix` are accepted but not used by this
 *   function.
 * @returns A capture holding one sheet, keyed `Sheet1`, with its headers
 *   and row data.
 */
export function parseBuffer(
  buffer: Buffer,
  options?: {
    separator?: string
    attributePrefix?: string
    transform?: (row: Record<string, any>) => Record<string, any>
  }
): WorkbookCapture {
  // Fall back to a pass-through when no transform was supplied.
  const passThrough = (row: Record<string, any>) => row
  const rowTransform = options?.transform || passThrough

  const rows = xmlToJson(buffer.toString())
  const data = rows.map(rowTransform)
  const headers = headersFromObjectList(data)

  const capture = { Sheet1: { headers, data } }
  return capture as WorkbookCapture
}

export function flatToValues(
obj: Record<string, any>[]
Expand All @@ -13,61 +35,56 @@ export function xmlToJson(xml: string): Array<Record<string, any>> {
return json.map((obj) => flattenObject(obj))
}

/**
 * Collects the distinct keys used across a list of row objects.
 *
 * Each key is reported once, in the order it is first encountered while
 * walking the rows from first to last. A `Set` is used as the accumulator
 * so that every string key (including names such as `__proto__`) is kept
 * and insertion order is preserved.
 *
 * @param arr - Row objects whose own enumerable keys become headers.
 * @returns The unique keys; an empty array when `arr` is empty.
 */
export function headersFromObjectList(
  arr: Array<Record<string, any>>
): Array<string> {
  const seen = new Set<string>()
  arr.forEach((row) => {
    Object.keys(row).forEach((key) => {
      seen.add(key)
    })
  })
  return Array.from(seen)
}

/**
 * Lifts the entries of an `_attributes` child onto the object itself,
 * prefixing each attribute name with `#`.
 *
 * The input is never modified: when `_attributes` is present a new object
 * is returned that holds every other property followed by the prefixed
 * attributes. When it is absent the input is returned as-is.
 *
 * @param obj - Object that may carry an `_attributes` property.
 * @returns An object without `_attributes` and with `#name` entries added.
 */
function flattenAttributes(obj: Record<string, any>): Record<string, any> {
  if ('_attributes' in obj) {
    // Destructure into a copy instead of deleting from the caller's object.
    const { _attributes, ...rest } = obj
    const attributes = mapObject(_attributes, (k, v) => [`#${k}`, v])
    return { ...rest, ...attributes }
  }
  return obj
}

/**
 * Builds a new object by passing every key/value pair of `obj` through `fn`.
 *
 * @param obj - Source object; its own enumerable keys are visited in order.
 * @param fn - Receives a key and its value and returns the `[key, value]`
 *   pair to store in the result.
 * @returns A fresh object containing the pairs produced by `fn`.
 */
function mapObject(
  obj: Record<string, any>,
  fn: (k: string, v: any) => [string, any]
): Record<string, any> {
  const result: Record<string, any> = {}
  Object.keys(obj).forEach((k) => {
    const [key, value] = fn(k, obj[k])
    result[key] = value
  })
  return result
}

/**
 * Flattens a nested object into a single level of path-like keys.
 *
 * Nested keys are joined to their parent path with `/`. Keys starting with
 * `#` (attributes) are appended directly to the parent path, and a nested
 * `_text` key collapses into the parent path itself. At the top level
 * (empty prefix) keys are kept verbatim.
 *
 * @param input - Object to flatten; it is not modified.
 * @param prefix - Path accumulated so far; empty for the top-level call.
 * @returns A new object whose values are all non-object leaves (or `null`).
 */
function flattenObject(
  input: Record<string, any>,
  prefix = ''
): Record<string, any> {
  const obj = flattenAttributes(input)
  const result: Record<string, any> = {}
  Object.keys(obj).forEach((k) => {
    const value = obj[k]
    const pre = prefix
      ? prefix + (k.startsWith('#') || k === '_text' ? '' : '/')
      : ''
    // `typeof null` is 'object', so exclude it explicitly: recursing into
    // null would throw on the `in` check inside flattenAttributes.
    if (value !== null && typeof value === 'object') {
      Object.assign(result, flattenObject(value, pre + k))
    } else {
      result[pre + (k === '_text' && pre ? '' : k)] = value
    }
  })
  return result
}

export function findRoot(json: Record<string, any>): Array<any> {
Expand Down

0 comments on commit 49ef75c

Please sign in to comment.