From c5e375cb46505113cf76e4185843c3f8246bcf5a Mon Sep 17 00:00:00 2001
From: Eli Sherer
Date: Thu, 1 Aug 2024 13:16:48 +0300
Subject: [PATCH] TS - add contains, csv, csvparse

---
 javascript/json-transform/src/JsonHelpers.ts  |   2 +-
 .../TransformerFunctionContains.test.ts       |  23 ++
 .../functions/TransformerFunctionCsv.test.ts  |  44 +++
 .../TransformerFunctionCsvParse.test.ts       |  33 ++
 .../src/formats/FormatDeserializer.ts         |   4 +
 .../src/formats/FormatSerializer.ts           |   4 +
 .../src/formats/csv/CsvFormat.ts              | 286 ++++++++++++++++++
 .../functions/TransformerFunctionContains.ts  |  33 ++
 .../src/functions/TransformerFunctionCsv.ts   |  61 ++++
 .../functions/TransformerFunctionCsvParse.ts  |  56 ++++
 .../src/transformerFunctions.ts               |   9 +-
 11 files changed, 551 insertions(+), 4 deletions(-)
 create mode 100644 javascript/json-transform/src/__tests__/functions/TransformerFunctionContains.test.ts
 create mode 100644 javascript/json-transform/src/__tests__/functions/TransformerFunctionCsv.test.ts
 create mode 100644 javascript/json-transform/src/__tests__/functions/TransformerFunctionCsvParse.test.ts
 create mode 100644 javascript/json-transform/src/formats/FormatDeserializer.ts
 create mode 100644 javascript/json-transform/src/formats/FormatSerializer.ts
 create mode 100644 javascript/json-transform/src/formats/csv/CsvFormat.ts
 create mode 100644 javascript/json-transform/src/functions/TransformerFunctionContains.ts
 create mode 100644 javascript/json-transform/src/functions/TransformerFunctionCsv.ts
 create mode 100644 javascript/json-transform/src/functions/TransformerFunctionCsvParse.ts

diff --git a/javascript/json-transform/src/JsonHelpers.ts b/javascript/json-transform/src/JsonHelpers.ts
index d1dd610..868fe8d 100644
--- a/javascript/json-transform/src/JsonHelpers.ts
+++ b/javascript/json-transform/src/JsonHelpers.ts
@@ -9,7 +9,7 @@ const JSONPATH_ROOT = "$",
   JSONPATH_ALT_PREFIX = "#",
   JSONPATH_ALT_PREFIX_ESC = "\\#";
 
-const isNullOrUndefined = (value: any) => value == null || typeof value === 'undefined';
+const isNullOrUndefined = (value: any) : value is null | undefined => value == null || typeof value === 'undefined';
 
 const getAsString = (value: any) : null | string => {
   if (isNullOrUndefined(value)) {
diff --git a/javascript/json-transform/src/__tests__/functions/TransformerFunctionContains.test.ts b/javascript/json-transform/src/__tests__/functions/TransformerFunctionContains.test.ts
new file mode 100644
index 0000000..7ace71c
--- /dev/null
+++ b/javascript/json-transform/src/__tests__/functions/TransformerFunctionContains.test.ts
@@ -0,0 +1,23 @@
+import { describe, test } from "vitest";
+import { assertTransformation} from "../BaseTransformationTest";
+
+describe("TransformerFunctionContains", () => {
+  test("object", () => {
+    assertTransformation([0, [], "a"], {
+      "$$contains": "$", "that": "a"
+    }, true);
+    // with transformation
+    assertTransformation("a", {
+      "$$contains": ["b","$"], "that": "a"
+    }, true);
+
+    assertTransformation([0, [], "a"], {
+      "$$contains": "$", "that": "b"
+    }, false);
+  });
+
+  test("inline", () => {
+    assertTransformation([0, [], "a"], "$$contains(a):$", true);
+    assertTransformation([0, [], "a"], "$$contains(b):$", false);
+  });
+});
diff --git a/javascript/json-transform/src/__tests__/functions/TransformerFunctionCsv.test.ts b/javascript/json-transform/src/__tests__/functions/TransformerFunctionCsv.test.ts
new file mode 100644
index 0000000..8a1c04f
--- /dev/null
+++ b/javascript/json-transform/src/__tests__/functions/TransformerFunctionCsv.test.ts
@@ -0,0 +1,44 @@
+import { describe, test } from "vitest";
+import { assertTransformation} from "../BaseTransformationTest";
+
+describe("TransformerFunctionCsv", () => {
+  test("inline", () => {
+    assertTransformation( [{"a":"A","b":1},{"a":"C","b":2}],
+      "$$csv:$",
+"a,b\nA,1\nC,2\n");
+    assertTransformation([{"a":"A","b":1},{"a":"C","b":2}],
+      "$$csv(true):$",
+      "A,1\nC,2\n");
+  });
+
+  test("object", () => {
+    assertTransformation(
+      [{"a":"A","b":1},{"a":"C","b":2}],
+      {
+        "$$csv": "$"
+      }, "a,b\nA,1\nC,2\n");
+    assertTransformation(
+      [{"a":"A","b":1},{"a":"C","b":2}],
+      {
+        "$$csv": "$",
+        "no_headers": true
+      }, "A,1\nC,2\n");
+  });
+
+  test("object_names", () => {
+    assertTransformation([[1,2],[3,4]], {
+      "$$csv": "$",
+      "names": ["a","b"]
+    }, "a,b\n1,2\n3,4\n");
+    // without names
+    assertTransformation([[1,2],[3,4]], { "$$csv": "$" }, "1,2\n3,4\n");
+  });
+
+  test("object_separator", () => {
+    // values containing the custom separator must be quoted
+    assertTransformation([{"a":"x;y","b":1}], {
+      "$$csv": "$",
+      "separator": ";"
+    }, "a;b\n\"x;y\";1\n");
+  });
+});
diff --git a/javascript/json-transform/src/__tests__/functions/TransformerFunctionCsvParse.test.ts b/javascript/json-transform/src/__tests__/functions/TransformerFunctionCsvParse.test.ts
new file mode 100644
index 0000000..588047c
--- /dev/null
+++ b/javascript/json-transform/src/__tests__/functions/TransformerFunctionCsvParse.test.ts
@@ -0,0 +1,33 @@
+import { describe, test } from "vitest";
+import { assertTransformation} from "../BaseTransformationTest";
+
+describe("TransformerFunctionCsvParse", () => {
+  test("inline", () => {
+    assertTransformation("a\n\",\"", "$$csvparse:$", [{"a": ","}]);
+    assertTransformation("a\n\"\"\"\"", "$$csvparse:$", [{"a": "\""}]);
+    assertTransformation("1,2\n3,4", "$$csvparse(true):$", [["1", "2"], ["3", "4"]]);
+  });
+
+  test("object", () => {
+    assertTransformation("a\n\",\"", {
+      "$$csvparse": "$"
+    },
+      [{"a": ","}]);
+    assertTransformation("a\n\"\"\"\"", {
+      "$$csvparse": "$"
+    },
+      [{"a": "\""}]);
+  });
+
+  test("object_names", () => {
+    // `names` sifts the columns taken from the header row
+    assertTransformation("a,b\n1,2", {
+      "$$csvparse": "$",
+      "names": ["a"]
+    }, [{"a": "1"}]);
+  });
+
+  test("trailing_empty_cell", () => {
+    assertTransformation("a,b\n1,", "$$csvparse:$", [{"a": "1", "b": ""}]);
+  });
+});
diff --git a/javascript/json-transform/src/formats/FormatDeserializer.ts b/javascript/json-transform/src/formats/FormatDeserializer.ts
new file mode 100644
index 0000000..390fd31
--- /dev/null
+++ b/javascript/json-transform/src/formats/FormatDeserializer.ts
@@ -0,0 +1,4 @@
+/** Parses the textual representation of a format into a value. */
+export interface FormatDeserializer {
+  deserialize(input: string): Record<string, any>;
+}
\ No newline at end of file
diff --git a/javascript/json-transform/src/formats/FormatSerializer.ts b/javascript/json-transform/src/formats/FormatSerializer.ts
new file mode 100644
index 0000000..a0b9a21
--- /dev/null
+++ b/javascript/json-transform/src/formats/FormatSerializer.ts
@@ -0,0 +1,4 @@
+/** Produces the textual representation of a payload. */
+export interface FormatSerializer {
+  serialize(payload: any): string;
+}
\ No newline at end of file
diff --git a/javascript/json-transform/src/formats/csv/CsvFormat.ts b/javascript/json-transform/src/formats/csv/CsvFormat.ts
new file mode 100644
index 0000000..2258570
--- /dev/null
+++ b/javascript/json-transform/src/formats/csv/CsvFormat.ts
@@ -0,0 +1,286 @@
+import {getAsString, isNullOrUndefined} from "../../JsonHelpers";
+import {FormatSerializer} from "../FormatSerializer";
+import {FormatDeserializer} from "../FormatDeserializer";
+
+const MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
+/** Number of UTF-16 code units a code point occupies (2 for supplementary code points, otherwise 1). */
+function charCount(codePoint: number) {
+  return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
+}
+
+/**
+ * CSV serializer and deserializer.
+ *
+ * `serialize` turns an array of objects or arrays into CSV text; `deserialize` parses CSV text
+ * into an array of objects (keyed by the header row or by `names`) or of arrays.
+ */
+class CsvFormat implements FormatSerializer, FormatDeserializer {
+  private static readonly COMMA = ",";
+  private static readonly DEFAULT_SEPARATOR = CsvFormat.COMMA;
+  private static readonly DOUBLE_QUOTES = "\"";
+  private static readonly EMBEDDED_DOUBLE_QUOTES = "\"\"";
+  private static readonly NEW_LINE_UNIX = "\n";
+  private static readonly LINE_FEED = '\n'.codePointAt(0);
+  private static readonly CARRIAGE_RETURN = '\r'.codePointAt(0);
+  private static readonly NEW_LINE_WINDOWS = "\r\n";
+
+  private readonly names?: string[];
+  private readonly noHeaders: boolean;
+  private readonly forceQuote: boolean;
+  private readonly separator: string;
+
+  /**
+   * @param names column names: the header row when serializing; the keys (with `noHeaders`) or the key filter when parsing
+   * @param noHeaders when true no header row is written (serialize) or expected (deserialize); defaults to false
+   * @param forceQuote when true every serialized cell is quoted; defaults to false
+   * @param separator field separator; defaults to ","
+   */
+  constructor(names?: string[] | null, noHeaders?: boolean | null, forceQuote?: boolean | null, separator?: string | null) {
+    this.names = names ?? undefined;
+    this.noHeaders = isNullOrUndefined(noHeaders) ? false : noHeaders;
+    this.forceQuote = isNullOrUndefined(forceQuote) ? false : forceQuote;
+    this.separator = isNullOrUndefined(separator) ? CsvFormat.DEFAULT_SEPARATOR : separator;
+  }
+
+  /**
+   * Appends a single cell. null/undefined become an empty cell. The cell is quoted (embedded
+   * quotes doubled) when quoting is forced or when it contains the separator, a comma, a quote,
+   * a line break, or starts/ends with a space.
+   */
+  private appendEscaped(sb: StringBuilder, val: any): void {
+    let value: string;
+    if (val === null || val === undefined) {
+      value = "";
+    } else {
+      value = getAsString(val) ?? "";
+    }
+    if (this.forceQuote ||
+      value.includes(this.separator) || // a custom separator inside a cell would otherwise split it
+      value.includes(CsvFormat.COMMA) ||
+      value.includes(CsvFormat.DOUBLE_QUOTES) ||
+      value.includes(CsvFormat.NEW_LINE_UNIX) ||
+      value.includes(CsvFormat.NEW_LINE_WINDOWS) ||
+      value.startsWith(" ") ||
+      value.endsWith(" ")) {
+      sb.append(CsvFormat.DOUBLE_QUOTES);
+      sb.append(value.replace(new RegExp(CsvFormat.DOUBLE_QUOTES, 'g'), CsvFormat.EMBEDDED_DOUBLE_QUOTES));
+      sb.append(CsvFormat.DOUBLE_QUOTES);
+    } else {
+      sb.append(value);
+    }
+  }
+
+  /** Appends the header row followed by a line feed; does nothing when `noHeaders` is set. */
+  private appendHeaders(sb: StringBuilder, headers: string[]): void {
+    if (this.noHeaders) return;
+    let first = true;
+    for (const name of headers) {
+      if (!first) {
+        sb.append(this.separator);
+      } else {
+        first = false;
+      }
+      this.appendEscaped(sb, name);
+    }
+    sb.append("\n");
+  }
+
+  /**
+   * Appends one row. With `names`, a non-array value is read property by property (values that
+   * are not objects are skipped without output); otherwise the value is iterated as a list of cells.
+   */
+  private appendRow(sb: StringBuilder, names: string[] | null | undefined, value: any): void {
+    if (!Array.isArray(value) && names) {
+      if (typeof value !== 'object' || value === null) return;
+      let first = true;
+      for (const name of names) {
+        if (!first) {
+          sb.append(this.separator);
+        } else {
+          first = false;
+        }
+        this.appendEscaped(sb, value[name]);
+      }
+    } else {
+      let first = true;
+      for (const val of value) {
+        if (!first) {
+          sb.append(this.separator);
+        } else {
+          first = false;
+        }
+        this.appendEscaped(sb, val);
+      }
+    }
+    sb.append("\n");
+  }
+
+  /**
+   * Serializes an array payload to CSV text. Headers come from `names` or, when those are not
+   * given, from the keys of the first element if it is a non-array object.
+   * @throws Error when the payload is not an array
+   */
+  serialize(payload: any): string {
+    const sb = new StringBuilder();
+    let headers = this.names;
+    if (headers) {
+      this.appendHeaders(sb, headers);
+    }
+
+    if (Array.isArray(payload)) {
+      if (!headers && payload.length > 0 && typeof payload[0] === 'object' && !Array.isArray(payload[0])) {
+        headers = Object.keys(payload[0]);
+        this.appendHeaders(sb, headers);
+      }
+      for (const x of payload) {
+        this.appendRow(sb, headers, x);
+      }
+    } else {
+      throw new Error("Unsupported object type to be formatted as CSV");
+    }
+
+    return sb.toString();
+  }
+
+  /**
+   * Adds a parsed row to `result`. The first row becomes the header names unless `noHeaders`;
+   * without any names rows are kept as arrays; otherwise a row becomes an object keyed by name
+   * (only names listed in `extractNames`, when set) and surplus cells are stored under `$` + index.
+   */
+  private accumulate(context: CsvParserContext, result: any[], values: any[]): void {
+    if (result.length === 0 && !context.namesRead && !this.noHeaders) {
+      context.names = values;
+      context.namesRead = true;
+      return;
+    }
+    if (this.noHeaders && isNullOrUndefined(this.names)) {
+      result.push(values);
+      return;
+    }
+    if (!isNullOrUndefined(context.names)) {
+      const item : Record<string, any> = {};
+      let i = 0;
+      for (i = 0; i < context.names.length; i++) {
+        const name = getAsString(context.names[i]) ?? "";
+        if ((context.extractNames === null || context.extractNames.includes(name)) && values.length > i) {
+          item[name] = values[i];
+        }
+      }
+      for (; i < values.length; i++) {
+        if (!Object.prototype.hasOwnProperty.call(item, `$${i}`)) {
+          item[`$${i}`] = values[i];
+        }
+      }
+      result.push(item);
+    }
+  }
+
+  /**
+   * Parses CSV text into an array of rows: objects, or arrays when `noHeaders` is set without `names`.
+   * Spaces and tabs outside quotes are dropped, and a line break that ends the input is ignored.
+   */
+  deserialize(input: string): any {
+    const result : any[] = [];
+    const context = new CsvParserContext();
+    if (this.noHeaders && !isNullOrUndefined(this.names)) {
+      const names: string[] = [];
+      this.names.forEach(item => names.push(item));
+      context.names = names;
+    }
+    context.extractNames = this.names ?? null;
+
+    const len = input.length;
+    let row: any[] = [];
+    const cell = new StringBuilder();
+    let offset = 0;
+
+    while (offset < len) {
+      const cur = input.codePointAt(offset) as number;
+      const curSize = charCount(cur);
+      const next = offset + curSize < len ? input.codePointAt(offset + curSize) as number : -1;
+      const curAndNextSize = curSize + charCount(next);
+
+      if (cur === this.separator.codePointAt(0)) {
+        if (context.inQuotes) {
+          cell.append(this.separator);
+        } else {
+          row.push(cell.toString());
+          cell.clear();
+        }
+
+        offset += curSize;
+      } else if ((cur === CsvFormat.CARRIAGE_RETURN && next === CsvFormat.LINE_FEED) || cur === CsvFormat.LINE_FEED) {
+        const unix = cur === CsvFormat.LINE_FEED;
+        const eof = offset + (unix ? curSize : curAndNextSize) === len;
+        if (!eof) {
+          if (context.inQuotes) {
+            cell.append(unix ? CsvFormat.NEW_LINE_UNIX : CsvFormat.NEW_LINE_WINDOWS);
+          } else {
+            row.push(cell.toString());
+            cell.clear();
+            this.accumulate(context, result, row);
+            row = [];
+          }
+        }
+        offset += unix ? curSize : curAndNextSize;
+      } else if (cur === 34 && next === 34) {
+        if (context.inQuotes) {
+          cell.append(CsvFormat.DOUBLE_QUOTES);
+          offset += curAndNextSize;
+        } else if (cell.length === 0) {
+          context.inQuotes = !context.inQuotes;
+          offset += curSize;
+        } else {
+          cell.append(CsvFormat.DOUBLE_QUOTES);
+          offset += curSize;
+        }
+      } else if (cur === 34) {
+        context.inQuotes = !context.inQuotes;
+        offset += curSize;
+      } else if (!context.inQuotes && (cur === 32 || cur === 9)) {
+        offset += curSize;
+      } else {
+        cell.append(String.fromCodePoint(cur));
+        offset += curSize;
+      }
+    }
+
+    // flush the pending row: it may end in an empty cell ("1,"), and nothing is pending after a blank last line
+    if (row.length > 0 || cell.length > 0) {
+      row.push(cell.toString());
+      this.accumulate(context, result, row);
+    }
+    return result as any;
+  }
+}
+
+/** Mutable parser state shared by `deserialize` and `accumulate`. */
+class CsvParserContext {
+  public inQuotes = false;
+  public names: string[] | null = null;
+  public namesRead = false;
+  public extractNames: string[] | null = null;
+}
+
+/** Minimal string accumulator that collects chunks in an array and joins them on demand. */
+class StringBuilder {
+  private strings: string[] = [];
+
+  public append(str: string): void {
+    this.strings.push(str);
+  }
+
+  public toString(): string {
+    return this.strings.join('');
+  }
+
+  public clear(): void {
+    this.strings.length = 0;
+  }
+
+  public get length(): number {
+    return this.toString().length;
+  }
+}
+
+export default CsvFormat;
\ No newline at end of file
diff --git a/javascript/json-transform/src/functions/TransformerFunctionContains.ts b/javascript/json-transform/src/functions/TransformerFunctionContains.ts
new file mode 100644
index 0000000..df9f1cf
--- /dev/null
+++ b/javascript/json-transform/src/functions/TransformerFunctionContains.ts
@@ -0,0 +1,33 @@
+import TransformerFunction from "./common/TransformerFunction";
+import {ArgType} from "./common/ArgType";
+import FunctionContext from "./common/FunctionContext";
+import {FunctionDescription} from "./common/FunctionDescription";
+import {isEqual, isNullOrUndefined, isTruthy} from "../JsonHelpers";
+
+const DESCRIPTION : FunctionDescription = {
+  aliases: ["contains"],
+  description: "",
+  inputType: ArgType.Array,
+  arguments: {
+    that: {
+      type: ArgType.Any, position: 0, defaultIsNull: true,
+      description: "The value to look for"
+    }
+  },
+  outputType: ArgType.Boolean
+};
+/** Implements `$$contains`: whether any element of the input equals the `that` argument (null when there is no input). */
+class TransformerFunctionContains extends TransformerFunction {
+  constructor() {
+    super(DESCRIPTION);
+  }
+
+  override apply(context: FunctionContext): any {
+    const streamer = context.getJsonElementStreamer(null);
+    if (streamer == null) return null;
+    const that = context.getJsonElement( "that");
+    return streamer.stream().any(el => isEqual(el, that));
+  }
+}
+
+export default TransformerFunctionContains;
\ No newline at end of file
diff --git a/javascript/json-transform/src/functions/TransformerFunctionCsv.ts b/javascript/json-transform/src/functions/TransformerFunctionCsv.ts
new file mode 100644
index 0000000..8f7fa92
--- /dev/null
+++ b/javascript/json-transform/src/functions/TransformerFunctionCsv.ts
@@ -0,0 +1,61 @@
+import TransformerFunction from "./common/TransformerFunction";
+import {ArgType} from "./common/ArgType";
+import FunctionContext from "./common/FunctionContext";
+import {FunctionDescription} from "./common/FunctionDescription";
+import {getAsString, isEqual, isNullOrUndefined, isTruthy} from "../JsonHelpers";
+import CsvFormat from "../formats/csv/CsvFormat";
+
+const DESCRIPTION : FunctionDescription = {
+  aliases: ["csv"],
+  description: "",
+  inputType: ArgType.Array,
+  arguments: {
+    no_headers: {
+      type: ArgType.Boolean, position: 0, defaultBoolean: false,
+      description: "Whether to include object keys as headers (taken from first object if no `names`)"
+    },
+    force_quote: {
+      type: ArgType.Boolean, position: 1, defaultBoolean: false,
+      description: "Whether to quote all the values"
+    },
+    separator: {
+      type: ArgType.String, position: 2, defaultString: ",",
+      description: "Use an alternative field separator"
+    },
+    names: {
+      type: ArgType.Array, position: 3, defaultIsNull: true,
+      description: "Names of fields to extract into csv if objects (will be used as the header row, unless `no_headers`)"
+    }
+  },
+  outputType: ArgType.String
+};
+/** Implements `$$csv`: serializes the input array to CSV text; returns null when there is no input or serialization fails. */
+class TransformerFunctionCsv extends TransformerFunction {
+  constructor() {
+    super(DESCRIPTION);
+  }
+
+  override apply(context: FunctionContext): any {
+    const streamer = context.getJsonElementStreamer(null);
+    try {
+      if (streamer == null)
+        return null;
+      const names = context.getJsonArray("names");
+      const noHeaders = context.getBoolean("no_headers");
+      const forceQuote = context.getBoolean("force_quote");
+      const separator = context.getString("separator");
+      const namesList = names?.map(el => getAsString(el) ?? "");
+      return new CsvFormat(
+        namesList,
+        noHeaders,
+        forceQuote,
+        separator)
+        .serialize(streamer.toJsonArray());
+    } catch (e: any) {
+      console.warn(context.getAlias() + " failed", e);
+      return null;
+    }
+  }
+}
+
+export default TransformerFunctionCsv;
\ No newline at end of file
diff --git a/javascript/json-transform/src/functions/TransformerFunctionCsvParse.ts b/javascript/json-transform/src/functions/TransformerFunctionCsvParse.ts
new file mode 100644
index 0000000..057fadc
--- /dev/null
+++ b/javascript/json-transform/src/functions/TransformerFunctionCsvParse.ts
@@ -0,0 +1,56 @@
+import TransformerFunction from "./common/TransformerFunction";
+import {ArgType} from "./common/ArgType";
+import FunctionContext from "./common/FunctionContext";
+import {FunctionDescription} from "./common/FunctionDescription";
+import {getAsString, isEqual, isNullOrUndefined, isTruthy} from "../JsonHelpers";
+import CsvFormat from "../formats/csv/CsvFormat";
+
+const DESCRIPTION : FunctionDescription = {
+  aliases: ["csvparse"],
+  description: "",
+  inputType: ArgType.String,
+  arguments: {
+    no_headers: {
+      type: ArgType.Boolean, position: 0, defaultBoolean: false,
+      description: "Whether to treat the first row as object keys"
+    },
+    separator: {
+      type: ArgType.String, position: 1, defaultString: ",",
+      description: "Use an alternative field separator"
+    },
+    names: {
+      type: ArgType.Array, position: 2, defaultIsNull: true,
+      description: "Names of fields of input arrays (by indices) or objects (can sift if provided less names than there are in the objects provided)"
+    }
+  },
+  outputType: ArgType.Array
+};
+/** Implements `$$csvparse`: parses the input CSV string into an array; returns null when there is no input or parsing fails. */
+class TransformerFunctionCsvParse extends TransformerFunction {
+  constructor() {
+    super(DESCRIPTION);
+  }
+
+  override apply(context: FunctionContext): any {
+    const csv = context.getString(null);
+    try {
+      if (csv == null)
+        return null;
+      const names = context.getJsonArray("names");
+      const noHeaders = context.getBoolean("no_headers");
+      const separator = context.getString("separator");
+      const namesList = names?.map(el => getAsString(el) ?? "");
+      return new CsvFormat(
+        namesList,
+        noHeaders,
+        false, // not relevant for deserialization
+        separator)
+        .deserialize(csv);
+    } catch (e: any) {
+      console.warn(context.getAlias() + " failed", e);
+      return null;
+    }
+  }
+}
+
+export default TransformerFunctionCsvParse;
\ No newline at end of file
diff --git a/javascript/json-transform/src/transformerFunctions.ts b/javascript/json-transform/src/transformerFunctions.ts
index 0c5a299..31578bc 100644
--- a/javascript/json-transform/src/transformerFunctions.ts
+++ b/javascript/json-transform/src/transformerFunctions.ts
@@ -15,6 +15,9 @@ import TransformerFunctionUpper from "./functions/TransformerFunctionUpper";
 import TransformerFunctionIs from "./functions/TransformerFunctionIs";
 import TransformerFunctionCoalesce from "./functions/TransformerFunctionCoalesce";
 import TransformerFunctionConcat from "./functions/TransformerFunctionConcat";
+import TransformerFunctionContains from "./functions/TransformerFunctionContains";
+import TransformerFunctionCsv from "./functions/TransformerFunctionCsv";
+import TransformerFunctionCsvParse from "./functions/TransformerFunctionCsvParse";
 
 class FunctionMatchResult {
   private result;
@@ -51,9 +54,9 @@ export class TransformerFunctions {
       "boolean": new TransformerFunctionBoolean(),
       "coalesce": new TransformerFunctionCoalesce(),
       "concat": new TransformerFunctionConcat(),
-      "contains": new TransformerFunction(UNIMPLEMENTED), // TODO: new TransformerFunctionContains(),
-      "csv": new TransformerFunction(UNIMPLEMENTED), // TODO: new TransformerFunctionCsv(),
-      "csvparse": new TransformerFunction(UNIMPLEMENTED), // TODO: new TransformerFunctionCsvParse(),
+      "contains": new TransformerFunctionContains(),
+      "csv": new TransformerFunctionCsv(),
+      "csvparse": new TransformerFunctionCsvParse(),
       "date": new TransformerFunction(UNIMPLEMENTED), // TODO: new TransformerFunctionDate(),
       "decimal": new TransformerFunction(UNIMPLEMENTED), // TODO: new TransformerFunctionDecimal(),
       "digest": new TransformerFunction(UNIMPLEMENTED), // TODO: new TransformerFunctionDigest(),