diff --git a/CHANGELOG.md b/CHANGELOG.md index 1bc3c00a9..02916c524 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Bumps `rimraf` from 5.0.5 to 5.0.7 - Bumps `aws4` from 1.12.0 to 1.13.0 ### Changed +- Upgraded the parsing and serialization of long numerals to employ JSON11 ([784](https://github.com/opensearch-project/opensearch-js/pull/784)). ### Deprecated ### Removed ### Fixed diff --git a/lib/Serializer.js b/lib/Serializer.js index 4fb5123c4..2636a53d0 100644 --- a/lib/Serializer.js +++ b/lib/Serializer.js @@ -34,60 +34,9 @@ const debug = require('debug')('opensearch'); const sjson = require('secure-json-parse'); const { SerializationError, DeserializationError } = require('./errors'); const kJsonOptions = Symbol('secure json parse options'); +const JSON11 = require('json11'); -/* In JavaScript, a `Number` is a 64-bit floating-point value which can store 16 digits. However, the - * serializer and deserializer will need to cater to numeric values generated by other languages which - * can have up to 19 digits. Native JSON parser and stringifier, incapable of handling the extra - * digits, corrupt the values, making them unusable. - * - * To work around this limitation, the deserializer converts long sequences of digits into strings and - * marks them before applying the parser. During the parsing, string values that begin with the mark - * are converted to `BigInt` values. - * Similarly, during stringification, the serializer converts `BigInt` values to marked strings and - * when done, it replaces them with plain numerals. - * - * `Number.MAX_SAFE_INTEGER`, 9,007,199,254,740,991, is the largest number that the native methods can - * parse and stringify, and any numeral greater than that would need to be translated using the - * workaround; all 17-digits or longer and only tail-end of the 16-digits need translation. It would - * be unfair to all the 16-digit numbers if the translation applied to `\d{16,}` only to cover the - * less than 10%. Hence, a RegExp is created to only match numerals too long to be a number. - * - * To make the explanation simpler, let's assume that MAX_SAFE_INTEGER is 8921 which has 4 digits. - * Starting from the right, we take each digit onwards, `[-9]`: - * 1) 7922 - 7929: 792[2-9]\d{0} - * 2) 7930 - 7999: 79[3-9]\d{1} - * 9) 9 + 1 = 10 which results in a rollover; no need to do anything. - * 8) 9000 - 9999: [9-9]\d{3} - * Finally we add anything 5 digits or longer: `\d{5,} - * - * PS, a better solution would use AST but considering its performance penalty, RegExp is the next - * the best solution. - */ const isBigIntSupported = typeof BigInt !== 'undefined'; -const maxIntAsString = String(Number.MAX_SAFE_INTEGER); -const maxIntLength = maxIntAsString.length; -// Sub-patterns for each digit -const bigIntMatcherTokens = [`\\d{${maxIntAsString.length + 1},}`]; -for (let i = 0; i < maxIntLength; i++) { - if (maxIntAsString[i] !== '9') { - bigIntMatcherTokens.push( - maxIntAsString.substring(0, i) + - `[${parseInt(maxIntAsString[i], 10) + 1}-9]` + - `\\d{${maxIntLength - i - 1}}` - ); - } -} - -/* The matcher that looks for `": , ...}` and `[..., , ...]` - * - * The pattern starts by looking for `":` not immediately preceded by a `\`. That should be - * followed by any of the numeric sub-patterns. A comma, end of an array, end of an object, or - * the end of the input are the only acceptable elements after it. - */ -const bigIntMatcher = new RegExp( - `((?:\\[|,|(? { - if (json.indexOf(marker) === -1) { - bigIntMarker = marker; - return true; - } - }); - } while (!bigIntMarker); - - return { - bigIntMarker, - length, - }; - } - - _parseWithBigInt(json) { - const { bigIntMarker, length } = this._getSuitableBigIntMarker(json); - - let hadException; - let markedJSON = json.replace(bigIntMatcher, `$1"${bigIntMarker}$2"$3`); - - /* RegExp cannot replace AST and the process of marking adds quotes. So, any false-positive hit - * will make the JSON string unparseable. - * - * To find those instances, we try to parse and watch for the location of any errors. If an error - * is caused by the marking, we remove that single marking and try again. - */ - do { - try { - hadException = false; - JSON.parse(markedJSON); - } catch (e) { - hadException = true; - /* There are two types of exception objects that can be raised: - * 1) a proper object with lineNumber and columnNumber which we can use - * 2) a textual message with the position that we need to parse - */ - let { lineNumber, columnNumber } = e; - if (!lineNumber || !columnNumber) { - const match = - // ToDo: When support for ancient versions of Node.js are dropped, replace with - // e?.message?.match?.() - e && - e.message && - typeof e.message.match === 'function' && - e.message.match(/^Unexpected token.*at position (\d+)$/); - if (match) { - lineNumber = 1; - // The position is zero-indexed; adding 1 to normalize it for the -2 that comes later - columnNumber = parseInt(match[1], 10) + 1; - } - } - - if (lineNumber < 1 || columnNumber < 2) { - // The problem is not with this replacement; just return a failure. - return; - } - - /* We need to skip e.lineNumber - 1 number of `\n` occurrences. - * Then, we need to go to e.columnNumber - 2 to look for `"\d+"`; we need to `-1` to - * account for the quote but an additional `-1` is needed because columnNumber starts from 1. - */ - const re = new RegExp( - `^((?:.*\\n){${lineNumber - 1}}[^\\n]{${columnNumber - 2}})"${bigIntMarker}(-?\\d+)"` - ); - if (!re.test(markedJSON)) { - // The exception is not caused by adding the marker - return; - } - - // We have found a bad replacement; let's remove it. - markedJSON = markedJSON.replace(re, '$1$2'); - } - } while (hadException); - - const bigIntMarkFinder = new RegExp(`^${bigIntMarker}-?\\d+$`); - - // Exceptions will trickle up to the caller - return sjson.parse( - markedJSON, - (key, val) => - /* Convert marked values to BigInt values. - * The `startsWith` is purely for performance, to avoid running `test` if not needed. - */ - typeof val === 'string' && val.startsWith(bigIntMarker) && bigIntMarkFinder.test(val) - ? BigInt(val.substring(length)) // eslint-disable-line no-undef - : val, - this[kJsonOptions] - ); - } - - _stringifyWithBigInt(object, candidate) { - const { bigIntMarker } = this._getSuitableBigIntMarker(candidate); - - /* The matcher that looks for "" - * Because we have made sure that `bigIntMarker` was never present in the original object, we can - * carelessly assume every "" is due to our marking. - */ - const markedBigIntMatcher = new RegExp(`"${bigIntMarker}(-?\\d+)"`, 'g'); - - return ( - JSON.stringify( - object, - /* Convert BigInt values to a string and mark them. - * Can't be bothered with Number values beyond safe values because they are already corrupted. - */ - (key, val) => (typeof val === 'bigint' ? `${bigIntMarker}${val.toString()}` : val) - ) - // Replace marked substrings with just the numerals - .replace(markedBigIntMatcher, '$1') - ); - } - serialize(object) { debug('Serializing', object); let json; @@ -262,11 +63,29 @@ class Serializer { const shouldHandleLongNumerals = isBigIntSupported && this[kJsonOptions].enableLongNumeralSupport; try { + /* When handling long numerals is not requested or the platform doesn't support BigInt, the + * result of JSON.stringify are returned. + * + * When long numerals should be handled: + * Use JSON.stringify to check if any value is a BigInt: + * * If no BigInt values are found, the result of JSON.stringify is good enough to be returned. + * * Only If a BigInt value is found, JSON11.stringify is employed and its result is returned. + */ json = JSON.stringify(object, shouldHandleLongNumerals ? checkForBigInts : null); if (shouldHandleLongNumerals && !numeralsAreNumbers) { - const temp = this._stringifyWithBigInt(object, json); - if (temp) json = temp; + try { + // With `withBigInt: false`, valid JSON is produced while maintaining accuracy + const temp = JSON11.stringify(object, { + withBigInt: false, + quote: '"', + quoteNames: true, + }); + if (temp) json = temp; + } catch (ex) { + // Do nothing: JSON.stringify succeeded but JSON11.stringify failed; return the + // JSON.stringify result. + } } } catch (err) { throw new SerializationError(err.message, object); @@ -292,6 +111,14 @@ class Serializer { const shouldHandleLongNumerals = isBigIntSupported && this[kJsonOptions].enableLongNumeralSupport; try { + /* When handling long numerals is not requested or the platform doesn't support BigInt, the + * result of sjson.parse are returned. + * + * When long numerals should be handled: + * Use sjson.parse to check if any value is outside the range of safe integers: + * * If no long numerals are found, the result of sjson.parse is good enough to be returned. + * * Only If long numerals are found, JSON11.parse is employed and its result is returned. + */ object = sjson.parse( json, shouldHandleLongNumerals ? checkForLargeNumerals : null, @@ -299,9 +126,13 @@ class Serializer { ); if (shouldHandleLongNumerals && !numeralsAreNumbers) { - const temp = this._parseWithBigInt(json); - if (temp) { - object = temp; + try { + const temp = JSON11.parse(json, null, { withLongNumerals: true }); + if (temp) { + object = temp; + } + } catch (ex) { + // Do nothing: sjson.parse succeeded but JSON11.parse failed; return the sjson.parse result } } } catch (err) { diff --git a/package.json b/package.json index 2f769ae0b..2e1842be0 100644 --- a/package.json +++ b/package.json @@ -98,6 +98,7 @@ "xmlbuilder2": "^3.0.2" }, "dependencies": { + "json11": "^1.0.4", "aws4": "^1.11.0", "debug": "^4.3.1", "hpagent": "^1.2.0", diff --git a/test/fixtures/longnumerals-dataset.ndjson b/test/fixtures/longnumerals-dataset.ndjson index 59bc6963c..5d596258c 100644 --- a/test/fixtures/longnumerals-dataset.ndjson +++ b/test/fixtures/longnumerals-dataset.ndjson @@ -1,5 +1,5 @@ {"number":18014398509481982,"description":"-18014398509481982 , -1 , 1 , 18014398509481982"} -{"number":-18014398509481982,"description":"෴߷֍෴18014398509481982"} -{"description":"[\"෴߷֍෴18014398509481982\"]", "number":18014398509481982} +{"number":-18014398509481982,"description":"18014398509481982,1,-1,-18014398509481982"} +{"description":"[\"18014398509481982\"]", "number":18014398509481982} {"description":"18014398509481982", "number":18014398509481982} -{"number":9007199254740891,"description":"Safer than [18014398509481982]"} +{"number":9007199254740891,"description":"Safer than [18014398509481982]"} \ No newline at end of file diff --git a/test/integration/serializer/longnumerals.test.js b/test/integration/serializer/longnumerals.test.js index 7a10aafbb..308e7d22d 100644 --- a/test/integration/serializer/longnumerals.test.js +++ b/test/integration/serializer/longnumerals.test.js @@ -85,9 +85,9 @@ test('long numerals', async (t) => { } t.same(object, { '-18014398509481982 , -1 , 1 , 18014398509481982': 18014398509481982n, - '෴߷֍෴18014398509481982': -18014398509481982n, + '18014398509481982,1,-1,-18014398509481982': -18014398509481982n, 'Safer than [18014398509481982]': 9007199254740891, 18014398509481982: 18014398509481982n, - '["෴߷֍෴18014398509481982"]': 18014398509481982n, + '["18014398509481982"]': 18014398509481982n, }); }); diff --git a/yarn.lock b/yarn.lock index eb92932d7..060d940bf 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2224,6 +2224,11 @@ json-stable-stringify-without-jsonify@^1.0.1: resolved "https://registry.yarnpkg.com/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz#9db7b59496ad3f3cfef30a75142d2d930ad72651" integrity sha1-nbe1lJatPzz+8wp1FC0tkwrXJlE= +json11@^1.0.4: + version "1.0.4" + resolved "https://registry.yarnpkg.com/json11/-/json11-1.0.4.tgz#47df69bc54bc0239211020f5ef234f4fe8ffac5f" + integrity sha512-4o9dcmRyqy2O4wDaUnv8YtrVEm1NBVVIPCN/z6en8PH+LyFdZsE2trW7hbl4+CwI4pTBKZa/JKKoT8zqO7EV8w== + json5@^2.1.2: version "2.2.3" resolved "https://registry.yarnpkg.com/json5/-/json5-2.2.3.tgz#78cd6f1a19bdc12b73db5ad0c61efd66c1e29283"