From c551698e34a8b4ccc3435c079c329b062734489e Mon Sep 17 00:00:00 2001 From: Damion Dooley Date: Thu, 12 Dec 2024 14:23:41 -0800 Subject: [PATCH 1/8] implements xsd:token WhitespaceMinimizedString distinction --- lib/DataHarmonizer.js | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/lib/DataHarmonizer.js b/lib/DataHarmonizer.js index 56c09981..8884214b 100644 --- a/lib/DataHarmonizer.js +++ b/lib/DataHarmonizer.js @@ -345,9 +345,12 @@ class DataHarmonizer { // FUTURE: figure out how to accomodate newlines? */ switch (range) { - case 'string': + case 'WhitespaceMinimizedString': new_field.datatype = 'xsd:token'; break; + case 'string': + new_field.datatype = 'string'; + break; case 'Provenance': new_field.datatype = 'Provenance'; break; @@ -563,6 +566,7 @@ class DataHarmonizer { } } + // Called by toolbox.js validate() async validate() { // const data = this.getTrimmedData(); const rowStart = 0; @@ -571,8 +575,11 @@ class DataHarmonizer { const colEnd = this.hot.countCols() - 1; const data = this.hot.getData(rowStart, colStart, rowEnd, colEnd); - await this.doPreValidationRepairs(data); + // doPreValidationRepairs(data) cleans some kinds of [row][column] value without + // signaling changes. + await this.doPreValidationRepairs(data); // Screen refresh requires await(). this.invalid_cells = this.getInvalidCells(data); + console.log("INVALID CELLS", this.invalid_cells); this.hot.render(); } @@ -2727,9 +2734,14 @@ class DataHarmonizer { return this.validator.validate(data, fieldNames); } + /** + * Cleans up some data[row][column] values silently + * xsd:token normalized to have whitespace removed. 
+ */ doPreValidationRepairs(data) { return new Promise((resolve) => { const cellChanges = []; + const whitespace_minimized_re = new RegExp(/\s+/,'g'); let fullVersion = VERSION_TEXT + ', ' + @@ -2744,10 +2756,21 @@ class DataHarmonizer { continue; } for (let col = 0; col < data[row].length; col++) { - const cellVal = data[row][col]; + let cellVal = data[row][col]; const field = this.fields[col]; const datatype = field.datatype; + if (cellVal && datatype === 'xsd:token') { + + const minimized = cellVal.replace(whitespace_minimized_re, ' ').trim(); + // Update cellVal in advance of validateVal(s) below + if (minimized !== cellVal) { + cellVal = minimized; + data[row][col] = cellVal; + cellChanges.push([row, col, minimized, 'thisChange']); + } + }; + if (datatype === 'Provenance') { checkProvenance(cellChanges, fullVersion, cellVal, row, col); } else if (field.flatVocabulary) { From 075bbde45506a91f478707d4160ad1cdf1a315ea Mon Sep 17 00:00:00 2001 From: Damion Dooley Date: Thu, 12 Dec 2024 14:25:18 -0800 Subject: [PATCH 2/8] Implements xsd:token minimized whitespace while xsd:token officially may have >1 consecutive whitespace in it; it must officially reduce to minimal amount. Parse returns that. 
--- lib/utils/datatypes.js | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/utils/datatypes.js b/lib/utils/datatypes.js index 1ce754af..fa905d74 100644 --- a/lib/utils/datatypes.js +++ b/lib/utils/datatypes.js @@ -6,6 +6,8 @@ const DEFAULT_OPTIONS = { datetimeFormat: 'yyyy-MM-dd HH:mm', timeFormat: 'HH:mm', }; +const WHITESPACE_MINIMIZED_RE = new RegExp(/\s+/,'g'); + export class Datatypes { PARSERS = { 'xsd:integer': this.parseInteger, @@ -17,6 +19,7 @@ export class Datatypes { 'xsd:date': this.parseDate, 'xsd:dateTime': this.parseDateTime, 'xsd:time': this.parseTime, + 'xsd:token': this.parseToken, }; STRINGIFIERS = { 'xsd:date': this.stringifyDate, @@ -88,6 +91,11 @@ export class Datatypes { return parsed; } + // For xsd:token which returns reduced whitespace version + parseToken(value) { + return value.replace(WHITESPACE_MINIMIZED_RE, ' ').trim(); + } + stringifyDate(value) { return format(value, this.dateFormat); } From 430949c086c8212fabc67e5459ada32c1229e257 Mon Sep 17 00:00:00 2001 From: Damion Dooley Date: Thu, 12 Dec 2024 14:26:05 -0800 Subject: [PATCH 3/8] Issue was comparison of value was against indented menu item. Eliminated extra menu item spaces --- lib/utils/validation.js | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/lib/utils/validation.js b/lib/utils/validation.js index 436b828c..a6bce07f 100644 --- a/lib/utils/validation.js +++ b/lib/utils/validation.js @@ -82,12 +82,17 @@ export function validateValAgainstVocab(value, field) { const ptr = field.flatVocabularyLCase.indexOf(trimmedVal); if (ptr >= 0) { valid = true; - // Normalised value being suggested for update - if (value != field.flatVocabulary[ptr]) { - update = field.flatVocabulary[ptr]; + // Normalised value being suggested for update; + // .trim() because flatVocabulary has indentation. 
+ let val_trim = field.flatVocabulary[ptr].trim(); + if (value != val_trim) { + update = val_trim; } } + //if (update) + // console.log(valid, update, value, trimmedVal); } + return [valid, update]; } From 8e2c2076eb86ae4289441305db24d48afb83cfdd Mon Sep 17 00:00:00 2001 From: Damion Dooley Date: Thu, 12 Dec 2024 14:26:23 -0800 Subject: [PATCH 4/8] simplifying logic, same output. --- lib/Validator.js | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/lib/Validator.js b/lib/Validator.js index a645d133..b526a823 100644 --- a/lib/Validator.js +++ b/lib/Validator.js @@ -236,20 +236,17 @@ class Validator { ); const validate = (value) => { - if (slotDefinition.required && !value) { - return 'This field is required'; - } - - if (slotDefinition.value_presence === 'PRESENT' && !value) { - return 'Value is not present'; - } else if (slotDefinition.value_presence === 'ABSENT' && value) { - return 'Value is not absent'; - } - if (!value) { + if (slotDefinition.required) + return 'This field is required'; + if (slotDefinition.value_presence === 'PRESENT') + return 'Value is not present'; return; } + if (slotDefinition.value_presence === 'ABSENT') + return 'Value is not absent'; + let splitValues; if (slotDefinition.multivalued) { splitValues = value.split(this.#multivaluedDelimiter); From b66fa5f0592fc69c93eedef7afb7eeb1821855da Mon Sep 17 00:00:00 2001 From: Damion Dooley Date: Fri, 13 Dec 2024 00:31:00 -0800 Subject: [PATCH 5/8] Better use of RegExp --- lib/utils/datatypes.js | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/utils/datatypes.js b/lib/utils/datatypes.js index fa905d74..2483e9ae 100644 --- a/lib/utils/datatypes.js +++ b/lib/utils/datatypes.js @@ -6,7 +6,8 @@ const DEFAULT_OPTIONS = { datetimeFormat: 'yyyy-MM-dd HH:mm', timeFormat: 'HH:mm', }; -const WHITESPACE_MINIMIZED_RE = new RegExp(/\s+/,'g'); +const RE_WHITESPACE_MINIMIZED = new RegExp(/\s+/,'g'); +const RE_DECIMAL = new 
RegExp(/^[+-]?([0-9]+(\.[0-9]*)?|\.[0-9]+)$/); export class Datatypes { PARSERS = { @@ -53,7 +54,7 @@ export class Datatypes { } parseDecimal(value) { - const correctPattern = /^[+-]?([0-9]+(\.[0-9]*)?|\.[0-9]+)$/.test(value); + const correctPattern = RE_DECIMAL.test(value); return correctPattern ? Number(value) : undefined; } @@ -93,7 +94,7 @@ export class Datatypes { // For xsd:token which returns reduced whitespace version parseToken(value) { - return value.replace(WHITESPACE_MINIMIZED_RE, ' ').trim(); + return value.replace(RE_WHITESPACE_MINIMIZED, ' ').trim(); } stringifyDate(value) { From 569b947c259e50bea12b04836e859963f08d7c81 Mon Sep 17 00:00:00 2001 From: Damion Dooley Date: Fri, 13 Dec 2024 00:31:18 -0800 Subject: [PATCH 6/8] cosmetic tweak --- lib/utils/validation.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/utils/validation.js b/lib/utils/validation.js index a6bce07f..06e7aade 100644 --- a/lib/utils/validation.js +++ b/lib/utils/validation.js @@ -89,8 +89,6 @@ export function validateValAgainstVocab(value, field) { update = val_trim; } } - //if (update) - // console.log(valid, update, value, trimmedVal); } return [valid, update]; From a5251b6c8f245fb024a7fa83bd98389ba28325f8 Mon Sep 17 00:00:00 2001 From: Damion Dooley Date: Fri, 13 Dec 2024 00:32:05 -0800 Subject: [PATCH 7/8] fixing ability to do numeric/date entry + NullValueMenu --- lib/Validator.js | 118 +++++++++++++++++++++++++++++++---------------- 1 file changed, 79 insertions(+), 39 deletions(-) diff --git a/lib/Validator.js b/lib/Validator.js index b526a823..248c91d9 100644 --- a/lib/Validator.js +++ b/lib/Validator.js @@ -57,9 +57,13 @@ class Validator { if (!todos || !todos.length) { return; } + // todos is an array of strings. + const slotType = this.getSlotType(slotDefinition);// LinkML schema.type object - const slotType = this.getSlotType(slotDefinition); - + // Slot type could be: + // a number, string, date ... 
+ // null - if it is only a menu + // both date and NullValueList if (slotType?.uri === 'xsd:date') { for (const todo of todos) { if (todo.substring(0, 2) === '>=') { @@ -70,16 +74,11 @@ class Validator { } } - for (const def of slotDefinition.any_of || []) { - processTodos(def, todos); - } - for (const def of slotDefinition.all_of || []) { - processTodos(def, todos); - } - for (const def of slotDefinition.exactly_one_of || []) { - processTodos(def, todos); - } - for (const def of slotDefinition.none_of || []) { + // Cycle through each slotDefinition any_of etc. object entries and get + // the datatype of its .range (or recurse) and in LinkML fashion attach + // minimum_value and maximum_value to the slotDefinition OR its any_of + // etc array BASED ON top level todos. E.g. inheriting min/max criteria. + for (const def of slotDefinition.any_of ?? slotDefinition.all_of ?? slotDefinition.exactly_one_of ?? slotDefinition.none_of ?? []) { processTodos(def, todos); } }; @@ -94,13 +93,15 @@ class Validator { */ this.#dependantMinimumValuesMap = new Map(); this.#dependantMaximumValuesMap = new Map(); + const RE_TODOS = new RegExp(/^([><])={(.*?)}$/); + for (const slotDefinition of Object.values(this.#targetClassInducedSlots)) { const { todos } = slotDefinition; if (!todos || !todos.length) { continue; } for (const todo of todos) { - const match = todo.match(/^([><])={(.*?)}$/); + const match = todo.match(RE_TODOS); if (match == null) { continue; } @@ -140,7 +141,7 @@ class Validator { this.#valueValidatorMap = new Map(); } - /* This returns a single primitve data type for a slot - a decimal, date, + /* This returns a single primitive data type for a slot - a decimal, date, string etc. or possibly an enumeration. Enumerations are handled separately however (by const slotEnum = ...). Slots either use "range" attribute, OR they use any_of or exactly_one_of etc. 
attribute expression @@ -164,6 +165,12 @@ class Validator { return slotType; } + /* A validation function is prepared and cached for every slot presented, so + that validation through rows of data can make use of already established + validator for each column. Particular columns may have sensitivity to other + column values (via min/max references) or special {today} e.g. so + + */ getValidatorForSlot(slot, options = {}) { const { cacheKey, inheritedRange } = options; if (typeof cacheKey === 'string' && this.#valueValidatorMap.has(cacheKey)) { @@ -177,11 +184,13 @@ class Validator { slotDefinition = slot; } + // This digs down into any_of, all_of etc to find first date, number etc. + // slotType is LinkML schema.type object if (!slotDefinition.range && inheritedRange) { slotDefinition.range = inheritedRange; } - const slotType = this.getSlotType(slotDefinition); + const slotType = this.getSlotType(slotDefinition); // LinkML schema.type object const slotEnum = this.#schema.enums?.[slotDefinition.range]; const slotPermissibleValues = Object.values( @@ -194,11 +203,13 @@ class Validator { // TEST CASE: // if (slotDefinition.name == "sample_collection_date") // console.log("any_of", DEBUG INFO) + // inheritedRange comes from original slot, so it might be a date or number + menu const anyOfValidators = (slotDefinition.any_of ?? []).map((subSlot) => this.getValidatorForSlot(subSlot, { inheritedRange: slotDefinition.range, }) ); + const allOfValidators = (slotDefinition.all_of ?? []).map((subSlot) => this.getValidatorForSlot(subSlot, { inheritedRange: slotDefinition.range, @@ -239,6 +250,7 @@ class Validator { if (!value) { if (slotDefinition.required) return 'This field is required'; + // value_presence is subject to dynamic rules? 
if (slotDefinition.value_presence === 'PRESENT') return 'Value is not present'; return; @@ -266,39 +278,62 @@ class Validator { splitValues = [value]; } + // For any of the value(s), whether they are valid depends on either + // an ok primitive data type parsing, OR a categorical menu match. + // Message needs for (const value of splitValues) { - if (slotType) { + let parse_error = false; + if (slotType) {// Doesn't pertain to slots which are ONLY enumerations. const parsed = this.#parser.parse(value, slotType.uri); + // Issue: parse can fail on decimal but menu has "Missing" if (parsed === undefined) { - return `Value does not match format for ${slotType.uri}`; - } + parse_error = `Value does not match format for ${slotType.uri}`; - if (slotMinimumValue !== undefined && parsed < slotMinimumValue) { - return 'Value is less than minimum value'; + //if (!(anyOfValidators.length || allOfValidators.length || exactlyOneOfValidators.length || noneOfValidators.length)) { + // return parse_error; + //} } + // All these cases have encountered an item which matches basic data + // datatype and so sudden death is ok. 
+ else { - if (slotMaximumValue !== undefined && parsed > slotMaximumValue) { - return 'Value is greater than maximum value'; - } + if (slotMinimumValue !== undefined && parsed < slotMinimumValue) { + return 'Value is less than minimum value'; + } + + if (slotMaximumValue !== undefined && parsed > slotMaximumValue) { + return 'Value is greater than maximum value'; + } + + if ( + (slotDefinition.equals_string !== undefined && + parsed !== slotDefinition.equals_string) || + (slotDefinition.equals_number !== undefined && + parsed !== slotDefinition.equals_number) + ) { + return 'Value does not match constant'; + } - if ( - (slotDefinition.equals_string !== undefined && - parsed !== slotDefinition.equals_string) || - (slotDefinition.equals_number !== undefined && - parsed !== slotDefinition.equals_number) - ) { - return 'Value does not match constant'; - } + if ( + slotDefinition.pattern !== undefined && + !value.match(slotDefinition.pattern) + ) { + return 'Value does not match pattern'; + } - if ( - slotDefinition.pattern !== undefined && - !value.match(slotDefinition.pattern) - ) { - return 'Value does not match pattern'; + // Here slotType value tested and is ok! + continue; } + + // Here value didn't parse to slotType + + } + else { + // No basic slot type here so only enumeration handling. } + // Single range for slot given so no need to evaluate all_of, any_of etc. 
if (slotEnum && !slotPermissibleValues.includes(value)) { return 'Value is not allowed'; } @@ -342,12 +377,17 @@ class Validator { return 'One or more expressions of none_of held'; } } + + if (anyOfValidators.length || allOfValidators.length || exactlyOneOfValidators.length || noneOfValidators.length) { + // We passed validation here which means a parse error can be overridden + } + else if (parse_error.length) { + //There were no other ranges besides basic slotType so + return parse_error; + } } }; - if (typeof cacheKey === 'string') { - this.#valueValidatorMap.set(cacheKey, validate); - } return validate; } From e5d16dbad31f654244831e0d7b862d2855560ff1 Mon Sep 17 00:00:00 2001 From: Damion Dooley Date: Fri, 13 Dec 2024 14:51:24 -0800 Subject: [PATCH 8/8] tidying --- lib/DataHarmonizer.js | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/DataHarmonizer.js b/lib/DataHarmonizer.js index 8884214b..81c3130d 100644 --- a/lib/DataHarmonizer.js +++ b/lib/DataHarmonizer.js @@ -254,7 +254,6 @@ class DataHarmonizer { attributes[name], this.context.template.current.schema.slots[name] ); - // console.log(field); //let field = attributes[name]; let section_title = null; @@ -579,7 +578,7 @@ class DataHarmonizer { // signaling changes. await this.doPreValidationRepairs(data); // Screen refresh requires await(). this.invalid_cells = this.getInvalidCells(data); - console.log("INVALID CELLS", this.invalid_cells); + //console.log("INVALID CELLS", this.invalid_cells); this.hot.render(); } @@ -2656,6 +2655,7 @@ class DataHarmonizer { if (number >= 0) { // Here we have the 3 field call, with units sandwitched in the middle if (binOffset === 2) { + const unit = matrix[row][hotRowNextCol]; // Host age unit is interpreted by default to be year. // If user selects month, value is converted into years for binning. @@ -2691,8 +2691,9 @@ class DataHarmonizer { selection = bin_value; // Default value is itself. 
const bin_values = fields[hotRowBinCol].flatVocabulary; - if (!bin_value || (bin_value === '' && value in bin_values)) { + if (value in bin_values && (!bin_value || bin_value === '')) { selection = value; + console.log("no bin value", value); } // If a unit field exists, then set that to metadata too. if (binOffset == 2) { @@ -2701,7 +2702,7 @@ class DataHarmonizer { ? matrix[hotRowPtr][hotRowNextCol] : null; const unit_values = fields[col + 1].flatVocabulary; - if (!unit_value || (unit_value === '' && value in unit_values)) { + if (value in unit_values && (!unit_value || unit_value === '')) { triggered_changes.push([ hotRowPtr, hotRowNextCol,