From 9fe176a67c6a16266ab2bc83d44f1fc61acf3108 Mon Sep 17 00:00:00 2001 From: Teal Hobson-Lowther Date: Mon, 20 Aug 2018 14:59:08 -0700 Subject: [PATCH 1/5] separate json parsing and validation --- validators/bids.js | 262 ++++++++++++++++++++++++--------------------- 1 file changed, 140 insertions(+), 122 deletions(-) diff --git a/validators/bids.js b/validators/bids.js index 394e64233..c90b670b0 100644 --- a/validators/bids.js +++ b/validators/bids.js @@ -177,6 +177,7 @@ BIDS = { directory: [], }, niftis = [], + jsonFiles = [], ephys = [], headers = [], participants = null, @@ -452,7 +453,7 @@ BIDS = { }) } - // validate json + // load json data for validation later else if (file.name && file.name.endsWith('.json')) { utils.files.readFile(file, function(issue, contents) { if (issue) { @@ -460,7 +461,7 @@ BIDS = { process.nextTick(cb) return } - json(file, contents, function(issues, jsObj) { + utils.json.parse(file, contents, function(issues, jsObj) { self.issues = self.issues.concat(issues) // abort further tests if schema test does not pass @@ -472,14 +473,7 @@ BIDS = { } jsonContentsDict[file.relativePath] = jsObj - - // collect task summary - if (file.name.indexOf('task') > -1) { - var task = jsObj ? jsObj.TaskName : null - if (task && summary.tasks.indexOf(task) === -1) { - summary.tasks.push(task) - } - } + jsonFiles.push(file) process.nextTick(cb) }) }) @@ -523,62 +517,60 @@ BIDS = { } }, function() { - // check if same file with .nii and .nii.gz extensions is present - var niftiCounts = niftis - .map(function(val) { - return { count: 1, val: val.name.split('.')[0] } - }) - .reduce(function(a, b) { - a[b.val] = (a[b.val] || 0) + b.count - return a - }, {}) + // check json data + async.eachOfLimit( + jsonFiles, + 200, + function(file, key, cb) { + json(file, jsonContentsDict, function(issues, jsObj) { + self.issues = self.issues.concat(issues) + // collect task summary + if (file.name.indexOf('task') > -1) { + var task = jsObj ? jsObj.TaskName : null + if (task && summary.tasks.indexOf(task) === -1) { + summary.tasks.push(task) + } + } + process.nextTick(cb) + }) + }, + function() { + // check if same file with .nii and .nii.gz extensions is present + var niftiCounts = niftis + .map(function(val) { + return { count: 1, val: val.name.split('.')[0] } + }) + .reduce(function(a, b) { + a[b.val] = (a[b.val] || 0) + b.count + return a + }, {}) - var duplicates = Object.keys(niftiCounts).filter(function(a) { - return niftiCounts[a] > 1 - }) - if (duplicates.length !== 0) { - for (var key in duplicates) { - var duplicateFiles = niftis.filter(function(a) { - return a.name.split('.')[0] === duplicates[key] + var duplicates = Object.keys(niftiCounts).filter(function(a) { + return niftiCounts[a] > 1 }) - for (var file in duplicateFiles) { - self.issues.push( - new Issue({ - code: 74, - file: duplicateFiles[file], - }), - ) + if (duplicates.length !== 0) { + for (var key in duplicates) { + var duplicateFiles = niftis.filter(function(a) { + return a.name.split('.')[0] === duplicates[key] + }) + for (var file in duplicateFiles) { + self.issues.push( + new Issue({ + code: 74, + file: duplicateFiles[file], + }), + ) + } + } } - } - } - async.eachOfLimit( - niftis, - 200, - function(file, key, cb) { - if (self.options.ignoreNiftiHeaders) { - NIFTI( - null, - file, - jsonContentsDict, - bContentsDict, - fileList, - events, - function(issues) { - self.issues = self.issues.concat(issues) - process.nextTick(cb) - }, - ) - } else { - utils.files.readNiftiHeader(file, function(header) { - // check if header could be read - if (header && header.hasOwnProperty('error')) { - self.issues.push(header.error) - process.nextTick(cb) - } else { - headers.push([file, header]) + async.eachOfLimit( + niftis, + 200, + function(file, key, cb) { + if (self.options.ignoreNiftiHeaders) { NIFTI( - header, + null, file, jsonContentsDict, bContentsDict, @@ -589,75 +581,101 @@ BIDS = { process.nextTick(cb) }, ) + } else { + utils.files.readNiftiHeader(file, function(header) { + // check if header could be read + if (header && header.hasOwnProperty('error')) { + self.issues.push(header.error) + process.nextTick(cb) + } else { + headers.push([file, header]) + NIFTI( + header, + file, + jsonContentsDict, + bContentsDict, + fileList, + events, + function(issues) { + self.issues = self.issues.concat(issues) + process.nextTick(cb) + }, + ) + } + }) + } + }, + function() { + if (!hasSubjectDir) { + self.issues.push(new Issue({ code: 45 })) + } + if (!hasDatasetDescription) { + self.issues.push(new Issue({ code: 57 })) + } + // check if participants file match found subjects + if (participants) { + var participantsFromFile = participants.list.sort() + var participantsFromFolders = summary.subjects.sort() + if ( + !utils.array.equals( + participantsFromFolders, + participantsFromFile, + true, + ) + ) { + self.issues.push( + new Issue({ + code: 49, + evidence: + 'participants.tsv: ' + + participantsFromFile.join(', ') + + ' folder structure: ' + + participantsFromFolders.join(', '), + file: participants.file, + }), + ) + } } - }) - } - }, - function() { - if (!hasSubjectDir) { - self.issues.push(new Issue({ code: 45 })) - } - if (!hasDatasetDescription) { - self.issues.push(new Issue({ code: 57 })) - } - // check if participants file match found subjects - if (participants) { - var participantsFromFile = participants.list.sort() - var participantsFromFolders = summary.subjects.sort() - if ( - !utils.array.equals( - participantsFromFolders, - participantsFromFile, - true, - ) - ) { - self.issues.push( - new Issue({ - code: 49, - evidence: - 'participants.tsv: ' + - participantsFromFile.join(', ') + - ' folder structure: ' + - participantsFromFolders.join(', '), - file: participants.file, - }), - ) - } - } - // check if dataset contains T1w - if (summary.modalities.indexOf('T1w') < 0) { - self.issues.push( - new Issue({ - code: 53, - }), - ) - } + // check if dataset contains T1w + if (summary.modalities.indexOf('T1w') < 0) { + self.issues.push( + new Issue({ + code: 53, + }), + ) + } - //check for equal number of participants from ./phenotype/*.tsv and participants in dataset - TSV.checkphenotype(phenotypeParticipants, summary, self.issues) + //check for equal number of participants from ./phenotype/*.tsv and participants in dataset + TSV.checkphenotype(phenotypeParticipants, summary, self.issues) - // validate nii header fields - self.issues = self.issues.concat(headerFields(headers)) + // validate nii header fields + self.issues = self.issues.concat(headerFields(headers)) - // Events validation - Events.validateEvents( - events, - stimuli, - headers, - jsonContentsDict, - self.issues, - ) + // Events validation + Events.validateEvents( + events, + stimuli, + headers, + jsonContentsDict, + self.issues, + ) - // validation session files - self.issues = self.issues.concat(session(fileList)) + // validation session files + self.issues = self.issues.concat(session(fileList)) - self.issues = self.issues.concat( - checkAnyDataPresent(fileList, summary.subjects), + self.issues = self.issues.concat( + checkAnyDataPresent(fileList, summary.subjects), + ) + summary.modalities = utils.modalities.group(summary.modalities) + var issues = utils.issues.format( + self.issues, + summary, + self.options, + ) + callback(issues, summary) + }, ) - summary.modalities = utils.modalities.group(summary.modalities) - var issues = utils.issues.format(self.issues, summary, self.options) - callback(issues, summary) }, ) }, From 626907c5eec34cd2b4962633ccc070d2482631c5 Mon Sep 17 00:00:00 2001 From: Teal Hobson-Lowther Date: Mon, 20 Aug 2018 14:59:50 -0700 Subject: [PATCH 2/5] json validator checks inherited dictionaries against schemas --- validators/json.js | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/validators/json.js b/validators/json.js index e283fa796..440673af1 100644 --- a/validators/json.js +++ b/validators/json.js @@ -12,18 +12,19 @@ var Issue = utils.issues.Issue * it finds while validating against the BIDS * specification. */ -module.exports = function(file, contents, callback) { +module.exports = function(file, jsonContentsDict, callback) { // primary flow -------------------------------------------------------------------- var issues = [] - - utils.json.parse(file, contents, function(pissues, jsObj) { - issues = pissues - if (jsObj) { - issues = issues.concat(checkUnits(file, jsObj)) - } - callback(issues, jsObj) - }) + var potentialSidecars = utils.files.potentialLocations(file.relativePath) + var mergedDictionary = utils.files.generateMergedSidecarDict( + potentialSidecars, + jsonContentsDict, + ) + if (mergedDictionary) { + issues = issues.concat(checkUnits(file, mergedDictionary)) + } + callback(issues, mergedDictionary) } // individual checks --------------------------------------------------------------- From c572e86bf99142633a1e3b77b3303958c6c05103 Mon Sep 17 00:00:00 2001 From: Teal Hobson-Lowther Date: Mon, 20 Aug 2018 15:00:09 -0700 Subject: [PATCH 3/5] update existing tests to use jsonDict as input to json validator --- tests/json.spec.js | 82 +++++++++++++++++++++++++++++----------------- 1 file changed, 52 insertions(+), 30 deletions(-) diff --git a/tests/json.spec.js b/tests/json.spec.js index 1eb449a0d..ec85d530c 100644 --- a/tests/json.spec.js +++ b/tests/json.spec.js @@ -6,30 +6,39 @@ describe('JSON', function() { name: 'task-rest_bold.json', relativePath: '/task-rest_bold.json', } - - it('should catch missing closing brackets', function() { - validate.JSON(file, '{', function(issues) { - assert(issues && issues.length > 0) - }) - }) + var jsonDict = {} it('sidecars should have key/value pair for "RepetitionTime" expressed in seconds', function() { - var jsonObj = - '{"RepetitionTime": 1.2, "echo_time": 0.005, "flip_angle": 90, "TaskName": "Rest"}' - validate.JSON(file, jsonObj, function(issues) { + var jsonObj = { + RepetitionTime: 1.2, + echo_time: 0.005, + flip_angle: 90, + TaskName: 'Rest', + } + jsonDict[file.relativePath] = jsonObj + validate.JSON(file, jsonDict, function(issues) { assert(issues.length === 0) }) - var jsonObjInval = - '{"RepetitionTime": 1200, "echo_time": 0.005, "flip_angle": 90, "TaskName": "Rest"}' - validate.JSON(file, jsonObjInval, function(issues) { + var jsonObjInval = { + RepetitionTime: 1200, + echo_time: 0.005, + flip_angle: 90, + TaskName: 'Rest', + } + jsonDict[file.relativePath] = jsonObjInval + validate.JSON(file, jsonDict, function(issues) { assert(issues && issues.length === 1) }) }) it('should detect negative value for SliceTiming', function() { - var jsonObj = - '{"RepetitionTime": 1.2, "SliceTiming": [-1.0, 0.0, 1.0], "TaskName": "Rest"}' - validate.JSON(file, jsonObj, function(issues) { + var jsonObj = { + RepetitionTime: 1.2, + SliceTiming: [-1.0, 0.0, 1.0], + TaskName: 'Rest', + } + jsonDict[file.relativePath] = jsonObj + validate.JSON(file, jsonDict, function(issues) { assert(issues.length === 1 && issues[0].code == 55) }) }) @@ -40,17 +49,24 @@ describe('JSON', function() { } it('*_meg.json sidecars should have required key/value pairs', function() { - var jsonObj = - '{"TaskName": "Audiovis", "SamplingFrequency": 1000, ' + - ' "PowerLineFrequency": 50, "DewarPosition": "Upright", ' + - ' "SoftwareFilters": "n/a", "DigitizedLandmarks": true,' + - ' "DigitizedHeadPoints": false}' - validate.JSON(meg_file, jsonObj, function(issues) { + var jsonObj = { + TaskName: 'Audiovis', + SamplingFrequency: 1000, + PowerLineFrequency: 50, + DewarPosition: 'Upright', + SoftwareFilters: 'n/a', + DigitizedLandmarks: true, + DigitizedHeadPoints: false, + } + jsonDict[meg_file.relativePath] = jsonObj + validate.JSON(meg_file, jsonDict, function(issues) { assert(issues.length === 0) }) - var jsonObjInval = jsonObj.replace(/"SamplingFrequency": 1000, /g, '') - validate.JSON(meg_file, jsonObjInval, function(issues) { + var jsonObjInval = jsonObj + jsonObjInval['SamplingFrequency'] = '' + jsonDict[meg_file.relativePath] = jsonObjInval + validate.JSON(meg_file, jsonDict, function(issues) { assert(issues && issues.length === 1) }) }) @@ -61,15 +77,21 @@ describe('JSON', function() { } it('*_ieeg.json sidecars should have required key/value pairs', function() { - var jsonObj = - '{"TaskName": "Audiovis", "Manufacturer": "TDT", ' + - ' "PowerLineFrequency": 50, "SamplingFrequency": 10, "iEEGReference": "reference"}' - validate.JSON(ieeg_file, jsonObj, function(issues) { + var jsonObj = { + TaskName: 'Audiovis', + Manufacturer: 'TDT', + PowerLineFrequency: 50, + SamplingFrequency: 10, + iEEGReference: 'reference', + } + jsonDict[ieeg_file.relativePath] = jsonObj + validate.JSON(ieeg_file, jsonDict, function(issues) { assert(issues.length === 0) }) - - var jsonObjInval = jsonObj.replace(/"Manufacturer": "TDT", /g, '') - validate.JSON(ieeg_file, jsonObjInval, function(issues) { + var jsonObjInval = jsonObj + jsonObjInval['Manufacturer'] = '' + jsonDict[ieeg_file.relativePath] = jsonObjInval + validate.JSON(ieeg_file, jsonDict, function(issues) { assert(issues && issues.length === 1) }) }) From fcaac2a8f92ce0b0b5f1ebba8a153fe2ce5dd40d Mon Sep 17 00:00:00 2001 From: Teal Hobson-Lowther Date: Mon, 20 Aug 2018 15:37:32 -0700 Subject: [PATCH 4/5] add unit tests for merged json dictionaries --- tests/json.spec.js | 73 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/tests/json.spec.js b/tests/json.spec.js index ec85d530c..dd4922cb1 100644 --- a/tests/json.spec.js +++ b/tests/json.spec.js @@ -95,4 +95,77 @@ describe('JSON', function() { assert(issues && issues.length === 1) }) }) + + it('should use inherited sidecars to find missing fields', function() { + const multiEntryJsonDict = {} + + // this json file is missing the SamplingFrequency field + const partialJsonObj = { + TaskName: 'Audiovis', + PowerLineFrequency: 50, + DewarPosition: 'Upright', + SoftwareFilters: 'n/a', + DigitizedLandmarks: true, + DigitizedHeadPoints: false, + } + multiEntryJsonDict[meg_file.relativePath] = partialJsonObj + + // this json file (sitting at the root directory level) + // provides the missing json field + const inheritedMegFile = { + name: 'meg.json', + relativePath: '/meg.json', + } + + const restOfJsonObj = { + SamplingFrequency: 2000, + } + multiEntryJsonDict[inheritedMegFile.relativePath] = restOfJsonObj + + // json validation will pass because (when merged) there are no + // missing data fields + validate.JSON(meg_file, multiEntryJsonDict, function(issues) { + assert(issues.length == 0) + }) + }) + + it('should favor the sidecar on the directory level closest to the file being validated', function() { + const multiEntryJsonDict = {} + const lowLevelFile = { + name: 'run-01_meg.json', + relativePath: '/sub-01/run-01_meg.json', + } + + // this json file has a good SamplingFrequency field + const partialJsonObj = { + TaskName: 'Audiovis', + SamplingFrequency: 1000, + PowerLineFrequency: 50, + DewarPosition: 'Upright', + SoftwareFilters: 'n/a', + DigitizedLandmarks: true, + DigitizedHeadPoints: false, + } + multiEntryJsonDict[lowLevelFile.relativePath] = partialJsonObj + + // this json file (sitting at the root directory level) + // also has a SamplingFrequency field, but it is wrong. + const inheritedMegFile = { + name: 'meg.json', + relativePath: '/meg.json', + } + + const restOfJsonObj = { + SamplingFrequency: '', + } + multiEntryJsonDict[inheritedMegFile.relativePath] = restOfJsonObj + + // json validation will pass because merged dictionaries prefer + // field values of the json sidecar furthest from the root. + // /meg.json is closer to the root than /sub-01/run-01_meg.json + // and so the values of the latter should be preferred. + validate.JSON(lowLevelFile, multiEntryJsonDict, function(issues) { + assert(issues.length == 0) + }) + }) }) From e98216b2a1e8193216c1f75178cf5aa04cbb6a68 Mon Sep 17 00:00:00 2001 From: Teal Hobson-Lowther Date: Tue, 28 Aug 2018 10:23:56 -0700 Subject: [PATCH 5/5] promisify utils/files.js readFile() --- utils/files.js | 48 +++++++-------- utils/options.js | 13 +++-- validators/bids.js | 142 +++++++++++++++++++++++---------------------- 3 files changed, 104 insertions(+), 99 deletions(-) diff --git a/utils/files.js b/utils/files.js index 3da9a9907..5764ad0af 100644 --- a/utils/files.js +++ b/utils/files.js @@ -43,34 +43,34 @@ var fileUtils = { * In node the file should be a path to a file. * */ -function readFile(file, callback) { - if (fs) { - testFile(file, function(issue) { - if (issue) { - process.nextTick(function() { - callback(issue, null) - }) - return - } - fs.readFile(file.path, 'utf8', function(err, data) { - process.nextTick(function() { - callback(null, data) +function readFile(file) { + return new Promise((resolve, reject) => { + if (fs) { + testFile(file, function(issue) { + if (issue) { + process.nextTick(function() { + return reject(issue) + }) + } + fs.readFile(file.path, 'utf8', function(err, data) { + process.nextTick(function() { + return resolve(data) + }) }) }) - }) - } else { - var reader = new FileReader() - reader.onloadend = function(e) { - if (e.target.readyState == FileReader.DONE) { - if (!e.target.result) { - callback(new Issue({ code: 44, file: file }), null) - return + } else { + var reader = new FileReader() + reader.onloadend = function(e) { + if (e.target.readyState == FileReader.DONE) { + if (!e.target.result) { + return reject(new Issue({ code: 44, file: file })) + } + return resolve(e.target.result) } - callback(null, e.target.result) } + reader.readAsBinaryString(file) } - reader.readAsBinaryString(file) - } + }) } function getBIDSIgnoreFileObjNode(dir) { @@ -120,7 +120,7 @@ function getBIDSIgnore(dir, callback) { var bidsIgnoreFileObj = getBIDSIgnoreFileObj(dir) if (bidsIgnoreFileObj) { - readFile(bidsIgnoreFileObj, function(issue, content) { + readFile(bidsIgnoreFileObj).then(content => { ig = ig.add(content) callback(ig) }) diff --git a/utils/options.js b/utils/options.js index 9fe9edd19..f53d691eb 100644 --- a/utils/options.js +++ b/utils/options.js @@ -31,13 +31,14 @@ module.exports = { loadConfig: function(config, callback) { if (typeof config === 'string') { // load file - files.readFile({ path: config }, function(issue, contents) { - if (issue) { - callback([issue], { path: config }, null) - } else { + files + .readFile({ path: config }) + .then(contents => { callback(null, { path: config }, contents) - } - }) + }) + .catch(issue => { + callback([issue], { path: config }, null) + }) } else if (typeof config === 'object') { callback(null, { path: 'config' }, JSON.stringify(config)) } diff --git a/validators/bids.js b/validators/bids.js index c90b670b0..54de631cb 100644 --- a/validators/bids.js +++ b/validators/bids.js @@ -380,103 +380,107 @@ BIDS = { }), ) } - utils.files.readFile(file, function(issue, contents) { - if (issue) { - self.issues.push(issue) - process.nextTick(cb) - return - } - if (file.name.endsWith('_events.tsv')) { - events.push({ - file: file, - path: file.relativePath, - contents: contents, - }) - } - TSV.TSV(file, contents, fileList, function( - issues, - participantList, - stimFiles, - ) { - if (participantList) { - if (file.name.endsWith('participants.tsv')) { - participants = { - list: participantList, - file: file, + utils.files + .readFile(file) + .then(contents => { + if (file.name.endsWith('_events.tsv')) { + events.push({ + file: file, + path: file.relativePath, + contents: contents, + }) + } + TSV.TSV(file, contents, fileList, function( + issues, + participantList, + stimFiles, + ) { + if (participantList) { + if (file.name.endsWith('participants.tsv')) { + participants = { + list: participantList, + file: file, + } + } else if (file.relativePath.includes('phenotype/')) { + phenotypeParticipants.push({ + list: participantList, + file: file, + }) } - } else if (file.relativePath.includes('phenotype/')) { - phenotypeParticipants.push({ - list: participantList, - file: file, - }) } - } - if (stimFiles.length) { - // add unique new events to the stimuli.events array - stimuli.events = [...new Set([...stimuli.events, ...stimFiles])] - } - self.issues = self.issues.concat(issues) + if (stimFiles.length) { + // add unique new events to the stimuli.events array + stimuli.events = [ + ...new Set([...stimuli.events, ...stimFiles]), + ] + } + self.issues = self.issues.concat(issues) + process.nextTick(cb) + }) + }) + .catch(issue => { + self.issues.push(issue) process.nextTick(cb) }) - }) } // validate bvec else if (file.name && file.name.endsWith('.bvec')) { - utils.files.readFile(file, function(issue, contents) { - if (issue) { + utils.files + .readFile(file) + .then(contents => { + bContentsDict[file.relativePath] = contents + bvec(file, contents, function(issues) { + self.issues = self.issues.concat(issues) + process.nextTick(cb) + }) + }) + .catch(issue => { self.issues.push(issue) process.nextTick(cb) - return - } - bContentsDict[file.relativePath] = contents - bvec(file, contents, function(issues) { - self.issues = self.issues.concat(issues) - process.nextTick(cb) }) - }) } // validate bval else if (file.name && file.name.endsWith('.bval')) { - utils.files.readFile(file, function(issue, contents) { - if (issue) { + utils.files + .readFile(file) + .then(contents => { + bContentsDict[file.relativePath] = contents + bval(file, contents, function(issues) { + self.issues = self.issues.concat(issues) + process.nextTick(cb) + }) + }) + .catch(issue => { self.issues.push(issue) process.nextTick(cb) - return - } - bContentsDict[file.relativePath] = contents - bval(file, contents, function(issues) { - self.issues = self.issues.concat(issues) - process.nextTick(cb) }) - }) } // load json data for validation later else if (file.name && file.name.endsWith('.json')) { - utils.files.readFile(file, function(issue, contents) { - if (issue) { - self.issues.push(issue) - process.nextTick(cb) - return - } - utils.json.parse(file, contents, function(issues, jsObj) { - self.issues = self.issues.concat(issues) - - // abort further tests if schema test does not pass - for (var i = 0; i < issues.length; i++) { - if (issues[i].severity === 'error') { + utils.files + .readFile(file) + .then(contents => { + utils.json.parse(file, contents, function(issues, jsObj) { + self.issues = self.issues.concat(issues) + + // abort further tests if schema test does not pass + if (issues.some(issue => issue.severity === 'error')) { process.nextTick(cb) return } - } - jsonContentsDict[file.relativePath] = jsObj - jsonFiles.push(file) + jsonContentsDict[file.relativePath] = jsObj + jsonFiles.push(file) + process.nextTick(cb) + }) + }) + .catch(issue => { + self.issues.push(issue) process.nextTick(cb) }) - }) } else { process.nextTick(cb) }