-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.js
467 lines (428 loc) · 15.7 KB
/
app.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
import { NamedNode, triple } from 'rdflib';
import bodyParser from 'body-parser';
import { LOG_INCOMING_DELTA, LOG_INCOMING_SCAN_REQUESTS } from './config';
import {
app,
errorHandler,
sparqlEscapeDateTime,
sparqlEscapeString,
sparqlEscapeUri,
uuid,
} from 'mu';
import { querySudo as query, updateSudo as update } from '@lblod/mu-auth-sudo';
import { Delta } from './lib/delta';
import { existsSync } from 'node:fs';
import NodeClam from 'clamscan';
// Result values that are always listed in the /delta summary, even when no
// scanned file ended up with that result.
const STIX_MALWARE_RESULT_OV = ['benign', 'suspicious', 'malicious', 'unknown'];

// Parse JSON request bodies; bodies up to 50mb are accepted.
const jsonBodyParser = bodyParser.json({ limit: '50mb' });
app.use(jsonBodyParser);

// Plain-text greeting on the service root.
app.get('/', (_req, res) => {
  res.send('Hello from virus-scanner-service');
});
/**
 * Receives a delta message and scans every logical file that was inserted as
 * an nfo:FileDataObject.
 *
 * Responses:
 * - [204] if the delta contains no inserts, or no inserted FileDataObject
 *         with a logical (non `share://`) IRI.
 * - [202] as soon as the list of files to scan is known. Scanning and storing
 *         the results happens after this response has been sent, one file at
 *         a time.
 * - [500] if an error is thrown before any response was sent.
 *
 * Errors thrown after the 202 was sent can no longer be reported to the
 * caller; they are only logged.
 */
app.post('/delta', async function (req, res) {
  try {
    const body = req.body;
    if (LOG_INCOMING_DELTA) {
      console.log(`Receiving delta : ${JSON.stringify(body)}`);
    }
    const delta = new Delta(body);
    if (!delta.inserts.length) {
      console.log(
        'Delta does not contain any insertions. Nothing should happen.',
      );
      return res.status(204).send();
    }
    const filesInDelta = delta
      .getInsertsFor(
        triple(
          undefined,
          new NamedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
          new NamedNode(
            'http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#FileDataObject',
          ),
        ),
      )
      .map((insert) => insert.subject.value);
    // Physical files use the share:// scheme; only logical files are scanned.
    const logicalFilesInDelta = filesInDelta.filter(
      (fileIRI) => !fileIRI.startsWith('share://'),
    );
    if (!logicalFilesInDelta.length) {
      console.log(
        'No FileDataObject inserts for logical files. Nothing should happen.',
      );
      return res.status(204).send();
    }
    // Acknowledge the delta before the (potentially long) scans start.
    res.status(202).send();
    const filesToScan = [...new Set(logicalFilesInDelta)]; //make them unique
    console.log('File IRIs to be scanned: ' + JSON.stringify(filesToScan));
    const fileResults = [];
    for (const file of filesToScan) {
      const scanFileResult = await scanFile(file);
      const storeResult = await storeMalwareAnalysis(
        file,
        scanFileResult.stixMalwareAnalysis,
      );
      fileResults.push({
        file,
        ...scanFileResult,
        ...storeResult,
      });
    }
    console.log('Finished scanning files.');
    console.log('\nDetailed results per file:');
    console.dir(fileResults, { depth: null });
    console.log('\nFiles per STIX Malware Analysis result:');
    // Known vocabulary values first, followed by any other result values
    // that were actually produced (sorted).
    const resultValues = [
      ...new Set([
        ...STIX_MALWARE_RESULT_OV,
        ...fileResults
          .map((fileResult) => fileResult.stixMalwareAnalysis.result)
          .sort(),
      ]),
    ];
    for (const resultValue of resultValues) {
      console.log('- ' + resultValue + ' :');
      console.log(
        fileResults
          .filter(
            (fileResult) =>
              fileResult.stixMalwareAnalysis.result === resultValue,
          )
          .map((fileResult) => fileResult.file),
      );
    }
    // TODO: Let storeMalwareAnalysis() run a SELECT query after the insert
    //       to check in which (if any) graphs the resource was inserted?
    //       Would be more reliable and easier to parse.
    const filesNoDatabaseUpdate = fileResults.filter((fileResult) => {
      const databaseMessage =
        fileResult.databaseResponse?.results?.bindings?.[0]?.['callret-0']
          ?.value;
      return !(databaseMessage && databaseMessage.includes(' -- done'));
    });
    if (filesNoDatabaseUpdate.length) {
      console.log(
        '\nFiles for which the database response indicates that the ' +
          'malware analysis resource object was not added to any graph:',
      );
      console.dir(filesNoDatabaseUpdate, { depth: null });
    }
  } catch (e) {
    console.log(e);
    // Once the 202 (or a 204) has been sent, sending a 500 would itself throw
    // inside this async handler and surface as an unhandled rejection, so
    // only respond when nothing was sent yet.
    if (!res.headersSent) {
      res.status(500).send('Uncaught error in /delta: ' + e);
    }
    // TODO: Re-throw error? The response 500 cannot be delivered when
    //       /delta already sent 202 (and may not be received on a
    //       connection timeout during a long scan). If the result was
    //       not "benign", but not stored because of an error, an
    //       earlier "benign" result for the same file will remain the
    //       latest result. Perhaps such errors should require more
    //       attention.
    //       OTOH: - Re-throwing lets the entire service crash.
    //             - Perhaps it is the user's responsibility anyway to
    //               check that the last malware-analyse is recent enough.
    //             - Already a note about this in README.
    //       Could be somewhat mitigated by storing a malware analysis
    //       with only analysis-started before the scan, and update it
    //       after the scan.
  }
});
/**
 * Scans a single file and stores the result.
 *
 * @param {Object} body Request body should be in JSON-format with
 *     `file` containing a logical file IRI as a single String.
 *     E.g. { "file": "http://mu.semte.ch/services/file-service/files/6543bc046ea4f3000e00000c" }
 * @return [201] if a malware analysis ran and the result was sent to the
 *               database. The response body contains the malware analysis
 *               resource object; the scan details and the database response
 *               are only logged.
 *               - If the scan failed (e.g. no related physical file was
 *                 found in the database, or the file is missing on disk),
 *                 the result will be "unknown".
 * @return [400] if request malformed (`file` missing, empty or not a String).
 * @return [422] if `file` is a physical file IRI (`share://...`) instead of a
 *               logical file IRI.
 * @return [500] if an unexpected error is thrown before a response was sent.
 */
app.post('/scan', async function (req, res) {
  try {
    const body = req.body;
    if (LOG_INCOMING_SCAN_REQUESTS) {
      console.log(`Receiving scan request : ${JSON.stringify(body)}`);
    }
    // `body` may be absent when no JSON body was parsed; treat that as a
    // malformed request (400) instead of failing with a TypeError (500).
    const logicalFileIRI = body?.file;
    if (
      !(
        typeof logicalFileIRI === 'string' || logicalFileIRI instanceof String
      ) ||
      !logicalFileIRI.length
    ) {
      return res.status(400).send('`file` not a non-empty String');
    }
    if (logicalFileIRI.startsWith('share://')) {
      // TODO: Be flexible and lookup the logical file IRI? Can we assume
      //       that, even if the physical file IRI exists in multiple graphs,
      //       they will all be related to the same logical file IRI?
      return res
        .status(422)
        .send('`file` is a physical file IRI, should be a logical file IRI');
    }
    // TODO: Check for existence of `<logicalFileIRI> a nfo:FileDataObject`?
    const scanFileResult = await scanFile(logicalFileIRI);
    const storeResult = await storeMalwareAnalysis(
      logicalFileIRI,
      scanFileResult.stixMalwareAnalysis,
    );
    console.dir(
      {
        file: logicalFileIRI,
        ...scanFileResult,
        ...storeResult,
      },
      { depth: null },
    );
    res.status(201).send(storeResult.resourceObject);
  } catch (e) {
    console.log(e);
    // Never try to send a second response: that would throw inside this
    // async handler and surface as an unhandled rejection.
    if (!res.headersSent) {
      res.status(500).send('Uncaught error in /scan: ' + e);
    }
    // TODO: Same question as for /delta: Re-throw error?
  }
});
// Register the error handler imported from 'mu' after the routes above.
app.use(errorHandler);
/**
 * Runs a virus scan for one file IRI and reports the outcome.
 *
 * Any error raised while resolving or scanning the file is captured in the
 * returned object instead of being thrown; the result then stays "unknown".
 *
 * @async
 * @function
 * @param {String} fileIRI - IRI of the file to scan. Either a logical/virtual
 *     file IRI or a physical/stored file IRI (`share://...`).
 * @returns {Object} Properties:
 *     .stixMalwareAnalysis - The malware analysis details. Remarks:
 *         - result: "benign", "malicious" or "unknown". If "unknown",
 *           see .error.
 *         - resultName: JSON string of the viruses found (only set when
 *           result is "malicious").
 *         - sampleRef: Not set, see .lookups.
 *     .error - Error object (if any).
 *     .lookups - The requested file IRI, the physical file IRI and the
 *         physical file path, as far as they could be determined.
 */
async function scanFile(fileIRI) {
  const analysis = {
    analysisStarted: new Date(),
    analysisEnded: undefined,
    result: 'unknown',
    resultName: undefined,
  };
  const ret = {
    stixMalwareAnalysis: analysis,
    error: undefined,
    lookups: undefined,
  };
  let physicalFileIRI;
  let file;
  try {
    // A share:// IRI already is the physical file; anything else is looked up.
    if (fileIRI.startsWith('share://')) {
      physicalFileIRI = fileIRI;
    } else {
      physicalFileIRI = await getPhysicalFileIRI(fileIRI);
    }
    if (physicalFileIRI === null) {
      throw new Error('No physical file IRI found for: ' + fileIRI);
    }
    file = filePathFromIRI(physicalFileIRI);
    if (!existsSync(file)) {
      throw new Error('File not found on disk: ' + JSON.stringify(file));
    }
    const scanOutcome = await clamscanFile(file);
    const verdict = scanOutcome.isInfected;
    if (verdict === false) {
      analysis.result = 'benign';
    } else if (verdict === true) {
      analysis.result = 'malicious';
      analysis.resultName = JSON.stringify(scanOutcome.viruses);
    } else if (verdict === null) {
      throw new Error('clamscan JS returned null: Unable to scan');
    } else {
      throw new Error('Unexpected return value from clamscan JS');
    }
  } catch (e) {
    ret.error = e;
  }
  analysis.analysisEnded = new Date();
  ret.lookups = {
    requestedFileIRI: fileIRI,
    physicalFileIRI,
    physicalFilePath: file,
  };
  console.log(ret);
  return ret;
}
/**
 * Scans one file for viruses through the clamscan JS library, which is
 * configured here to use clamdscan over a Unix domain socket.
 *
 * Errors are not handled here; that is the responsibility of the caller.
 *
 * @async
 * @function
 * @param {String} path - Path of file to scan.
 * @returns {Object} The value resolved by `isInfected()`. As per clamscan
 *     2.1.2:
 *     - `file` (string) The original `filePath` passed into the `isInfected`
 *       method.
 *     - `isInfected` (boolean) **True**: File is infected;
 *                              **False**: File is clean.
 *                              **NULL**: Unable to scan.
 *     - `viruses` (array) An array of any viruses found in the scanned file.
 */
async function clamscanFile(path) {
  console.log('Running virus scan on file: ' + JSON.stringify(path));
  const clamOptions = {
    clamscan: {
      // Do not use clamscan binary because it loads database on every run.
      active: false,
    },
    clamdscan: {
      socket: '/var/run/clamav/clamd.ctl', // Unix domain socket
      host: false, // Do not connect via TCP interface
      port: false, // Do not connect via TCP interface
      localFallback: false, // Do not use local preferred binary to scan if socket/tcp fails
      active: true,
    },
    preference: 'clamdscan',
  };
  const clam = new NodeClam();
  const scanner = await clam.init(clamOptions);
  const result = await scanner.isInfected(path);
  console.log(result);
  return result;
}
/**
 * Looks up the physical file IRI that has the given virtual/logical file IRI
 * as its nie:dataSource.
 *
 * @async
 * @param {String} logicalFileIRI - IRI of the logical file.
 * @returns {String|null} The first physical file IRI found, or null when the
 *     query returns no bindings.
 */
async function getPhysicalFileIRI(logicalFileIRI) {
  const lookupQuery = `
PREFIX nie: <http://www.semanticdesktop.org/ontologies/2007/01/19/nie#>
SELECT ?physicalFile
WHERE {
GRAPH ?g {
?physicalFile nie:dataSource ${sparqlEscapeUri(logicalFileIRI)} .
}
}
`;
  const response = await query(lookupQuery);
  const bindings = response.results.bindings;
  if (!bindings.length) {
    return null;
  }
  // Taking the first binding is based on the assumption that, even if there
  // are triples for the logical file IRI in multiple graphs, they will all
  // be related to the same physical file IRI.
  return bindings[0]['physicalFile'].value;
}
/**
 * Maps a physical file IRI onto the path of the file on disk.
 *
 * A leading `share://` is replaced by `/share/`; an IRI that does not start
 * with `share://` is returned unchanged.
 *
 * The URI of the stored file uses the share:// protocol and
 * reflects the location where the file resides as a relative
 * path to the share folder. E.g. share://uploads/my-file.pdf
 * means the file is stored at /share/uploads/my-file.pdf.
 * -- https://github.com/mu-semtech/file-service/blob/v3.3.0/README.md#description
 *
 * @param {String} physicalFileIRI - Physical file IRI.
 * @returns {String} File path.
 */
function filePathFromIRI(physicalFileIRI) {
  const scheme = 'share://';
  if (!physicalFileIRI.startsWith(scheme)) {
    return physicalFileIRI;
  }
  const relativePath = physicalFileIRI.slice(scheme.length);
  return '/share/' + relativePath;
}
/**
 * Writes the outcome of a malware scan to the database.
 *
 * A stix:MalwareAnalysis resource is inserted in every graph in which the
 * supplied file IRI is typed as nfo:FileDataObject.
 *
 * Notes:
 * - This function does not lookup and flag related file IRIs. If both the
 *   logical/virtual file IRI and physical/stored file IRI need to be flagged,
 *   call this function again for each IRI.
 * - If fileIRI does not exist in any graph in the database, the returned
 *   resource object will not have been inserted anywhere in the database.
 * - `stixMalwareAnalysis.resultName` is not written to the database by this
 *   function.
 *
 * @param {String} fileIRI - IRI of the file to be flagged.
 * @param {Object} stixMalwareAnalysis - The malware analysis details.
 *     Properties: .analysisStarted: Timestamp of start of analysis.
 *                 .analysisEnded  : Timestamp of end of analysis.
 *                 .result         : Usually one of the values from
 *       STIX 2.1 Malware Result Vocabulary malware-result-ov:
 *       "malicious", "suspicious", "benign" or "unknown".
 *       https://docs.oasis-open.org/cti/stix/v2.1/cs01/stix-v2.1-cs01.html#_dtrq0daddkwa
 *                 .resultName     : JSON string of array of viruses found.
 * @return {Object} Properties:
 *     .resourceObject: JavaScript object representation of the malware
 *         analysis resource object.
 *     .databaseResponse: Whatever the update call resolved with. As noted by
 *         the original author: null if not inserted in any graph, otherwise
 *         .results.bindings[0].['callret-0'].value {String} is a textual
 *         database response mentioning the graphs in which the malware
 *         analysis resource object was inserted (may still be 0, check
 *         response).
 * @throws Rethrows any error raised while building or running the update,
 *     after logging it.
 */
async function storeMalwareAnalysis(fileIRI, stixMalwareAnalysis) {
  const malwareAnalysisId = uuid();
  // TODO: Not http://data.gift/id/virus-scanner/analysis/1 ?
  //       or: http://data.gift/services/id/virus-scanner/analysis/1 ?
  const malwareAnalysisIri = `http://data.gift/virus-scanner/analysis/id/${malwareAnalysisId}`;

  let databaseResponse;
  try {
    // Escaping happens inside the try so that escaping failures are logged
    // and rethrown just like database failures.
    const analysisTerm = sparqlEscapeUri(malwareAnalysisIri);
    const uuidTerm = sparqlEscapeString(malwareAnalysisId);
    const startedTerm = sparqlEscapeDateTime(
      stixMalwareAnalysis.analysisStarted,
    );
    const endedTerm = sparqlEscapeDateTime(stixMalwareAnalysis.analysisEnded);
    const resultTerm = sparqlEscapeString(stixMalwareAnalysis.result);
    const fileTerm = sparqlEscapeUri(fileIRI);
    const insertQuery = `
PREFIX stix: <http://docs.oasis-open.org/cti/ns/stix#>
PREFIX mu: <http://mu.semte.ch/vocabularies/core/>
PREFIX nfo: <http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#>
INSERT {
GRAPH ?g {
${analysisTerm}
a stix:MalwareAnalysis;
mu:uuid ${uuidTerm};
stix:analysis_started ${startedTerm};
stix:analysis_ended ${endedTerm};
stix:result ${resultTerm};
stix:sample_ref ${fileTerm} .
}
}
WHERE {
GRAPH ?graph {
${fileTerm} a nfo:FileDataObject .
}
BIND(?graph AS ?g)
}
`;
    databaseResponse = await update(insertQuery);
  } catch (e) {
    console.log(
      `Failed to store malware analysis of <${fileIRI}> in triplestore: \n ${e}`,
    );
    throw e;
  }

  const ret = {
    resourceObject: {
      data: {
        type: 'malware-analyses',
        id: malwareAnalysisId,
        attributes: {
          // TODO: Ok to include uri? Not a property in database, but
          //       mu-cl-resource include-uri also adds it as an attribute.
          uri: malwareAnalysisIri,
          'analysis-started': stixMalwareAnalysis.analysisStarted,
          'analysis-ended': stixMalwareAnalysis.analysisEnded,
          result: stixMalwareAnalysis.result,
          'sample-ref': fileIRI,
        },
      },
      // TODO: links.self
    },
    databaseResponse,
  };
  console.log(ret);
  return ret;
}