From f1e55f6bfbc1d8879e0b5d33815bf16523cd65a4 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Thu, 19 Oct 2023 21:48:42 +0200 Subject: [PATCH 1/8] detect/datajson: introduce feature This patch introduces a new keyword, datajson, that is similar to dataset with a twist. Where dataset allows matching against sets, datajson allows the same but also adds JSON data to the alert event. This data comes from the set definition itself. For example, an ipv4 set will look like: 10.16.1.11,{"test": "success","context":3} The syntax is the value and the JSON data separated by a comma. The syntax of the keyword is the following: datajson:isset,src_ip,type ip,load src.lst,key src_ip; Compared to dataset, it just has a supplementary option, key, that is used to indicate in which subobject the JSON value should be added. The information is added in the event under the alert.extra subobject: "alert": { "extra": { "src_ip": { "test": "success", "context": 3 }, The main interest of the feature is to be able to contextualize a match. For example, if you have an IOC source, you can do value1,{"actor":"APT28","Country":"FR"} value2,{"actor":"APT32","Country":"NL"} This way, a single dataset is able to add context to the event, where previously this was not possible and multiple signatures had to be used. 
Ticket: #7372 --- etc/schema.json | 4 + src/Makefile.am | 3 + src/datasets-ipv4.c | 34 ++ src/datasets-ipv4.h | 11 + src/datasets-ipv6.c | 33 ++ src/datasets-ipv6.h | 10 + src/datasets-json.h | 38 ++ src/datasets-md5.c | 33 ++ src/datasets-md5.h | 11 + src/datasets-sha256.c | 33 ++ src/datasets-sha256.h | 11 + src/datasets-string.c | 59 +++ src/datasets-string.h | 12 + src/datasets.c | 679 ++++++++++++++++++++++--- src/datasets.h | 4 + src/decode.h | 4 + src/detect-datajson.c | 369 ++++++++++++++ src/detect-datajson.h | 46 ++ src/detect-engine-alert.c | 18 + src/detect-engine-content-inspection.c | 14 +- src/detect-engine-register.c | 2 + src/detect-engine-register.h | 1 + src/detect.c | 1 + src/detect.h | 14 + src/output-json-alert.c | 7 + 25 files changed, 1374 insertions(+), 77 deletions(-) create mode 100644 src/datasets-json.h create mode 100644 src/detect-datajson.c create mode 100644 src/detect-datajson.h diff --git a/etc/schema.json b/etc/schema.json index cf03a2db30b6..f48a76381020 100644 --- a/etc/schema.json +++ b/etc/schema.json @@ -212,6 +212,10 @@ "xff": { "type": "string" }, + "extra": { + "type": "object", + "additionalProperties": true + }, "metadata": { "type": "object", "properties": { diff --git a/src/Makefile.am b/src/Makefile.am index 7d05751ec254..bcdb0147ed15 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -50,6 +50,7 @@ noinst_HEADERS = \ datasets.h \ datasets-ipv4.h \ datasets-ipv6.h \ + datasets-json.h \ datasets-md5.h \ datasets-reputation.h \ datasets-sha256.h \ @@ -102,6 +103,7 @@ noinst_HEADERS = \ detect-config.h \ detect-content.h \ detect-csum.h \ + detect-datajson.h \ detect-datarep.h \ detect-dataset.h \ detect-dce-iface.h \ @@ -662,6 +664,7 @@ libsuricata_c_a_SOURCES = \ detect-config.c \ detect-content.c \ detect-csum.c \ + detect-datajson.c \ detect-datarep.c \ detect-dataset.c \ detect-dce-iface.c \ diff --git a/src/datasets-ipv4.c b/src/datasets-ipv4.c index 67f8778fd2d6..1e6fc07d4973 100644 --- a/src/datasets-ipv4.c 
+++ b/src/datasets-ipv4.c @@ -56,3 +56,37 @@ uint32_t IPv4Hash(uint32_t hash_seed, void *s) void IPv4Free(void *s) { } + +int IPv4JsonSet(void *dst, void *src) +{ + IPv4TypeJson *src_s = src; + IPv4TypeJson *dst_s = dst; + memcpy(dst_s->ipv4, src_s->ipv4, sizeof(dst_s->ipv4)); + dst_s->json.value = src_s->json.value; + dst_s->json.len = src_s->json.len; + + return 0; +} + +bool IPv4JsonCompare(void *a, void *b) +{ + const IPv4TypeJson *as = a; + const IPv4TypeJson *bs = b; + + return (memcmp(as->ipv4, bs->ipv4, sizeof(as->ipv4)) == 0); +} + +uint32_t IPv4JsonHash(uint32_t hash_seed, void *s) +{ + const IPv4TypeJson *str = s; + return hashword((uint32_t *)str->ipv4, 1, hash_seed); +} + +// data stays in hash +void IPv4JsonFree(void *s) +{ + const IPv4TypeJson *as = s; + if (as->json.value) { + SCFree(as->json.value); + } +} diff --git a/src/datasets-ipv4.h b/src/datasets-ipv4.h index 4a840e9aa631..3fc23ee2958e 100644 --- a/src/datasets-ipv4.h +++ b/src/datasets-ipv4.h @@ -25,15 +25,26 @@ #define SURICATA_DATASETS_IPV4_H #include "datasets-reputation.h" +#include "datasets-json.h" typedef struct IPv4Type { uint8_t ipv4[4]; DataRepType rep; } IPv4Type; +typedef struct IPv4TypeJson { + uint8_t ipv4[4]; + DataJsonType json; +} IPv4TypeJson; + int IPv4Set(void *dst, void *src); bool IPv4Compare(void *a, void *b); uint32_t IPv4Hash(uint32_t hash_seed, void *s); void IPv4Free(void *s); +int IPv4JsonSet(void *dst, void *src); +bool IPv4JsonCompare(void *a, void *b); +uint32_t IPv4JsonHash(uint32_t hash_seed, void *s); +void IPv4JsonFree(void *s); + #endif /* SURICATA_DATASETS_IPV4_H */ diff --git a/src/datasets-ipv6.c b/src/datasets-ipv6.c index ac96374da7c7..61beb56979ba 100644 --- a/src/datasets-ipv6.c +++ b/src/datasets-ipv6.c @@ -56,3 +56,36 @@ uint32_t IPv6Hash(uint32_t hash_seed, void *s) void IPv6Free(void *s) { } + +int IPv6JsonSet(void *dst, void *src) +{ + IPv6TypeJson *src_s = src; + IPv6TypeJson *dst_s = dst; + memcpy(dst_s->ipv6, src_s->ipv6, 
sizeof(dst_s->ipv6)); + dst_s->json.value = src_s->json.value; + dst_s->json.len = src_s->json.len; + + return 0; +} + +bool IPv6JsonCompare(void *a, void *b) +{ + const IPv6TypeJson *as = a; + const IPv6TypeJson *bs = b; + + return (memcmp(as->ipv6, bs->ipv6, sizeof(as->ipv6)) == 0); +} + +uint32_t IPv6JsonHash(uint32_t hash_seed, void *s) +{ + const IPv6TypeJson *str = s; + return hashword((uint32_t *)str->ipv6, 4, hash_seed); +} + +void IPv6JsonFree(void *s) +{ + const IPv6TypeJson *as = s; + if (as->json.value) { + SCFree(as->json.value); + } +} diff --git a/src/datasets-ipv6.h b/src/datasets-ipv6.h index c75ad194d685..eeedd4121a80 100644 --- a/src/datasets-ipv6.h +++ b/src/datasets-ipv6.h @@ -31,9 +31,19 @@ typedef struct IPv6Type { DataRepType rep; } IPv6Type; +typedef struct IPv6TypeJson { + uint8_t ipv6[16]; + DataJsonType json; +} IPv6TypeJson; + int IPv6Set(void *dst, void *src); bool IPv6Compare(void *a, void *b); uint32_t IPv6Hash(uint32_t hash_seed, void *s); void IPv6Free(void *s); +int IPv6JsonSet(void *dst, void *src); +bool IPv6JsonCompare(void *a, void *b); +uint32_t IPv6JsonHash(uint32_t hash_seed, void *s); +void IPv6JsonFree(void *s); + #endif /* __DATASETS_IPV4_H__ */ diff --git a/src/datasets-json.h b/src/datasets-json.h new file mode 100644 index 000000000000..2a508fe69a9d --- /dev/null +++ b/src/datasets-json.h @@ -0,0 +1,38 @@ +/* Copyright (C) 2024 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +/** + * \file + * + * \author Eric Leblond + */ + +#ifndef __DATASETS_JSON_H__ +#define __DATASETS_JSON_H__ + +#include +typedef struct DataJsonType { + char *value; + size_t len; +} DataJsonType; + +typedef struct DataJsonResultType { + bool found; + DataJsonType json; +} DataJsonResultType; + +#endif /* __DATASETS_JSON_H__ */ diff --git a/src/datasets-md5.c b/src/datasets-md5.c index 28fd37d8304a..77a94f2ae1c3 100644 --- a/src/datasets-md5.c +++ b/src/datasets-md5.c @@ -57,3 +57,36 @@ uint32_t Md5StrHash(uint32_t hash_seed, void *s) void Md5StrFree(void *s) { } + +int Md5StrJsonSet(void *dst, void *src) +{ + Md5TypeJson *src_s = src; + Md5TypeJson *dst_s = dst; + memcpy(dst_s->md5, src_s->md5, sizeof(dst_s->md5)); + dst_s->json.value = src_s->json.value; + dst_s->json.len = src_s->json.len; + return 0; +} + +bool Md5StrJsonCompare(void *a, void *b) +{ + const Md5TypeJson *as = a; + const Md5TypeJson *bs = b; + + return (memcmp(as->md5, bs->md5, sizeof(as->md5)) == 0); +} + +uint32_t Md5StrJsonHash(uint32_t hash_seed, void *s) +{ + const Md5TypeJson *str = s; + return hashword((uint32_t *)str->md5, sizeof(str->md5) / 4, hash_seed); +} + +// data stays in hash +void Md5StrJsonFree(void *s) +{ + const Md5TypeJson *as = s; + if (as->json.value) { + SCFree(as->json.value); + } +} diff --git a/src/datasets-md5.h b/src/datasets-md5.h index 88c1ff1dfd3b..858ecf1b6ea6 100644 --- a/src/datasets-md5.h +++ b/src/datasets-md5.h @@ -25,15 +25,26 @@ #define SURICATA_DATASETS_MD5_H #include "datasets-reputation.h" +#include "datasets-json.h" typedef struct Md5Type { uint8_t md5[16]; DataRepType rep; } Md5Type; +typedef struct Md5TypeJson { + uint8_t md5[16]; + DataJsonType json; +} Md5TypeJson; + int Md5StrSet(void *dst, void *src); bool 
Md5StrCompare(void *a, void *b); uint32_t Md5StrHash(uint32_t hash_seed, void *s); void Md5StrFree(void *s); +int Md5StrJsonSet(void *dst, void *src); +bool Md5StrJsonCompare(void *a, void *b); +uint32_t Md5StrJsonHash(uint32_t hash_seed, void *s); +void Md5StrJsonFree(void *s); + #endif /* SURICATA_DATASETS_MD5_H */ diff --git a/src/datasets-sha256.c b/src/datasets-sha256.c index 240939c08454..aa9f4df137c2 100644 --- a/src/datasets-sha256.c +++ b/src/datasets-sha256.c @@ -56,3 +56,36 @@ void Sha256StrFree(void *s) { // no dynamic data } + +int Sha256StrJsonSet(void *dst, void *src) +{ + Sha256TypeJson *src_s = src; + Sha256TypeJson *dst_s = dst; + memcpy(dst_s->sha256, src_s->sha256, sizeof(dst_s->sha256)); + dst_s->json.value = src_s->json.value; + dst_s->json.len = src_s->json.len; + return 0; +} + +bool Sha256StrJsonCompare(void *a, void *b) +{ + Sha256TypeJson *as = a; + Sha256TypeJson *bs = b; + + return (memcmp(as->sha256, bs->sha256, sizeof(as->sha256)) == 0); +} + +uint32_t Sha256StrJsonHash(uint32_t hash_seed, void *s) +{ + Sha256TypeJson *str = s; + return hashword((uint32_t *)str->sha256, sizeof(str->sha256) / 4, hash_seed); +} + +// data stays in hash +void Sha256StrJsonFree(void *s) +{ + const Sha256TypeJson *as = s; + if (as->json.value) { + SCFree(as->json.value); + } +} diff --git a/src/datasets-sha256.h b/src/datasets-sha256.h index 4f99b85a96e6..912a6b2e5b1f 100644 --- a/src/datasets-sha256.h +++ b/src/datasets-sha256.h @@ -25,15 +25,26 @@ #define SURICATA_DATASETS_SHA256_H #include "datasets-reputation.h" +#include "datasets-json.h" typedef struct Sha256Type { uint8_t sha256[32]; DataRepType rep; } Sha256Type; +typedef struct Sha256TypeJson { + uint8_t sha256[32]; + DataJsonType json; +} Sha256TypeJson; + int Sha256StrSet(void *dst, void *src); bool Sha256StrCompare(void *a, void *b); uint32_t Sha256StrHash(uint32_t hash_seed, void *s); void Sha256StrFree(void *s); +int Sha256StrJsonSet(void *dst, void *src); +bool Sha256StrJsonCompare(void *a, 
void *b); +uint32_t Sha256StrJsonHash(uint32_t hash_seed, void *s); +void Sha256StrJsonFree(void *s); + #endif /* SURICATA_DATASETS_SHA256_H */ diff --git a/src/datasets-string.c b/src/datasets-string.c index 85fe864f52db..e00eefe5fa0b 100644 --- a/src/datasets-string.c +++ b/src/datasets-string.c @@ -103,3 +103,62 @@ void StringFree(void *s) StringType *str = s; SCFree(str->ptr); } + +int StringJsonAsBase64(const void *s, char *out, size_t out_size) +{ + const StringTypeJson *str = s; + + unsigned long len = Base64EncodeBufferSize(str->len); + uint8_t encoded_data[len]; + if (Base64Encode((unsigned char *)str->ptr, str->len, encoded_data, &len) != SC_BASE64_OK) + return 0; + + strlcpy(out, (const char *)encoded_data, out_size); + strlcat(out, "\n", out_size); + return strlen(out); +} + +int StringJsonSet(void *dst, void *src) +{ + StringTypeJson *src_s = src; + StringTypeJson *dst_s = dst; + SCLogDebug("dst %p src %p, src_s->ptr %p src_s->len %u", dst, src, src_s->ptr, src_s->len); + + dst_s->len = src_s->len; + dst_s->ptr = SCMalloc(dst_s->len); + BUG_ON(dst_s->ptr == NULL); + memcpy(dst_s->ptr, src_s->ptr, dst_s->len); + + dst_s->json.value = src_s->json.value; + dst_s->json.len = src_s->json.len; + + SCLogDebug("dst %p src %p, dst_s->ptr %p dst_s->len %u", dst, src, dst_s->ptr, dst_s->len); + return 0; +} + +bool StringJsonCompare(void *a, void *b) +{ + const StringTypeJson *as = a; + const StringTypeJson *bs = b; + + if (as->len != bs->len) + return false; + + return (memcmp(as->ptr, bs->ptr, as->len) == 0); +} + +uint32_t StringJsonHash(uint32_t hash_seed, void *s) +{ + StringTypeJson *str = s; + return hashlittle_safe(str->ptr, str->len, hash_seed); +} + +// base data stays in hash +void StringJsonFree(void *s) +{ + StringTypeJson *str = s; + SCFree(str->ptr); + if (str->json.value) { + SCFree(str->json.value); + } +} diff --git a/src/datasets-string.h b/src/datasets-string.h index 745754fc49ac..302937f01eb4 100644 --- a/src/datasets-string.h +++ 
b/src/datasets-string.h @@ -32,6 +32,12 @@ typedef struct StringType { uint8_t *ptr; } StringType; +typedef struct StringTypeJson { + uint32_t len; + DataJsonType json; + uint8_t *ptr; +} StringTypeJson; + int StringSet(void *dst, void *src); bool StringCompare(void *a, void *b); uint32_t StringHash(uint32_t hash_seed, void *s); @@ -39,4 +45,10 @@ uint32_t StringGetLength(void *s); void StringFree(void *s); int StringAsBase64(const void *s, char *out, size_t out_size); +int StringJsonSet(void *dst, void *src); +bool StringJsonCompare(void *a, void *b); +uint32_t StringJsonHash(uint32_t hash_seed, void *s); +void StringJsonFree(void *s); +int StringJsonAsBase64(const void *s, char *out, size_t out_size); + #endif /* SURICATA_DATASETS_STRING_H */ diff --git a/src/datasets.c b/src/datasets.c index 402c7d34fe99..14aa59755a29 100644 --- a/src/datasets.c +++ b/src/datasets.c @@ -32,7 +32,9 @@ #include "datasets-md5.h" #include "datasets-sha256.h" #include "datasets-reputation.h" +#include "datasets-json.h" #include "util-conf.h" +#include "util-mem.h" #include "util-thash.h" #include "util-print.h" #include "util-byte.h" @@ -47,6 +49,9 @@ static uint32_t set_ids = 0; static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep); +static int DatasetAddwJson( + Dataset *set, const uint8_t *data, const uint32_t data_len, DataJsonType *json); + static inline void DatasetUnlockData(THashData *d) { (void) THashDecrUsecnt(d); @@ -165,6 +170,22 @@ static int ParseRepLine(const char *in, size_t ins, DataRepType *rep_out) return 0; } +static int ParseJsonLine(const char *in, size_t ins, DataJsonType *rep_out) +{ + json_error_t jerror; + json_t *msg; + msg = json_loads(in, 0, &jerror); + if (msg == NULL) { + SCLogWarning("dataset: Invalid json: %s\n", jerror.text); + return -1; + } else { + json_decref(msg); + } + rep_out->len = ins; + rep_out->value = SCStrndup(in, ins); + return 0; +} + static int DatasetLoadIPv4(Dataset *set) { if 
(strlen(set->load) == 0) @@ -182,6 +203,7 @@ static int DatasetLoadIPv4(Dataset *set) return -1; } + int add_ret; uint32_t cnt = 0; char line[1024]; while (fgets(line, (int)sizeof(line), fp) != NULL) { @@ -205,31 +227,60 @@ static int DatasetLoadIPv4(Dataset *set) /* list with rep data */ } else { line[strlen(line) - 1] = '\0'; - SCLogDebug("IPv4 with REP line: '%s'", line); - *r = '\0'; - struct in_addr in; - if (inet_pton(AF_INET, line, &in) != 1) { - FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line); - continue; - } + if ((*(r + 1) == '{') || (*(r + 1) == '[')) { + struct in_addr in; + if (inet_pton(AF_INET, line, &in) != 1) { + FatalErrorOnInit( + "datajson IPv4 parse failed %s/%s: %s", set->name, set->load, line); + continue; + } - r++; + r++; - DataRepType rep = { .value = 0 }; - if (ParseRepLine(r, strlen(r), &rep) < 0) { - FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load); - continue; - } + DataJsonType json = { .value = NULL, .len = 0 }; + if (ParseJsonLine(r, strlen(r), &json) < 0) { + FatalErrorOnInit("bad json value for dataset %s/%s", set->name, set->load); + continue; + } - SCLogDebug("rep v:%u", rep.value); - if (DatasetAddwRep(set, (const uint8_t *)&in.s_addr, 4, &rep) < 0) { - FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load); - continue; - } + add_ret = DatasetAddwJson(set, (const uint8_t *)&in.s_addr, 4, &json); + if (add_ret < 0) { + FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load); + continue; + } else if (add_ret == 0) { + SCFree(json.value); + } - cnt++; + cnt++; + + } else { + SCLogDebug("IPv4 with REP line: '%s'", line); + + struct in_addr in; + if (inet_pton(AF_INET, line, &in) != 1) { + FatalErrorOnInit( + "dataset data parse failed %s/%s: %s", set->name, set->load, line); + continue; + } + + r++; + + DataRepType rep = { .value = 0 }; + if (ParseRepLine(r, strlen(r), &rep) < 0) { + FatalErrorOnInit("bad rep for dataset %s/%s", set->name, 
set->load); + continue; + } + + SCLogDebug("rep v:%u", rep.value); + if (DatasetAddwRep(set, (const uint8_t *)&in.s_addr, 4, &rep) < 0) { + FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load); + continue; + } + + cnt++; + } } } THashConsolidateMemcap(set->hash); @@ -287,6 +338,7 @@ static int DatasetLoadIPv6(Dataset *set) return -1; } + int add_ret; uint32_t cnt = 0; char line[1024]; while (fgets(line, (int)sizeof(line), fp) != NULL) { @@ -314,29 +366,55 @@ static int DatasetLoadIPv6(Dataset *set) SCLogDebug("IPv6 with REP line: '%s'", line); *r = '\0'; + if ((*(r + 1) == '{') || (*(r + 1) == '[')) { - struct in6_addr in6; - int ret = ParseIpv6String(set, line, &in6); - if (ret < 0) { - FatalErrorOnInit("unable to parse IP address"); - continue; - } + struct in6_addr in6; + int ret = ParseIpv6String(set, line, &in6); + if (ret < 0) { + FatalErrorOnInit("unable to parse IP address"); + continue; + } - r++; + r++; - DataRepType rep = { .value = 0 }; - if (ParseRepLine(r, strlen(r), &rep) < 0) { - FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load); - continue; - } + DataJsonType json = { .value = NULL, .len = 0 }; + if (ParseJsonLine(r, strlen(r), &json) < 0) { + FatalErrorOnInit("bad json value for dataset %s/%s", set->name, set->load); + continue; + } - SCLogDebug("rep v:%u", rep.value); - if (DatasetAddwRep(set, (const uint8_t *)&in6.s6_addr, 16, &rep) < 0) { - FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load); - continue; - } + add_ret = DatasetAddwJson(set, (const uint8_t *)&in6.s6_addr, 16, &json); + if (add_ret < 0) { + FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load); + continue; + } else if (add_ret == 0) { + SCFree(json.value); + } + cnt++; - cnt++; + } else { + struct in6_addr in6; + int ret = ParseIpv6String(set, line, &in6); + if (ret < 0) { + FatalErrorOnInit("unable to parse IP address"); + continue; + } + + r++; + + DataRepType rep = { .value = 0 }; + if (ParseRepLine(r, 
strlen(r), &rep) < 0) { + FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load); + continue; + } + + SCLogDebug("rep v:%u", rep.value); + if (DatasetAddwRep(set, (const uint8_t *)&in6.s6_addr, 16, &rep) < 0) { + FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load); + continue; + } + cnt++; + } } } THashConsolidateMemcap(set->hash); @@ -363,6 +441,7 @@ static int DatasetLoadMd5(Dataset *set) return -1; } + int add_ret; uint32_t cnt = 0; char line[1024]; while (fgets(line, (int)sizeof(line), fp) != NULL) { @@ -386,7 +465,7 @@ static int DatasetLoadMd5(Dataset *set) /* list with rep data */ } else if (strlen(line) > 33 && line[32] == ',') { line[strlen(line) - 1] = '\0'; - SCLogDebug("MD5 with REP line: '%s'", line); + SCLogDebug("MD5 with REP/JSON line: '%s'", line); uint8_t hash[16]; if (HexToRaw((const uint8_t *)line, 32, hash, sizeof(hash)) < 0) { @@ -394,19 +473,38 @@ static int DatasetLoadMd5(Dataset *set) continue; } - DataRepType rep = { .value = 0}; - if (ParseRepLine(line + 33, strlen(line) - 33, &rep) < 0) { - FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load); - continue; - } + if ((line[33] == '{') || (line[33] == '[')) { + DataJsonType json = { .value = NULL, .len = 0 }; + if (ParseJsonLine(line + 33, strlen(line) - 33, &json) < 0) { + FatalErrorOnInit("bad json for dataset %s/%s", set->name, set->load); + continue; + } - SCLogDebug("rep v:%u", rep.value); - if (DatasetAddwRep(set, hash, 16, &rep) < 0) { - FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load); - continue; - } + SCLogDebug("json v:%s", json.value); + add_ret = DatasetAddwJson(set, hash, 16, &json); + if (add_ret < 0) { + FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load); + continue; + } else if (add_ret == 0) { + SCFree(json.value); + } - cnt++; + cnt++; + } else { + DataRepType rep = { .value = 0 }; + if (ParseRepLine(line + 33, strlen(line) - 33, &rep) < 0) { + FatalErrorOnInit("bad rep for 
dataset %s/%s", set->name, set->load); + continue; + } + + SCLogDebug("rep v:%u", rep.value); + if (DatasetAddwRep(set, hash, 16, &rep) < 0) { + FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load); + continue; + } + + cnt++; + } } else { FatalErrorOnInit("MD5 bad line len %u: '%s'", (uint32_t)strlen(line), line); @@ -437,6 +535,7 @@ static int DatasetLoadSha256(Dataset *set) return -1; } + int add_ret; uint32_t cnt = 0; char line[1024]; while (fgets(line, (int)sizeof(line), fp) != NULL) { @@ -468,19 +567,38 @@ static int DatasetLoadSha256(Dataset *set) continue; } - DataRepType rep = { .value = 0 }; - if (ParseRepLine(line + 65, strlen(line) - 65, &rep) < 0) { - FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load); - continue; - } + if ((line[65] == '{') || (line[65] == '[')) { + DataJsonType json = { .value = NULL, .len = 0 }; + if (ParseJsonLine(line + 65, strlen(line) - 65, &json) < 0) { + FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load); + continue; + } - SCLogDebug("rep %u", rep.value); + SCLogDebug("json %s", json.value); - if (DatasetAddwRep(set, hash, 32, &rep) < 0) { - FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load); - continue; + add_ret = DatasetAddwJson(set, hash, 32, &json); + if (add_ret < 0) { + FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load); + continue; + } else if (add_ret == 0) { + SCFree(json.value); + } + cnt++; + } else { + DataRepType rep = { .value = 0 }; + if (ParseRepLine(line + 65, strlen(line) - 65, &rep) < 0) { + FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load); + continue; + } + + SCLogDebug("rep %u", rep.value); + + if (DatasetAddwRep(set, hash, 32, &rep) < 0) { + FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load); + continue; + } + cnt++; } - cnt++; } } THashConsolidateMemcap(set->hash); @@ -507,6 +625,7 @@ static int DatasetLoadString(Dataset *set) return -1; } + int add_ret; uint32_t cnt = 
0; char line[1024]; while (fgets(line, (int)sizeof(line), fp) != NULL) { @@ -550,20 +669,42 @@ static int DatasetLoadString(Dataset *set) r++; SCLogDebug("r '%s'", r); - DataRepType rep = { .value = 0 }; - if (ParseRepLine(r, strlen(r), &rep) < 0) { - FatalErrorOnInit("die: bad rep"); - continue; - } - SCLogDebug("rep %u", rep.value); + if ((*r == '{') || (*r == '[')) { + // coverity[alloc_strlen : FALSE] + DataJsonType json = { .value = NULL, .len = 0 }; + if (ParseJsonLine(r, strlen(r), &json) < 0) { + FatalErrorOnInit("die: bad json"); + continue; + } + SCLogDebug("json %s", json.value); - if (DatasetAddwRep(set, (const uint8_t *)decoded, num_decoded, &rep) < 0) { - FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load); - continue; - } - cnt++; + add_ret = DatasetAddwJson(set, (const uint8_t *)decoded, num_decoded, &json); + if (add_ret < 0) { + FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load); + continue; + } else if (add_ret == 0) { + SCFree(json.value); + } + cnt++; + + SCLogDebug("line with json %s, %s", line, r); + } else { + // coverity[alloc_strlen : FALSE] + DataRepType rep = { .value = 0 }; + if (ParseRepLine(r, strlen(r), &rep) < 0) { + FatalErrorOnInit("die: bad rep"); + continue; + } + SCLogDebug("rep %u", rep.value); - SCLogDebug("line with rep %s, %s", line, r); + if (DatasetAddwRep(set, (const uint8_t *)decoded, num_decoded, &rep) < 0) { + FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load); + continue; + } + cnt++; + + SCLogDebug("line with rep %s, %s", line, r); + } } } THashConsolidateMemcap(set->hash); @@ -727,10 +868,10 @@ Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, goto out_err; break; case DATASET_TYPE_IPV4: - set->hash = - THashInit(cnf_name, sizeof(IPv4Type), IPv4Set, IPv4Free, IPv4Hash, IPv4Compare, - NULL, NULL, load != NULL ? 1 : 0, memcap > 0 ? memcap : default_memcap, - hashsize > 0 ? 
hashsize : default_hashsize); + set->hash = THashInit(cnf_name, sizeof(IPv4TypeJson), IPv4Set, IPv4Free, IPv4Hash, + IPv4Compare, NULL, NULL, load != NULL ? 1 : 0, + memcap > 0 ? memcap : default_memcap, + hashsize > 0 ? hashsize : default_hashsize); if (set->hash == NULL) goto out_err; if (DatasetLoadIPv4(set) < 0) @@ -772,6 +913,134 @@ Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, return NULL; } +Dataset *DatasetJsonGet(const char *name, enum DatasetTypes type, const char *load, uint64_t memcap, + uint32_t hashsize) +{ + uint64_t default_memcap = 0; + uint32_t default_hashsize = 0; + if (strlen(name) > DATASET_NAME_MAX_LEN) { + return NULL; + } + + SCMutexLock(&sets_lock); + Dataset *set = DatasetSearchByName(name); + if (set) { + if (type != DATASET_TYPE_NOTSET && set->type != type) { + SCLogError("dataset %s already " + "exists and is of type %u", + set->name, set->type); + SCMutexUnlock(&sets_lock); + return NULL; + } + + if (load == NULL || strlen(load) == 0) { + // OK, rule keyword doesn't have to set state/load, + // even when yaml set has set it. 
+ } else { + if ((load == NULL && strlen(set->load) > 0) || + (load != NULL && strcmp(set->load, load) != 0)) { + SCLogError("dataset %s load mismatch: %s != %s", set->name, set->load, load); + SCMutexUnlock(&sets_lock); + return NULL; + } + } + + SCMutexUnlock(&sets_lock); + return set; + } else { + if (type == DATASET_TYPE_NOTSET) { + SCLogError("dataset %s not defined", name); + goto out_err; + } + } + + set = DatasetAlloc(name); + if (set == NULL) { + goto out_err; + } + + strlcpy(set->name, name, sizeof(set->name)); + set->type = type; + if (load && strlen(load)) { + strlcpy(set->load, load, sizeof(set->load)); + SCLogDebug("set \'%s\' loading \'%s\' from \'%s\'", set->name, load, set->load); + } + + char cnf_name[128]; + snprintf(cnf_name, sizeof(cnf_name), "datasets.%s.hash", name); + + GetDefaultMemcap(&default_memcap, &default_hashsize); + switch (type) { + case DATASET_TYPE_MD5: + set->hash = THashInit(cnf_name, sizeof(Md5TypeJson), Md5StrJsonSet, Md5StrJsonFree, + Md5StrJsonHash, Md5StrJsonCompare, NULL, NULL, load != NULL ? 1 : 0, + memcap > 0 ? memcap : default_memcap, + hashsize > 0 ? hashsize : default_hashsize); + if (set->hash == NULL) + goto out_err; + if (DatasetLoadMd5(set) < 0) + goto out_err; + break; + case DATASET_TYPE_STRING: + set->hash = THashInit(cnf_name, sizeof(StringTypeJson), StringJsonSet, StringJsonFree, + StringJsonHash, StringJsonCompare, NULL, NULL, load != NULL ? 1 : 0, + memcap > 0 ? memcap : default_memcap, + hashsize > 0 ? hashsize : default_hashsize); + if (set->hash == NULL) + goto out_err; + if (DatasetLoadString(set) < 0) + goto out_err; + break; + case DATASET_TYPE_SHA256: + set->hash = THashInit(cnf_name, sizeof(Sha256TypeJson), Sha256StrJsonSet, + Sha256StrJsonFree, Sha256StrJsonHash, Sha256StrJsonCompare, NULL, NULL, + load != NULL ? 1 : 0, memcap > 0 ? memcap : default_memcap, + hashsize > 0 ? 
hashsize : default_hashsize); + if (set->hash == NULL) + goto out_err; + if (DatasetLoadSha256(set) < 0) + goto out_err; + break; + case DATASET_TYPE_IPV4: + set->hash = THashInit(cnf_name, sizeof(IPv4TypeJson), IPv4JsonSet, IPv4JsonFree, + IPv4JsonHash, IPv4JsonCompare, NULL, NULL, load != NULL ? 1 : 0, + memcap > 0 ? memcap : default_memcap, + hashsize > 0 ? hashsize : default_hashsize); + if (set->hash == NULL) + goto out_err; + if (DatasetLoadIPv4(set) < 0) + goto out_err; + break; + case DATASET_TYPE_IPV6: + set->hash = THashInit(cnf_name, sizeof(IPv6TypeJson), IPv6JsonSet, IPv6JsonFree, + IPv6JsonHash, IPv6JsonCompare, NULL, NULL, load != NULL ? 1 : 0, + memcap > 0 ? memcap : default_memcap, + hashsize > 0 ? hashsize : default_hashsize); + if (set->hash == NULL) + goto out_err; + if (DatasetLoadIPv6(set) < 0) + goto out_err; + break; + } + + SCLogDebug( + "set %p/%s type %u save %s load %s", set, set->name, set->type, set->save, set->load); + + set->next = sets; + sets = set; + + SCMutexUnlock(&sets_lock); + return set; +out_err: + if (set) { + if (set->hash) { + THashShutdown(set->hash); + } + SCFree(set); + } + SCMutexUnlock(&sets_lock); + return NULL; +} static bool DatasetIsStatic(const char *save, const char *load) { /* A set is static if it does not have any dynamic properties like @@ -1140,6 +1409,28 @@ static DataRepResultType DatasetLookupStringwRep(Dataset *set, return rrep; } +static DataJsonResultType DatasetLookupStringwJson( + Dataset *set, const uint8_t *data, const uint32_t data_len) +{ + DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } }; + + if (set == NULL) + return rrep; + + StringTypeJson lookup = { + .ptr = (uint8_t *)data, .len = data_len, .json.value = NULL, .json.len = 0 + }; + THashData *rdata = THashLookupFromHash(set->hash, &lookup); + if (rdata) { + StringTypeJson *found = rdata->data; + rrep.found = true; + rrep.json = found->json; + DatasetUnlockData(rdata); + return rrep; + } + return rrep; +} + 
static int DatasetLookupIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len) { if (set == NULL) @@ -1182,6 +1473,30 @@ static DataRepResultType DatasetLookupIPv4wRep( return rrep; } +static DataJsonResultType DatasetLookupIPv4wJson( + Dataset *set, const uint8_t *data, const uint32_t data_len) +{ + DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } }; + + if (set == NULL) + return rrep; + + if (data_len != 4) + return rrep; + + IPv4TypeJson lookup = { .json.value = NULL, .json.len = 0 }; + memcpy(lookup.ipv4, data, data_len); + THashData *rdata = THashLookupFromHash(set->hash, &lookup); + if (rdata) { + IPv4TypeJson *found = rdata->data; + rrep.found = true; + rrep.json = found->json; + DatasetUnlockData(rdata); + return rrep; + } + return rrep; +} + static int DatasetLookupIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len) { if (set == NULL) @@ -1224,6 +1539,30 @@ static DataRepResultType DatasetLookupIPv6wRep( return rrep; } +static DataJsonResultType DatasetLookupIPv6wJson( + Dataset *set, const uint8_t *data, const uint32_t data_len) +{ + DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } }; + + if (set == NULL) + return rrep; + + if (data_len != 16 && data_len != 4) + return rrep; + + IPv6TypeJson lookup = { .json.value = NULL, .json.len = 0 }; + memcpy(lookup.ipv6, data, data_len); + THashData *rdata = THashLookupFromHash(set->hash, &lookup); + if (rdata) { + IPv6TypeJson *found = rdata->data; + rrep.found = true; + rrep.json = found->json; + DatasetUnlockData(rdata); + return rrep; + } + return rrep; +} + static int DatasetLookupMd5(Dataset *set, const uint8_t *data, const uint32_t data_len) { if (set == NULL) @@ -1266,6 +1605,30 @@ static DataRepResultType DatasetLookupMd5wRep(Dataset *set, return rrep; } +static DataJsonResultType DatasetLookupMd5wJson( + Dataset *set, const uint8_t *data, const uint32_t data_len) +{ + DataJsonResultType rrep = { .found = false, .json = { 
.value = NULL, .len = 0 } }; + + if (set == NULL) + return rrep; + + if (data_len != 16) + return rrep; + + Md5TypeJson lookup = { .json.value = NULL, .json.len = 0 }; + memcpy(lookup.md5, data, data_len); + THashData *rdata = THashLookupFromHash(set->hash, &lookup); + if (rdata) { + Md5TypeJson *found = rdata->data; + rrep.found = true; + rrep.json = found->json; + DatasetUnlockData(rdata); + return rrep; + } + return rrep; +} + static int DatasetLookupSha256(Dataset *set, const uint8_t *data, const uint32_t data_len) { if (set == NULL) @@ -1308,6 +1671,30 @@ static DataRepResultType DatasetLookupSha256wRep(Dataset *set, return rrep; } +static DataJsonResultType DatasetLookupSha256wJson( + Dataset *set, const uint8_t *data, const uint32_t data_len) +{ + DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } }; + + if (set == NULL) + return rrep; + + if (data_len != 32) + return rrep; + + Sha256TypeJson lookup = { .json.value = NULL, .json.len = 0 }; + memcpy(lookup.sha256, data, data_len); + THashData *rdata = THashLookupFromHash(set->hash, &lookup); + if (rdata) { + Sha256TypeJson *found = rdata->data; + rrep.found = true; + rrep.json = found->json; + DatasetUnlockData(rdata); + return rrep; + } + return rrep; +} + /** * \brief see if \a data is part of the set * \param set dataset @@ -1359,6 +1746,29 @@ DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uin return rrep; } +DataJsonResultType DatasetLookupwJson(Dataset *set, const uint8_t *data, const uint32_t data_len) +{ + DataJsonResultType rrep = { .found = false, .json = { .value = 0 } }; + if (set == NULL) + return rrep; + + switch (set->type) { + case DATASET_TYPE_STRING: + return DatasetLookupStringwJson(set, data, data_len); + case DATASET_TYPE_MD5: + return DatasetLookupMd5wJson(set, data, data_len); + case DATASET_TYPE_SHA256: + return DatasetLookupSha256wJson(set, data, data_len); + case DATASET_TYPE_IPV4: + return DatasetLookupIPv4wJson(set, data, 
data_len); + case DATASET_TYPE_IPV6: + return DatasetLookupIPv6wJson(set, data, data_len); + default: + break; + } + return rrep; +} + /** * \retval 1 data was added to the hash * \retval 0 data was not added to the hash as it is already there @@ -1478,6 +1888,64 @@ static int DatasetAddIPv6wRep( return -1; } +static int DatasetAddIPv4wJson( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + if (data_len < 4) + return -2; + + IPv4TypeJson lookup = { .json = *json }; + memcpy(lookup.ipv4, data, 4); + struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup); + if (res.data) { + DatasetUnlockData(res.data); + return res.is_new ? 1 : 0; + } + return -1; +} + +static int DatasetAddIPv6wJson( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + if (data_len != 16) + return -2; + + IPv6TypeJson lookup = { .json = *json }; + memcpy(lookup.ipv6, data, 16); + struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup); + if (res.data) { + DatasetUnlockData(res.data); + return res.is_new ? 1 : 0; + } + return -1; +} + +/** + * \retval 1 data was added to the hash + * \retval 0 data was not added to the hash as it is already there + * \retval -1 failed to add data to the hash + */ +static int DatasetAddStringwJson( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + StringTypeJson lookup = { .ptr = (uint8_t *)data, .len = data_len, .json = *json }; + struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup); + if (res.data) { + DatasetUnlockData(res.data); + return res.is_new ? 
1 : 0; + } + return -1; +} + static int DatasetAddMd5(Dataset *set, const uint8_t *data, const uint32_t data_len) { if (set == NULL) @@ -1515,6 +1983,25 @@ static int DatasetAddMd5wRep( return -1; } +static int DatasetAddMd5wJson( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + if (data_len != 16) + return -2; + + Md5TypeJson lookup = { .json = *json }; + memcpy(lookup.md5, data, 16); + struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup); + if (res.data) { + DatasetUnlockData(res.data); + return res.is_new ? 1 : 0; + } + return -1; +} + static int DatasetAddSha256wRep( Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep) { @@ -1534,6 +2021,25 @@ static int DatasetAddSha256wRep( return -1; } +static int DatasetAddSha256wJson( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + if (data_len != 32) + return -2; + + Sha256TypeJson lookup = { .json = *json }; + memcpy(lookup.sha256, data, 32); + struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup); + if (res.data) { + DatasetUnlockData(res.data); + return res.is_new ? 
1 : 0; + } + return -1; +} + static int DatasetAddSha256(Dataset *set, const uint8_t *data, const uint32_t data_len) { if (set == NULL) @@ -1593,6 +2099,29 @@ static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data return -1; } +static int DatasetAddwJson( + Dataset *set, const uint8_t *data, const uint32_t data_len, DataJsonType *json) +{ + if (set == NULL) + return -1; + + switch (set->type) { + case DATASET_TYPE_STRING: + return DatasetAddStringwJson(set, data, data_len, json); + case DATASET_TYPE_MD5: + return DatasetAddMd5wJson(set, data, data_len, json); + case DATASET_TYPE_SHA256: + return DatasetAddSha256wJson(set, data, data_len, json); + case DATASET_TYPE_IPV4: + return DatasetAddIPv4wJson(set, data, data_len, json); + case DATASET_TYPE_IPV6: + return DatasetAddIPv6wJson(set, data, data_len, json); + default: + break; + } + return -1; +} + typedef int (*DatasetOpFunc)(Dataset *set, const uint8_t *data, const uint32_t data_len); static int DatasetOpSerialized(Dataset *set, const char *string, DatasetOpFunc DatasetOpString, diff --git a/src/datasets.h b/src/datasets.h index 86bfed02b22f..e4e057920728 100644 --- a/src/datasets.h +++ b/src/datasets.h @@ -20,6 +20,7 @@ #include "util-thash.h" #include "datasets-reputation.h" +#include "datasets-json.h" int DatasetsInit(void); void DatasetsDestroy(void); @@ -60,6 +61,9 @@ int DatasetRemove(Dataset *set, const uint8_t *data, const uint32_t data_len); int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len); DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep); +DataJsonResultType DatasetLookupwJson(Dataset *set, const uint8_t *data, const uint32_t data_len); +Dataset *DatasetJsonGet(const char *name, enum DatasetTypes type, const char *load, uint64_t memcap, + uint32_t hashsize); int DatasetAddSerialized(Dataset *set, const char *string); int DatasetRemoveSerialized(Dataset *set, const char *string); 
diff --git a/src/decode.h b/src/decode.h index f36c41a8422e..ba7fdaa3a017 100644 --- a/src/decode.h +++ b/src/decode.h @@ -234,6 +234,8 @@ typedef uint16_t Port; #define PKT_IS_TOSERVER(p) (((p)->flowflags & FLOW_PKT_TOSERVER)) #define PKT_IS_TOCLIENT(p) (((p)->flowflags & FLOW_PKT_TOCLIENT)) +#define ALERT_JSON_ARRAY_LEN 4 + /* structure to store the sids/gids/etc the detection engine * found in this packet */ typedef struct PacketAlert_ { @@ -243,6 +245,8 @@ typedef struct PacketAlert_ { const struct Signature_ *s; uint64_t tx_id; /* Used for sorting */ int64_t frame_id; + char *json_strings[ALERT_JSON_ARRAY_LEN]; + int8_t json_array_last_index; } PacketAlert; /* flag to indicate the rule action (drop/pass) needs to be applied to the flow */ diff --git a/src/detect-datajson.c b/src/detect-datajson.c new file mode 100644 index 000000000000..e2adbab55cbc --- /dev/null +++ b/src/detect-datajson.c @@ -0,0 +1,369 @@ +/* Copyright (C) 2024 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +/** + * \file + * + * \author Eric Leblond + * + * Based on detect-dataset.c by Victor Julien + * + * Implements the datajson keyword + */ + +#include "suricata-common.h" +#include "detect.h" +#include "datasets.h" +#include "datasets-json.h" +#include "detect-datajson.h" + +#include "detect-parse.h" +#include "detect-engine.h" + +#include "util-debug.h" +#include "util-misc.h" +#include "util-path.h" + +static int DetectDatajsonSetup(DetectEngineCtx *, Signature *, const char *); +void DetectDatajsonFree(DetectEngineCtx *, void *); + +void DetectDatajsonRegister(void) +{ + sigmatch_table[DETECT_DATAJSON].name = "datajson"; + sigmatch_table[DETECT_DATAJSON].desc = + "match sticky buffer against datasets with json extra data (experimental)"; + sigmatch_table[DETECT_DATAJSON].url = "/rules/dataset-keywords.html#datajson"; + sigmatch_table[DETECT_DATAJSON].Setup = DetectDatajsonSetup; + sigmatch_table[DETECT_DATAJSON].Free = DetectDatajsonFree; +} + +/* + 1 match + 0 no match + -1 can't match + */ +int DetectDatajsonBufferMatch(DetectEngineThreadCtx *det_ctx, const DetectDatajsonData *sd, + const uint8_t *data, const uint32_t data_len) +{ + if (data == NULL || data_len == 0) + return 0; + + switch (sd->cmd) { + case DETECT_DATAJSON_CMD_ISSET: { + // PrintRawDataFp(stdout, data, data_len); + DataJsonResultType r = DatasetLookupwJson(sd->set, data, data_len); + SCLogDebug("r %d", r); + if (!r.found) + return 0; + if (r.json.len > 0) { + if ((det_ctx->json_content_len < SIG_JSON_CONTENT_ARRAY_LEN) && + (r.json.len + strlen(sd->json_key) + 3 < SIG_JSON_CONTENT_ITEM_LEN)) { + snprintf(det_ctx->json_content[det_ctx->json_content_len].json_content, + SIG_JSON_CONTENT_ITEM_LEN, "\"%s\":%s", sd->json_key, r.json.value); + det_ctx->json_content[det_ctx->json_content_len].id = sd->id; + det_ctx->json_content_len++; + } + } + return 1; + } + case DETECT_DATAJSON_CMD_ISNOTSET: { + // PrintRawDataFp(stdout, data, data_len); + DataJsonResultType r = 
DatasetLookupwJson(sd->set, data, data_len); + SCLogDebug("r %d", r); + if (r.found) + return 0; + return 1; + } + default: + abort(); + } + return 0; +} + +static int DetectDatajsonParse(const char *str, char *cmd, int cmd_len, char *name, int name_len, + enum DatasetTypes *type, char *load, size_t load_size, uint64_t *memcap, uint32_t *hashsize, + char *json_key, size_t json_key_size) +{ + bool cmd_set = false; + bool name_set = false; + bool load_set = false; + + char copy[strlen(str) + 1]; + strlcpy(copy, str, sizeof(copy)); + char *xsaveptr = NULL; + char *key = strtok_r(copy, ",", &xsaveptr); + while (key != NULL) { + while (*key != '\0' && isblank(*key)) { + key++; + } + char *val = strchr(key, ' '); + if (val != NULL) { + *val++ = '\0'; + while (*val != '\0' && isblank(*val)) { + val++; + SCLogDebug("cmd %s val %s", key, val); + } + } else { + SCLogDebug("cmd %s", key); + } + + if (strlen(key) == 0) { + goto next; + } + + if (!cmd_set) { + if (val && strlen(val) != 0) { + return -1; + } + strlcpy(cmd, key, cmd_len); + cmd_set = true; + } else if (!name_set) { + if (val && strlen(val) != 0) { + return -1; + } + strlcpy(name, key, name_len); + name_set = true; + } else { + if (val == NULL) { + return -1; + } + + if (strcmp(key, "type") == 0) { + SCLogDebug("type %s", val); + + if (strcmp(val, "md5") == 0) { + *type = DATASET_TYPE_MD5; + } else if (strcmp(val, "sha256") == 0) { + *type = DATASET_TYPE_SHA256; + } else if (strcmp(val, "string") == 0) { + *type = DATASET_TYPE_STRING; + } else if (strcmp(val, "ipv4") == 0) { + *type = DATASET_TYPE_IPV4; + } else if (strcmp(val, "ipv6") == 0) { + *type = DATASET_TYPE_IPV6; + } else if (strcmp(val, "ip") == 0) { + *type = DATASET_TYPE_IPV6; + } else { + SCLogError("bad type %s", val); + return -1; + } + + } else if (strcmp(key, "load") == 0) { + if (load_set) { + SCLogWarning("'load' can only appear once"); + return -1; + } + SCLogDebug("load %s", val); + strlcpy(load, val, load_size); + load_set = true; + } else if 
(strcmp(key, "key") == 0) { + if (strlen(key) > json_key_size) { + SCLogWarning("'key' value too long (limit is %" PRIu64 ")", json_key_size); + return -1; + } + strlcpy(json_key, val, json_key_size); + load_set = true; + } + + if (strcmp(key, "memcap") == 0) { + if (ParseSizeStringU64(val, memcap) < 0) { + SCLogWarning("invalid value for memcap: %s," + " resetting to default", + val); + *memcap = 0; + } + } + if (strcmp(key, "hashsize") == 0) { + if (ParseSizeStringU32(val, hashsize) < 0) { + SCLogWarning("invalid value for hashsize: %s," + " resetting to default", + val); + *hashsize = 0; + } + } + } + + SCLogDebug("key: %s, value: %s", key, val); + + next: + key = strtok_r(NULL, ",", &xsaveptr); + } + + /* Trim trailing whitespace. */ + while (strlen(name) > 0 && isblank(name[strlen(name) - 1])) { + name[strlen(name) - 1] = '\0'; + } + + /* Validate name, spaces are not allowed. */ + for (size_t i = 0; i < strlen(name); i++) { + if (isblank(name[i])) { + SCLogError("spaces not allowed in dataset names"); + return 0; + } + } + + return 1; +} + +/** \brief wrapper around dirname that does leave input untouched */ +static void GetDirName(const char *in, char *out, size_t outs) +{ + if (strlen(in) == 0) { + return; + } + + size_t size = strlen(in) + 1; + char tmp[size]; + strlcpy(tmp, in, size); + + char *dir = dirname(tmp); + BUG_ON(dir == NULL); + strlcpy(out, dir, outs); + return; +} + +static int SetupLoadPath(const DetectEngineCtx *de_ctx, char *load, size_t load_size) +{ + SCLogDebug("load %s", load); + + if (PathIsAbsolute(load)) { + return 0; + } + + bool done = false; +#ifdef HAVE_LIBGEN_H + BUG_ON(de_ctx->rule_file == NULL); + + char dir[PATH_MAX] = ""; + GetDirName(de_ctx->rule_file, dir, sizeof(dir)); + + SCLogDebug("rule_file %s dir %s", de_ctx->rule_file, dir); + char path[PATH_MAX]; + if (snprintf(path, sizeof(path), "%s/%s", dir, load) >= (int)sizeof(path)) // TODO windows path + return -1; + + if (SCPathExists(path)) { + done = true; + strlcpy(load, 
path, load_size); + SCLogDebug("using path '%s' (HAVE_LIBGEN_H)", load); + } +#endif + if (!done) { + char *loadp = DetectLoadCompleteSigPath(de_ctx, load); + if (loadp == NULL) { + return -1; + } + SCLogDebug("loadp %s", loadp); + + if (SCPathExists(loadp)) { + strlcpy(load, loadp, load_size); + SCLogDebug("using path '%s' (non-HAVE_LIBGEN_H)", load); + } + SCFree(loadp); + } + return 0; +} + +int DetectDatajsonSetup(DetectEngineCtx *de_ctx, Signature *s, const char *rawstr) +{ + DetectDatajsonData *cd = NULL; + SigMatch *sm = NULL; + uint8_t cmd = 0; + uint64_t memcap = 0; + uint32_t hashsize = 0; + char cmd_str[16] = "", name[DATASET_NAME_MAX_LEN + 1] = ""; + enum DatasetTypes type = DATASET_TYPE_NOTSET; + char load[PATH_MAX] = ""; + char json_key[SIG_JSON_CONTENT_KEY_LEN] = ""; + size_t json_key_size = SIG_JSON_CONTENT_KEY_LEN; + + if (DetectBufferGetActiveList(de_ctx, s) == -1) { + SCLogError("datajson is only supported for sticky buffers"); + SCReturnInt(-1); + } + + int list = s->init_data->list; + if (list == DETECT_SM_LIST_NOTSET) { + SCLogError("datajson is only supported for sticky buffers"); + SCReturnInt(-1); + } + + if (!DetectDatajsonParse(rawstr, cmd_str, sizeof(cmd_str), name, sizeof(name), &type, load, + sizeof(load), &memcap, &hashsize, json_key, json_key_size)) { + return -1; + } + + if (strcmp(cmd_str, "isset") == 0) { + cmd = DETECT_DATAJSON_CMD_ISSET; + } else if (strcmp(cmd_str, "isnotset") == 0) { + cmd = DETECT_DATAJSON_CMD_ISNOTSET; + } else { + SCLogError("datajson action \"%s\" is not supported.", cmd_str); + return -1; + } + + if (strlen(load) != 0) { + if (SetupLoadPath(de_ctx, load, sizeof(load)) != 0) + return -1; + } + + if (strlen(json_key) == 0) { + SCLogError("datajson needs a key parameter"); + return -1; + } + + SCLogDebug("name '%s' load '%s' save '%s'", name, load, save); + Dataset *set = DatasetJsonGet(name, type, load, memcap, hashsize); + if (set == NULL) { + SCLogError("failed to set up datajson '%s'.", name); + return 
-1; + } + if (set->hash && SC_ATOMIC_GET(set->hash->memcap_reached)) { + SCLogError("datajson too large for set memcap"); + return -1; + } + + cd = SCCalloc(1, sizeof(DetectDatajsonData)); + if (unlikely(cd == NULL)) + goto error; + + cd->set = set; + cd->cmd = cmd; + strlcpy(cd->json_key, json_key, json_key_size); + cd->id = s; + + SCLogDebug("cmd %s, name %s", cmd_str, strlen(name) ? name : "(none)"); + + SigMatchAppendSMToList(de_ctx, s, DETECT_DATAJSON, (SigMatchCtx *)cd, list); + return 0; + +error: + if (cd != NULL) + SCFree(cd); + if (sm != NULL) + SCFree(sm); + return -1; +} + +void DetectDatajsonFree(DetectEngineCtx *de_ctx, void *ptr) +{ + DetectDatajsonData *fd = (DetectDatajsonData *)ptr; + if (fd == NULL) + return; + + SCFree(fd); +} diff --git a/src/detect-datajson.h b/src/detect-datajson.h new file mode 100644 index 000000000000..ae63cab28dba --- /dev/null +++ b/src/detect-datajson.h @@ -0,0 +1,46 @@ +/* Copyright (C) 2023 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +/** + * \file + * + * \author Eric Leblond + */ + +#ifndef __DETECT_DATAJSON_H__ +#define __DETECT_DATAJSON_H__ + +#include "datasets.h" + +#define DETECT_DATAJSON_CMD_ISSET 1 +#define DETECT_DATAJSON_CMD_ISNOTSET 2 + +typedef struct DetectDatajsonData_ { + Dataset *set; + uint8_t cmd; + DataJsonType json; + char json_key[SIG_JSON_CONTENT_KEY_LEN]; + void *id; +} DetectDatajsonData; + +int DetectDatajsonBufferMatch(DetectEngineThreadCtx *det_ctx, const DetectDatajsonData *sd, + const uint8_t *data, const uint32_t data_len); + +/* prototypes */ +void DetectDatajsonRegister(void); + +#endif /* __DETECT_DATAJSON_H__ */ diff --git a/src/detect-engine-alert.c b/src/detect-engine-alert.c index 01452ecf89ae..e78a5f7c8ba2 100644 --- a/src/detect-engine-alert.c +++ b/src/detect-engine-alert.c @@ -274,6 +274,24 @@ static inline PacketAlert PacketAlertSet( /* Set tx_id if the frame has it */ pa.tx_id = tx_id; pa.frame_id = (alert_flags & PACKET_ALERT_FLAG_FRAME) ? det_ctx->frame_id : 0; + pa.json_array_last_index = -1; + if (det_ctx->json_content_len) { + /* We have some JSON attached in the current detection so let's try + to see if some need to be used for current signature. 
*/ + for (size_t i = 0; i < det_ctx->json_content_len; i++) { + if (s == det_ctx->json_content[i].id) { + pa.json_array_last_index++; + if (pa.json_array_last_index < ALERT_JSON_ARRAY_LEN) { + pa.json_strings[pa.json_array_last_index] = + det_ctx->json_content[i].json_content; + } else { + SCLogDebug("Not enough room for json data in packet alert"); + pa.json_array_last_index--; + break; + } + } + } + } return pa; } diff --git a/src/detect-engine-content-inspection.c b/src/detect-engine-content-inspection.c index d4dab42816d5..e96536905ab8 100644 --- a/src/detect-engine-content-inspection.c +++ b/src/detect-engine-content-inspection.c @@ -52,6 +52,7 @@ #include "detect-base64-data.h" #include "detect-dataset.h" #include "detect-datarep.h" +#include "detect-datajson.h" #include "util-spm.h" #include "util-debug.h" @@ -631,6 +632,16 @@ static int DetectEngineContentInspectionInternal(DetectEngineThreadCtx *det_ctx, } goto no_match_discontinue; + } else if (smd->type == DETECT_DATAJSON) { + + // PrintRawDataFp(stdout, buffer, buffer_len); + const DetectDatajsonData *sd = (const DetectDatajsonData *)smd->ctx; + int r = DetectDatajsonBufferMatch(det_ctx, sd, buffer, buffer_len); // TODO buffer offset? 
+ if (r == 1) { + goto match; + } + goto no_match_discontinue; + } else if (smd->type == DETECT_AL_URILEN) { SCLogDebug("inspecting uri len"); @@ -646,8 +657,7 @@ static int DetectEngineContentInspectionInternal(DetectEngineThreadCtx *det_ctx, goto match; } goto no_match_discontinue; - } - else if (smd->type == DETECT_LUA) { + } else if (smd->type == DETECT_LUA) { SCLogDebug("lua starting"); if (DetectLuaMatchBuffer(det_ctx, s, smd, buffer, buffer_len, diff --git a/src/detect-engine-register.c b/src/detect-engine-register.c index 37fbc98d8597..400be590370c 100644 --- a/src/detect-engine-register.c +++ b/src/detect-engine-register.c @@ -140,6 +140,7 @@ #include "detect-filesize.h" #include "detect-dataset.h" #include "detect-datarep.h" +#include "detect-datajson.h" #include "detect-dsize.h" #include "detect-flowvar.h" #include "detect-flowint.h" @@ -571,6 +572,7 @@ void SigTableSetup(void) DetectDsizeRegister(); DetectDatasetRegister(); DetectDatarepRegister(); + DetectDatajsonRegister(); DetectFlowvarRegister(); DetectFlowintRegister(); DetectPktvarRegister(); diff --git a/src/detect-engine-register.h b/src/detect-engine-register.h index c9134c77b83a..aef2951443cd 100644 --- a/src/detect-engine-register.h +++ b/src/detect-engine-register.h @@ -86,6 +86,7 @@ enum DetectKeywordId { DETECT_BYTE_EXTRACT, DETECT_DATASET, DETECT_DATAREP, + DETECT_DATAJSON, DETECT_BASE64_DECODE, DETECT_BASE64_DATA, DETECT_BSIZE, diff --git a/src/detect.c b/src/detect.c index 03fa8437068d..28ea1623768d 100644 --- a/src/detect.c +++ b/src/detect.c @@ -862,6 +862,7 @@ static DetectRunScratchpad DetectRunSetup( det_ctx->base64_decoded_len = 0; det_ctx->raw_stream_progress = 0; det_ctx->match_array_cnt = 0; + det_ctx->json_content_len = 0; det_ctx->alert_queue_size = 0; p->alerts.drop.action = 0; diff --git a/src/detect.h b/src/detect.h index fe755b7f0d14..dcac5f261476 100644 --- a/src/detect.h +++ b/src/detect.h @@ -40,6 +40,7 @@ #include "util-radix-tree.h" #include "util-file.h" #include 
"reputation.h" +#include #define DETECT_MAX_RULE_SIZE 8192 @@ -1087,6 +1088,16 @@ typedef struct RuleMatchCandidateTx { const Signature *s; /**< ptr to sig */ } RuleMatchCandidateTx; +#define SIG_JSON_CONTENT_ARRAY_LEN 16 +#define SIG_JSON_CONTENT_ITEM_LEN 256 +#define SIG_JSON_CONTENT_KEY_LEN 32 + +/** structure to store the json content with info on sig that triggered it */ +typedef struct SigJsonContent { + void *id; + char json_content[SIG_JSON_CONTENT_ITEM_LEN]; +} SigJsonContent; + /** * Detection engine thread data. */ @@ -1136,6 +1147,9 @@ typedef struct DetectEngineThreadCtx_ { int base64_decoded_len; int base64_decoded_len_max; + SigJsonContent json_content[SIG_JSON_CONTENT_ARRAY_LEN]; + size_t json_content_len; + /* counter for the filestore array below -- up here for cache reasons. */ uint16_t filestore_cnt; diff --git a/src/output-json-alert.c b/src/output-json-alert.c index 7822cc798045..5db00ba53014 100644 --- a/src/output-json-alert.c +++ b/src/output-json-alert.c @@ -250,6 +250,13 @@ void AlertJsonHeader(const Packet *p, const PacketAlert *pa, JsonBuilder *js, ui AlertJsonMetadata(pa, js); } + if (pa->json_array_last_index >= 0) { + jb_open_object(js, "extra"); + for (int8_t i = 0; i <= pa->json_array_last_index; i++) { + jb_set_formatted(js, pa->json_strings[i]); + } + jb_close(js); + } if (flags & LOG_JSON_RULE) { jb_set_string(js, "rule", pa->s->sig_str); } From 997805db3745b09aecbc01829a803a58e90a9c36 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Thu, 3 Oct 2024 22:40:35 +0200 Subject: [PATCH 2/8] detect/dataset: update copyright date --- src/detect-dataset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/detect-dataset.c b/src/detect-dataset.c index ae23925f2c11..f702b7b91271 100644 --- a/src/detect-dataset.c +++ b/src/detect-dataset.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2018-2020 Open Information Security Foundation +/* Copyright (C) 2018-2024 Open Information Security Foundation * * You can copy, redistribute or 
modify this Program under the terms of * the GNU General Public License version 2 as published by the Free From a67bd9baba89fd6e69fcb190a207e764f7c7e7e7 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Thu, 3 Oct 2024 23:04:07 +0200 Subject: [PATCH 3/8] docs: basic datajson documentation Ticket: #7372 --- doc/userguide/rules/datasets.rst | 35 ++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/doc/userguide/rules/datasets.rst b/doc/userguide/rules/datasets.rst index bf6ab9b1edd3..3f2d80cd73be 100644 --- a/doc/userguide/rules/datasets.rst +++ b/doc/userguide/rules/datasets.rst @@ -3,8 +3,8 @@ Datasets ======== -Using the ``dataset`` and ``datarep`` keyword it is possible to match on -large amounts of data against any sticky buffer. +Using the ``dataset``, ``datarep`` and ``datajson`` keywords it is possible +to match on large amounts of data against any sticky buffer. For example, to match against a DNS black list called ``dns-bl``:: @@ -145,6 +145,26 @@ reputation lists. A MD5 list, a SHA256 list, and a raw string (buffer) list. The rules will only match if the data is in the list and the reputation value is higher than 200. +datajson +~~~~~~~~ + +DataJSON allows matching data against a set and outputting the data attached to the matching +value in the event. + +Syntax:: + + datajson:<cmd>,<name>,<options>; + + datajson:<isset|isnotset>,<name> \ + [, type <string|md5|sha256|ipv4|ipv6|ip>, load <file name>, memcap <size>, hashsize <size>, key <json key>]; + +Example rules could look like:: + + alert http any any -> any any (msg:"IP match"; ip.dst; datajson:isset,bad_ips, type ip, load bad_ips.csv, key bad_ones; sid:8000001;) + +In this example, the match will occur if the destination IP is in the set and the +alert will have an ``alert.extra.bad_ones`` subobject that will contain the JSON +data associated with the value. Rule Reloads ------------ @@ -292,6 +312,17 @@ Syntax:: , + +datajson +~~~~~~~~ + +The datajson format follows the dataset format, except that there is one more CSV +field: + +Syntax:: + + <value>,<json> + ..
_datasets_file_locations: File Locations From 0f012193365144242ac21de9b2eaed44f1a8508a Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Sun, 13 Oct 2024 10:08:45 +0200 Subject: [PATCH 4/8] detect/datajson: store json info in a list Previous code was using an array and introducing a limit in the number of datajson keywords that can be used in a signature. This patch uses a linked list instead to overcome the limit. By using a first element of the list that is part of the structure we limit the cost of the feature to a structure member added to PacketAlert structure. Only the PacketAlertFree function is impacted as we need to iterate to find potential allocation. Ticket: #7372 --- src/decode.c | 29 +++++++++++++++++++++++++++++ src/decode.h | 9 ++++++--- src/detect-engine-alert.c | 23 ++++++++++++++--------- src/output-json-alert.c | 8 +++++--- src/packet.c | 1 + 5 files changed, 55 insertions(+), 15 deletions(-) diff --git a/src/decode.c b/src/decode.c index 0ed546def225..8ca49e97200e 100644 --- a/src/decode.c +++ b/src/decode.c @@ -145,9 +145,38 @@ PacketAlert *PacketAlertCreate(void) return pa_array; } +void PacketAlertRecycle(PacketAlert *pa_array) +{ + if (pa_array != NULL) { + for (int i = 0; i < packet_alert_max; i++) { + if (pa_array[i].json_info.next != NULL) { + struct JsonInfoList *current_json = pa_array[i].json_info.next; + while (current_json) { + struct JsonInfoList *next_json = current_json->next; + SCFree(current_json); + current_json = next_json; + } + } + pa_array[i].json_info.json_string = NULL; + pa_array[i].json_info.next = NULL; + } + } +} + void PacketAlertFree(PacketAlert *pa) { if (pa != NULL) { + for (int i = 0; i < packet_alert_max; i++) { + /* first item is not allocated so start at second one */ + if (pa[i].json_info.next != NULL) { + struct JsonInfoList *current_json = pa[i].json_info.next; + while (current_json) { + struct JsonInfoList *next_json = current_json->next; + SCFree(current_json); + current_json = next_json; + } + } + } 
SCFree(pa); } } diff --git a/src/decode.h b/src/decode.h index ba7fdaa3a017..75aef5432bec 100644 --- a/src/decode.h +++ b/src/decode.h @@ -234,7 +234,10 @@ typedef uint16_t Port; #define PKT_IS_TOSERVER(p) (((p)->flowflags & FLOW_PKT_TOSERVER)) #define PKT_IS_TOCLIENT(p) (((p)->flowflags & FLOW_PKT_TOCLIENT)) -#define ALERT_JSON_ARRAY_LEN 4 +struct JsonInfoList { + char *json_string; + struct JsonInfoList *next; +}; /* structure to store the sids/gids/etc the detection engine * found in this packet */ @@ -245,8 +248,7 @@ typedef struct PacketAlert_ { const struct Signature_ *s; uint64_t tx_id; /* Used for sorting */ int64_t frame_id; - char *json_strings[ALERT_JSON_ARRAY_LEN]; - int8_t json_array_last_index; + struct JsonInfoList json_info; } PacketAlert; /* flag to indicate the rule action (drop/pass) needs to be applied to the flow */ @@ -276,6 +278,7 @@ typedef struct PacketAlerts_ { } PacketAlerts; PacketAlert *PacketAlertCreate(void); +void PacketAlertRecycle(PacketAlert *pa_array); void PacketAlertFree(PacketAlert *pa); diff --git a/src/detect-engine-alert.c b/src/detect-engine-alert.c index e78a5f7c8ba2..3d2a132f9c1f 100644 --- a/src/detect-engine-alert.c +++ b/src/detect-engine-alert.c @@ -274,21 +274,26 @@ static inline PacketAlert PacketAlertSet( /* Set tx_id if the frame has it */ pa.tx_id = tx_id; pa.frame_id = (alert_flags & PACKET_ALERT_FLAG_FRAME) ? det_ctx->frame_id : 0; - pa.json_array_last_index = -1; + pa.json_info.json_string = NULL; + pa.json_info.next = NULL; if (det_ctx->json_content_len) { /* We have some JSON attached in the current detection so let's try to see if some need to be used for current signature. 
*/ + struct JsonInfoList *current_json = &pa.json_info; for (size_t i = 0; i < det_ctx->json_content_len; i++) { if (s == det_ctx->json_content[i].id) { - pa.json_array_last_index++; - if (pa.json_array_last_index < ALERT_JSON_ARRAY_LEN) { - pa.json_strings[pa.json_array_last_index] = - det_ctx->json_content[i].json_content; - } else { - SCLogDebug("Not enough room for json data in packet alert"); - pa.json_array_last_index--; - break; + if (current_json->json_string != NULL) { + struct JsonInfoList *next_json = SCCalloc(1, sizeof(struct JsonInfoList)); + if (next_json) { + current_json->next = next_json; + current_json = next_json; + current_json->next = NULL; + } else { + /* Allocation error, let's return now */ + return pa; + } } + current_json->json_string = det_ctx->json_content[i].json_content; } } } diff --git a/src/output-json-alert.c b/src/output-json-alert.c index 5db00ba53014..4cc3c6b3e785 100644 --- a/src/output-json-alert.c +++ b/src/output-json-alert.c @@ -250,10 +250,12 @@ void AlertJsonHeader(const Packet *p, const PacketAlert *pa, JsonBuilder *js, ui AlertJsonMetadata(pa, js); } - if (pa->json_array_last_index >= 0) { + if (pa->json_info.json_string != NULL) { jb_open_object(js, "extra"); - for (int8_t i = 0; i <= pa->json_array_last_index; i++) { - jb_set_formatted(js, pa->json_strings[i]); + const struct JsonInfoList *json_info = &pa->json_info; + while (json_info) { + jb_set_formatted(js, json_info->json_string); + json_info = json_info->next; } jb_close(js); } diff --git a/src/packet.c b/src/packet.c index cb6dcf618380..b30bca044ac1 100644 --- a/src/packet.c +++ b/src/packet.c @@ -125,6 +125,7 @@ void PacketReinit(Packet *p) p->alerts.discarded = 0; p->alerts.suppressed = 0; p->alerts.drop.action = 0; + PacketAlertRecycle(p->alerts.alerts); p->pcap_cnt = 0; p->tunnel_rtv_cnt = 0; p->tunnel_tpr_cnt = 0; From 76727986eb9f305757de2d1c05726bf9f516f2d0 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Thu, 17 Oct 2024 10:16:13 +0200 Subject: [PATCH 
5/8] datajson: unix commands to add/remove Ticket: #7372 --- python/suricata/sc/specs.py | 32 ++++++ python/suricata/sc/suricatasc.py | 2 + src/datasets.c | 185 +++++++++++++++++++++++++++++++ src/datasets.h | 3 + src/runmode-unix-socket.c | 121 ++++++++++++++++++++ src/runmode-unix-socket.h | 3 + src/unix-manager.c | 3 + 7 files changed, 349 insertions(+) diff --git a/python/suricata/sc/specs.py b/python/suricata/sc/specs.py index c7e045873303..58917c4ab9a0 100644 --- a/python/suricata/sc/specs.py +++ b/python/suricata/sc/specs.py @@ -194,6 +194,38 @@ "required": 1, }, ], + "datajson-add": [ + { + "name": "setname", + "required": 1, + }, + { + "name": "settype", + "required": 1, + }, + { + "name": "datavalue", + "required": 1, + }, + { + "name": "datajson", + "required": 1, + }, + ], + "datajson-remove": [ + { + "name": "setname", + "required": 1, + }, + { + "name": "settype", + "required": 1, + }, + { + "name": "datavalue", + "required": 1, + }, + ], "get-flow-stats-by-id": [ { "name": "flow_id", diff --git a/python/suricata/sc/suricatasc.py b/python/suricata/sc/suricatasc.py index fc07037c3d22..9b21d11bbe1a 100644 --- a/python/suricata/sc/suricatasc.py +++ b/python/suricata/sc/suricatasc.py @@ -113,6 +113,8 @@ def __init__(self, sck_path, verbose=False): "memcap-show", "dataset-add", "dataset-remove", + "datajson-add", + "datajson-remove", "get-flow-stats-by-id", "dataset-clear", "dataset-lookup", diff --git a/src/datasets.c b/src/datasets.c index 14aa59755a29..d939730b7dc8 100644 --- a/src/datasets.c +++ b/src/datasets.c @@ -2300,3 +2300,188 @@ int DatasetRemove(Dataset *set, const uint8_t *data, const uint32_t data_len) } return -1; } + +typedef int (*DatajsonOpFunc)( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json); + +static int DatajsonOpSerialized(Dataset *set, const char *string, const char *json, + DatajsonOpFunc DatasetOpString, DatajsonOpFunc DatasetOpMd5, DatajsonOpFunc DatasetOpSha256, + DatajsonOpFunc 
DatasetOpIPv4, DatajsonOpFunc DatasetOpIPv6) +{ + int ret; + + if (set == NULL) + return -1; + if (strlen(string) == 0) + return -1; + + DataJsonType jvalue = { .value = NULL, .len = 0 }; + if (json) { + if (ParseJsonLine(json, strlen(json), &jvalue) < 0) { + SCLogNotice("bad json value for dataset %s/%s", set->name, set->load); + return -1; + } + } + + switch (set->type) { + case DATASET_TYPE_STRING: { + uint32_t decoded_size = Base64DecodeBufferSize(strlen(string)); + uint8_t decoded[decoded_size]; + uint32_t num_decoded = Base64Decode( + (const uint8_t *)string, strlen(string), Base64ModeStrict, decoded); + if (num_decoded == 0) + goto operror; + ret = DatasetOpString(set, decoded, num_decoded, &jvalue); + if (ret <= 0) { + SCFree(jvalue.value); + } + return ret; + } + case DATASET_TYPE_MD5: { + if (strlen(string) != 32) + goto operror; + uint8_t hash[16]; + if (HexToRaw((const uint8_t *)string, 32, hash, sizeof(hash)) < 0) + goto operror; + ret = DatasetOpMd5(set, hash, 16, &jvalue); + if (ret <= 0) { + SCFree(jvalue.value); + } + return ret; + } + case DATASET_TYPE_SHA256: { + if (strlen(string) != 64) + goto operror; + uint8_t hash[32]; + if (HexToRaw((const uint8_t *)string, 64, hash, sizeof(hash)) < 0) + goto operror; + ret = DatasetOpSha256(set, hash, 32, &jvalue); + if (ret <= 0) { + SCFree(jvalue.value); + } + return ret; + } + case DATASET_TYPE_IPV4: { + struct in_addr in; + if (inet_pton(AF_INET, string, &in) != 1) + goto operror; + ret = DatasetOpIPv4(set, (uint8_t *)&in.s_addr, 4, &jvalue); + if (ret <= 0) { + SCFree(jvalue.value); + } + return ret; + } + case DATASET_TYPE_IPV6: { + struct in6_addr in6; + if (ParseIpv6String(set, string, &in6) != 0) { + SCLogError("Dataset failed to import %s as IPv6", string); + goto operror; + } + ret = DatasetOpIPv6(set, (uint8_t *)&in6.s6_addr, 16, &jvalue); + if (ret <= 0) { + SCFree(jvalue.value); + } + return ret; + } + } + return -1; +operror: + SCFree(jvalue.value); + return -2; +} + +/** \brief add 
serialized data to json set + * \retval int 1 added + * \retval int 0 already in hash + * \retval int -1 API error (not added) + * \retval int -2 DATA error + */ +int DatajsonAddSerialized(Dataset *set, const char *value, const char *json) +{ + return DatajsonOpSerialized(set, value, json, DatasetAddStringwJson, DatasetAddMd5wJson, + DatasetAddSha256wJson, DatasetAddIPv4wJson, DatasetAddIPv6wJson); +} + +/** + * \retval 1 data was removed from the hash + * \retval 0 data not removed (busy) + * \retval -1 data not found + */ +static int DatajsonRemoveString( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + StringTypeJson lookup = { + .ptr = (uint8_t *)data, .len = data_len, .json.value = NULL, .json.len = 0 + }; + return THashRemoveFromHash(set->hash, &lookup); +} + +static int DatajsonRemoveIPv4( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + if (data_len != 4) + return -2; + + IPv4TypeJson lookup = { .json.value = NULL, .json.len = 0 }; + memcpy(lookup.ipv4, data, 4); + return THashRemoveFromHash(set->hash, &lookup); +} + +static int DatajsonRemoveIPv6( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + if (data_len != 16) + return -2; + + IPv6TypeJson lookup = { .json.value = NULL, .json.len = 0 }; + memcpy(lookup.ipv6, data, 16); + return THashRemoveFromHash(set->hash, &lookup); +} + +static int DatajsonRemoveMd5( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + if (data_len != 16) + return -2; + + Md5TypeJson lookup = { .json.value = NULL, .json.len = 0 }; + memcpy(lookup.md5, data, 16); + return THashRemoveFromHash(set->hash, &lookup); +} + +static int DatajsonRemoveSha256( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType 
*json) +{ + if (set == NULL) + return -1; + + if (data_len != 32) + return -2; + + Sha256TypeJson lookup = { .json.value = NULL, .json.len = 0 }; + memcpy(lookup.sha256, data, 32); + return THashRemoveFromHash(set->hash, &lookup); +} + +/** \brief remove serialized data from set + * \retval int 1 removed + * \retval int 0 found but busy (not removed) + * \retval int -1 API error (not removed) + * \retval int -2 DATA error */ +int DatajsonRemoveSerialized(Dataset *set, const char *string) +{ + return DatajsonOpSerialized(set, string, NULL, DatajsonRemoveString, DatajsonRemoveMd5, + DatajsonRemoveSha256, DatajsonRemoveIPv4, DatajsonRemoveIPv6); +} diff --git a/src/datasets.h b/src/datasets.h index e4e057920728..660eb4e5ecad 100644 --- a/src/datasets.h +++ b/src/datasets.h @@ -69,4 +69,7 @@ int DatasetAddSerialized(Dataset *set, const char *string); int DatasetRemoveSerialized(Dataset *set, const char *string); int DatasetLookupSerialized(Dataset *set, const char *string); +int DatajsonAddSerialized(Dataset *set, const char *string, const char *json); +int DatajsonRemoveSerialized(Dataset *set, const char *string); + #endif /* SURICATA_DATASETS_H */ diff --git a/src/runmode-unix-socket.c b/src/runmode-unix-socket.c index e0b314a1cd2a..2d7c515ce22f 100644 --- a/src/runmode-unix-socket.c +++ b/src/runmode-unix-socket.c @@ -831,6 +831,127 @@ TmEcode UnixSocketDatasetLookup(json_t *cmd, json_t *answer, void *data) } } +/** + * \brief Command to add data to a datajson + * + * \param cmd the content of command Arguments as a json_t object + * \param answer the json_t object that has to be used to answer + * \param data pointer to data defining the context here a PcapCommand:: + */ +TmEcode UnixSocketDatajsonAdd(json_t *cmd, json_t *answer, void *data) +{ + /* 1 get dataset name */ + json_t *narg = json_object_get(cmd, "setname"); + if (!json_is_string(narg)) { + json_object_set_new(answer, "message", json_string("setname is not a string")); + return TM_ECODE_FAILED; + } + 
const char *set_name = json_string_value(narg); + + /* 2 get the data type */ + json_t *targ = json_object_get(cmd, "settype"); + if (!json_is_string(targ)) { + json_object_set_new(answer, "message", json_string("settype is not a string")); + return TM_ECODE_FAILED; + } + const char *type = json_string_value(targ); + + /* 3 get value */ + json_t *varg = json_object_get(cmd, "datavalue"); + if (!json_is_string(varg)) { + json_object_set_new(answer, "message", json_string("datavalue is not string")); + return TM_ECODE_FAILED; + } + const char *value = json_string_value(varg); + + /* 4 get json: validate the datajson argument itself before reading its string value */ + json_t *jarg = json_object_get(cmd, "datajson"); + if (!json_is_string(jarg)) { + json_object_set_new(answer, "message", json_string("datajson is not string")); + return TM_ECODE_FAILED; + } + const char *json = json_string_value(jarg); + + SCLogDebug("datajson-add: %s type %s value %s json %s", set_name, type, value, json); + + enum DatasetTypes t = DatasetGetTypeFromString(type); + if (t == DATASET_TYPE_NOTSET) { + json_object_set_new(answer, "message", json_string("unknown settype")); + return TM_ECODE_FAILED; + } + + Dataset *set = DatasetFind(set_name, t); + if (set == NULL) { + json_object_set_new(answer, "message", json_string("set not found or wrong type")); + return TM_ECODE_FAILED; + } + + int r = DatajsonAddSerialized(set, value, json); + if (r == 1) { + json_object_set_new(answer, "message", json_string("data added")); + return TM_ECODE_OK; + } else if (r == 0) { + json_object_set_new(answer, "message", json_string("data already in set")); + return TM_ECODE_OK; + } else { + json_object_set_new(answer, "message", json_string("failed to add data")); + return TM_ECODE_FAILED; + } +} + +TmEcode UnixSocketDatajsonRemove(json_t *cmd, json_t *answer, void *data) +{ + /* 1 get dataset name */ + json_t *narg = json_object_get(cmd, "setname"); + if (!json_is_string(narg)) { + json_object_set_new(answer, "message", json_string("setname is not a string")); + return
TM_ECODE_FAILED; + } + const char *set_name = json_string_value(narg); + + /* 2 get the data type */ + json_t *targ = json_object_get(cmd, "settype"); + if (!json_is_string(targ)) { + json_object_set_new(answer, "message", json_string("settype is not a string")); + return TM_ECODE_FAILED; + } + const char *type = json_string_value(targ); + + /* 3 get value */ + json_t *varg = json_object_get(cmd, "datavalue"); + if (!json_is_string(varg)) { + json_object_set_new(answer, "message", json_string("datavalue is not string")); + return TM_ECODE_FAILED; + } + const char *value = json_string_value(varg); + + SCLogDebug("datajson-remove: %s type %s value %s", set_name, type, value); + + enum DatasetTypes t = DatasetGetTypeFromString(type); + if (t == DATASET_TYPE_NOTSET) { + json_object_set_new(answer, "message", json_string("unknown settype")); + return TM_ECODE_FAILED; + } + + Dataset *set = DatasetFind(set_name, t); + if (set == NULL) { + json_object_set_new(answer, "message", json_string("set not found or wrong type")); + return TM_ECODE_FAILED; + } + + int r = DatajsonRemoveSerialized(set, value); + if (r == 1) { + json_object_set_new(answer, "message", json_string("data removed")); + return TM_ECODE_OK; + } else if (r == 0) { + json_object_set_new(answer, "message", json_string("data is busy, try again")); + return TM_ECODE_OK; + } else { + json_object_set_new(answer, "message", json_string("failed to remove data")); + return TM_ECODE_FAILED; + } +} + static bool JsonU32Value(json_t *jarg, uint32_t *ret) { int64_t r = json_integer_value(jarg); diff --git a/src/runmode-unix-socket.h b/src/runmode-unix-socket.h index 8ea432f57dad..d1f90a6e8456 100644 --- a/src/runmode-unix-socket.h +++ b/src/runmode-unix-socket.h @@ -38,6 +38,9 @@ TmEcode UnixSocketDatasetRemove(json_t *cmd, json_t* answer, void *data); TmEcode UnixSocketDatasetDump(json_t *cmd, json_t *answer, void *data); TmEcode UnixSocketDatasetClear(json_t *cmd, json_t *answer, void *data); TmEcode 
UnixSocketDatasetLookup(json_t *cmd, json_t *answer, void *data); +TmEcode UnixSocketDatajsonAdd(json_t *cmd, json_t *answer, void *data); +TmEcode UnixSocketDatajsonRemove(json_t *cmd, json_t *answer, void *data); +TmEcode UnixSocketDatajsonReplace(json_t *cmd, json_t *answer, void *data); TmEcode UnixSocketRegisterTenantHandler(json_t *cmd, json_t* answer, void *data); TmEcode UnixSocketUnregisterTenantHandler(json_t *cmd, json_t* answer, void *data); TmEcode UnixSocketRegisterTenant(json_t *cmd, json_t* answer, void *data); diff --git a/src/unix-manager.c b/src/unix-manager.c index 38baaac22011..ba0a04e8d0cb 100644 --- a/src/unix-manager.c +++ b/src/unix-manager.c @@ -1111,6 +1111,9 @@ int UnixManagerInit(void) UnixManagerRegisterCommand("dataset-add", UnixSocketDatasetAdd, &command, UNIX_CMD_TAKE_ARGS); UnixManagerRegisterCommand("dataset-remove", UnixSocketDatasetRemove, &command, UNIX_CMD_TAKE_ARGS); + UnixManagerRegisterCommand("datajson-add", UnixSocketDatajsonAdd, &command, UNIX_CMD_TAKE_ARGS); + UnixManagerRegisterCommand( + "datajson-remove", UnixSocketDatajsonRemove, &command, UNIX_CMD_TAKE_ARGS); UnixManagerRegisterCommand( "get-flow-stats-by-id", UnixSocketGetFlowStatsById, &command, UNIX_CMD_TAKE_ARGS); UnixManagerRegisterCommand("dataset-dump", UnixSocketDatasetDump, NULL, 0); From b50b4e83bfed11dcd1f67d19f00ea4eb7149b6d7 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Sat, 2 Nov 2024 11:19:48 +0100 Subject: [PATCH 6/8] docs: document datajson unix-socket commands Ticket: #7372 --- doc/userguide/rules/datasets.rst | 38 ++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/doc/userguide/rules/datasets.rst b/doc/userguide/rules/datasets.rst index 3f2d80cd73be..8cd7e32bd1e4 100644 --- a/doc/userguide/rules/datasets.rst +++ b/doc/userguide/rules/datasets.rst @@ -263,6 +263,44 @@ Syntax:: dataset-dump +datajson-add +~~~~~~~~~~~~ + +Unix Socket command to add data to a set. On success, the addition becomes +active instantly. 
+ +Syntax:: + + datajson-add <set name> <set type> <data> <json> + +set name + Name of an already defined dataset +type + Data type: string, md5, sha256, ipv4, ip +data + Data to add in serialized form (base64 for string, hex notation for md5/sha256, string representation for ipv4/ip) + +Example adding 'google.com' with its JSON data to set 'myset':: + + datajson-add myset string Z29vZ2xlLmNvbQ== {"city":"Mountain View"} + +datajson-remove +~~~~~~~~~~~~~~~ + +Unix Socket command to remove data from a set. On success, the removal becomes +active instantly. + +Syntax:: + + datajson-remove <set name> <set type> <data> + +set name + Name of an already defined dataset +type + Data type: string, md5, sha256, ipv4, ip +data + Data to remove in serialized form (base64 for string, hex notation for md5/sha256, string representation for ipv4/ip) + File formats ------------ From 6061d014a329b71ba5f25194dc5191bca721cf41 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Sat, 2 Nov 2024 21:19:13 +0100 Subject: [PATCH 7/8] datajson: fix unix socket add command It was not correctly handling JSON values containing spaces, as they were seen as multiple arguments.
Ticket: #7372 --- python/suricata/sc/suricatasc.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/suricata/sc/suricatasc.py b/python/suricata/sc/suricatasc.py index 9b21d11bbe1a..350c611b67c8 100644 --- a/python/suricata/sc/suricatasc.py +++ b/python/suricata/sc/suricatasc.py @@ -220,6 +220,11 @@ def execute(self, command): cmd_specs = argsd[cmd] required_args_count = len([d["required"] for d in cmd_specs if d["required"] and not "val" in d]) arguments = dict() + # if all arguments are required in the command then we split at the count + # this way we can handle last argument containing space (datajson-add for example) + non_req_args_count = len([d for d in cmd_specs if not d["required"] or "val" in d]) + if non_req_args_count == 0: + full_cmd = command.split(maxsplit=required_args_count) for c, spec in enumerate(cmd_specs, 1): spec_type = str if "type" not in spec else spec["type"] if spec["required"]: From 45233eca27e7f1e3839d305874173894cc76bf37 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Wed, 13 Nov 2024 01:55:57 +0100 Subject: [PATCH 8/8] detect/datajson: JSON can be long --- src/detect.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/detect.h b/src/detect.h index dcac5f261476..9b8d8cd5f12e 100644 --- a/src/detect.h +++ b/src/detect.h @@ -1089,7 +1089,7 @@ typedef struct RuleMatchCandidateTx { } RuleMatchCandidateTx; #define SIG_JSON_CONTENT_ARRAY_LEN 16 -#define SIG_JSON_CONTENT_ITEM_LEN 256 +#define SIG_JSON_CONTENT_ITEM_LEN 1024 #define SIG_JSON_CONTENT_KEY_LEN 32 /** structure to store the json content with info on sig that triggered it */