From 35b25224ec197b45f887e5d71ed546ee2d382721 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosso Date: Tue, 5 Nov 2024 11:14:43 +0000 Subject: [PATCH] feat(transaction): data type prediction for lazy predictions --- internal/corazarules/rule_match.go | 2 + types/value_metadata.go | 84 ++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 types/value_metadata.go diff --git a/internal/corazarules/rule_match.go b/internal/corazarules/rule_match.go index 67f8f7dc1..2ccf5b988 100644 --- a/internal/corazarules/rule_match.go +++ b/internal/corazarules/rule_match.go @@ -29,6 +29,8 @@ type MatchData struct { // Keeps track of the chain depth in which the data matched. // Multiphase specific field ChainLevel_ int + // Metadata of the matched data + Metadata_ types.DataMetadataList } var _ types.MatchData = (*MatchData)(nil) diff --git a/types/value_metadata.go b/types/value_metadata.go new file mode 100644 index 000000000..b02a74cb8 --- /dev/null +++ b/types/value_metadata.go @@ -0,0 +1,84 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 +package types + +import "unicode" + +// ValueMetadata identifies one kind of content classification (alphanumeric, ASCII, base64, ...) that a value can have. +type ValueMetadata int + +const ( + // ValueMetadataAlphanumeric represents an alphanumeric value. It is the first constant, so it equals 0, the zero value of ValueMetadata. + ValueMetadataAlphanumeric ValueMetadata = iota + // ValueMetadataAscii represents an ASCII value. + ValueMetadataAscii + // ValueMetadataBase64 represents a base64 value. + ValueMetadataBase64 + // ValueMetadataURI represents a URI value. + ValueMetadataURI + // ValueMetadataDomain represents a domain value. + ValueMetadataDomain + // ValueMetadataNumeric represents a numeric value, either integer or float. + ValueMetadataNumeric + // ValueMetadataBoolean represents a boolean value. + ValueMetadataBoolean + // ValueMetadataUnicode represents a unicode value. + ValueMetadataUnicode +) + +// NewValueMetadata returns a new ValueMetadata from a string. 
+func NewValueMetadata(metadata string) (ValueMetadata, bool) { + switch metadata { + case "alphanumeric": + return ValueMetadataAlphanumeric, true + case "ascii": + return ValueMetadataAscii, true + case "base64": + return ValueMetadataBase64, true + case "uri": + return ValueMetadataURI, true + case "domain": + return ValueMetadataDomain, true + case "numeric": + return ValueMetadataNumeric, true + case "boolean": + return ValueMetadataBoolean, true + case "unicode": + return ValueMetadataUnicode, true + } + return 0, false +} + +// DataMetadataList is a list of ValueMetadata. +type DataMetadataList struct { + metadata map[ValueMetadata]bool + testedTypes []ValueMetadata +} + +func (v *DataMetadataList) Test(data string, metadataType ValueMetadata) bool { + result, ok := v.metadata[metadataType] + if !ok { + // we do the analysis only once + switch metadataType { + case ValueMetadataAlphanumeric: + return v.testAlphanumeric(data) + default: + // this should not happen + return false + } + } + return result + +} + +func (v *DataMetadataList) testAlphanumeric(data string) bool { + res := true + for _, c := range data { + if !unicode.IsLetter(c) && !unicode.IsNumber(c) { + res = false + break + } + } + v.metadata[ValueMetadataAlphanumeric] = res + return res +}