Skip to content

Commit

Permalink
fix: more forgiving base64 transformation [custom implementation] (#944)
Browse files Browse the repository at this point in the history
* more forgiving base64 transformation

* better comment about second DecodeString call

* custom implementation

* comments

* address review, minor refactor

* moves new line characters check

* Extends tests with golang base64 tests

* updates link with the permalink to the exact borrowed payloads

* adds note about using custom implementation
  • Loading branch information
M4tteoP authored Jan 3, 2024
1 parent f1cfd13 commit b887a58
Show file tree
Hide file tree
Showing 2 changed files with 236 additions and 16 deletions.
82 changes: 73 additions & 9 deletions internal/transformations/base64decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,82 @@

package transformations

import (
"encoding/base64"
import "strings"

stringsutil "github.com/corazawaf/coraza/v3/internal/strings"
)
var base64DecMap = []byte{
127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 62, 127, 127, 127, 63, 52, 53,
54, 55, 56, 57, 58, 59, 60, 61, 127, 127,
127, 64, 127, 127, 127, 0, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25, 127, 127, 127, 127, 127, 127, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
49, 50, 51, 127, 127, 127, 127, 127,
}

// base64decode decodes a Base64-encoded string.
// Padding is optional.
// Partial decoding is returned up to the first invalid character (if any).
// New line characters (\r and \n) are ignored.
// Note: a custom base64 decoder is used in order to return partial decoding when an error arises. It
// would be possible to use the standard library only relying on undocumented behaviors of the decoder.
// For more context, see https://github.com/corazawaf/coraza/pull/940
func base64decode(data string) (string, bool, error) {
dec, err := base64.StdEncoding.DecodeString(data)
if err != nil {
// Forgiving implementation, which ignores invalid characters
return data, false, nil
res := doBase64decode(data)
return res, true, nil
}

func doBase64decode(src string) string {
slen := len(src)
if slen == 0 {
return src
}

var n, x int
var dst strings.Builder
dst.Grow(slen)

for i := 0; i < slen; i++ {
currChar := src[i]
// new line characters are ignored.
if currChar == '\r' || currChar == '\n' {
continue
}
// If invalid character or padding reached, we stop decoding
if currChar == '=' || currChar == ' ' || currChar > 127 {
break
}
decodedChar := base64DecMap[currChar]
// Another condition of invalid character
if decodedChar == 127 {
break
}

x = (x << 6) | int(decodedChar&0x3F)
n++
if n == 4 {
dst.WriteByte(byte(x >> 16))
dst.WriteByte(byte(x >> 8))
dst.WriteByte(byte(x))
n = 0
x = 0
}
}
return stringsutil.WrapUnsafe(dec), true, nil

// Handle any remaining characters
if n == 2 {
x <<= 12
dst.WriteByte(byte(x >> 16))
} else if n == 3 {
x <<= 6
dst.WriteByte(byte(x >> 16))
dst.WriteByte(byte(x >> 8))
}

return dst.String()
}
170 changes: 163 additions & 7 deletions internal/transformations/base64decode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,173 @@ import (
"testing"
)

var b64DecodeTests = []string{
"VGVzdENhc2U=",
"P.HNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==",
"VGVzdABDYXNl",
var b64DecodeTests = []struct {
name string
input string
expected string
}{
{
name: "Valid",
input: "VGVzdENhc2U=",
expected: "TestCase",
},
{
name: "Valid with \u0000",
input: "VGVzdABDYXNl",
expected: "Test\x00Case",
},
{
name: "Valid without padding",
input: "VGVzdENhc2U",
expected: "TestCase",
},
{
name: "Valid without longer padding",
input: "PA==",
expected: "<",
},
{
name: "valid <TEST>",
input: "PFRFU1Q+",
expected: "<TEST>",
},
{
name: "Malformed base64 encoding",
input: "PHNjcmlwd",
expected: "<scrip",
},
{
name: "decoded up to the space (invalid character)",
input: "PFR FU1Q+",
expected: "<T",
},
{
name: "decoded up to the dot (invalid character)",
input: "P.HNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==",
expected: "", // Only the P character does not result in a printable character conversion.
},
{
name: "decoded up to the dot (invalid character)",
input: "PHNjcmlwd.D5hbGVydCgxKTwvc2NyaXB0Pg==",
expected: "<scrip",
},
{
name: "decoded up to the dot (invalid character)",
input: "PHNjcmlwdD.5hbGVydCgxKTwvc2NyaXB0Pg==",
expected: "<script",
},
{
name: "decoded up to the dash (invalid character for base64, only valid for Base64url)",
input: "PFRFU1Q-",
expected: "<TEST",
},
// The following tests are from the golang base64 decoder tests
// Source: https://github.com/golang/go/blob/c95fe91d0715dc0a8d55ac80a80f383c3635548b/src/encoding/base64/base64_test.go#L25C4-L49
{
name: "golang test - RFC 3548 examples",
input: "FPucA9l+",
expected: "\x14\xfb\x9c\x03\xd9\x7e",
},
{
name: "golang test - RFC 3548 examples",
input: "FPucA9k=",
expected: "\x14\xfb\x9c\x03\xd9",
},
{
name: "golang test - RFC 3548 examples",
input: "FPucAw==",
expected: "\x14\xfb\x9c\x03",
},
{
name: "golang test - RFC 4648 examples",
input: "",
expected: "",
},
{
name: "golang test - RFC 4648 examples",
input: "Zg==",
expected: "f",
},
{
name: "golang test - RFC 4648 examples",
input: "Zm8=",
expected: "fo",
},
{
name: "golang test - RFC 4648 examples",
input: "Zm9v",
expected: "foo",
},
{
name: "golang test - RFC 4648 examples",
input: "Zm9vYg==",
expected: "foob",
},
{
name: "golang test - RFC 4648 examples",
input: "Zm9vYmE=",
expected: "fooba",
},
{
name: "golang test - RFC 4648 examples",
input: "Zm9vYmFy",
expected: "foobar",
},
{
name: "golang test - Wikipedia examples",
input: "c3VyZS4=",
expected: "sure.",
},
{
name: "golang test - Wikipedia examples",
input: "c3VyZQ==",
expected: "sure",
},
{
name: "golang test - Wikipedia examples",
input: "c3Vy",
expected: "sur",
},
{
name: "golang test - Wikipedia examples",
input: "c3U=",
expected: "su",
},
{
name: "golang test - Wikipedia examples",
input: "bGVhc3VyZS4=",
expected: "leasure.",
},
{
name: "golang test - Wikipedia examples",
input: "ZWFzdXJlLg==",
expected: "easure.",
},
{
name: "golang test - Wikipedia examples",
input: "YXN1cmUu",
expected: "asure.",
},
}

func TestBase64Decode(t *testing.T) {
for _, tt := range b64DecodeTests {
t.Run(tt.name, func(t *testing.T) {
actual, _, err := base64decode(tt.input)
if err != nil {
t.Errorf("Unexpected error: %v", err)
}
if actual != tt.expected {
t.Errorf("Expected %q, but got %q", tt.expected, actual)
}
})
}
}
func BenchmarkB64Decode(b *testing.B) {
for _, tt := range b64DecodeTests {
b.Run(tt, func(b *testing.B) {
b.Run(tt.input, func(b *testing.B) {
for i := 0; i < b.N; i++ {
_, _, err := base64decode(tt)
_, _, err := base64decode(tt.input)
if err != nil {
b.Error(err)
}
Expand All @@ -31,7 +187,7 @@ func BenchmarkB64Decode(b *testing.B) {

func FuzzB64Decode(f *testing.F) {
for _, tc := range b64DecodeTests {
f.Add(tc)
f.Add(tc.input)
}
f.Fuzz(func(t *testing.T, tc string) {
data, _, err := base64decode(tc)
Expand Down

0 comments on commit b887a58

Please sign in to comment.