Skip to content

Commit

Permalink
Compile regex in-place, rename CHAR to CHAR_RE due to Windows typedef'ing CHAR, correctly free compiled needle and associated buffer
Browse files Browse the repository at this point in the history
  • Loading branch information
sjanusz-r7 committed Dec 1, 2023
1 parent 46a08c1 commit 616d785
Show file tree
Hide file tree
Showing 3 changed files with 169 additions and 145 deletions.
46 changes: 44 additions & 2 deletions c/meterpreter/source/extensions/stdapi/server/sys/process/memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,7 @@ struct regex_needle
char* raw_needle_buffer;
size_t length;
regex_t* compiled_needle;
unsigned char* char_buf;
};

#define NEEDLES_MAX (size_t)5
Expand Down Expand Up @@ -508,9 +509,38 @@ DWORD request_sys_process_memory_search(Remote* remote, Packet* packet)

dprintf("[MEM SEARCH] Needle %u : %.*s with size (in bytes) %u", needle_enum_index, needle_length, regex_needles[needle_enum_index]->raw_needle_buffer, needle_length);

dprintf("[MEM SEARCH] Allocating memory for a compiled needle");
regex_needles[needle_enum_index]->compiled_needle = (regex_t*)malloc(MAX_REGEXP_OBJECTS * sizeof(struct regex_t));
if (regex_needles[needle_enum_index]->compiled_needle == NULL)
{
dprintf("[MEM SEARCH] Unable to malloc memory for a compiled needle");
result = ERROR_OUTOFMEMORY;
goto done;
}

dprintf("[MEM SEARCH] Allocating memory for a char buffer");
regex_needles[needle_enum_index]->char_buf = (unsigned char*)malloc(MAX_CHAR_CLASS_LEN * sizeof(unsigned char));
if (regex_needles[needle_enum_index]->char_buf == NULL)
{
dprintf("[MEM SEARCH] Unable to malloc memory for a char buffer");
result = ERROR_OUTOFMEMORY;
goto done;
}

dprintf("[MEM SEARCH] Compiling needle: %.*s", needle_length, (char*)needle_buffer_tlv.buffer);
regex_needles[needle_enum_index]->compiled_needle = re_compile(regex_needles[needle_enum_index]->raw_needle_buffer, regex_needles[needle_enum_index]->length);
if (regex_needles[needle_enum_index]->compiled_needle == NULL) { dprintf("[MEM SEARCH] Failed to compile needle"); result = ERROR_OUTOFMEMORY; goto done; }

const int compile_result = re_compile(regex_needles[needle_enum_index]->raw_needle_buffer,
regex_needles[needle_enum_index]->length,
MAX_REGEXP_OBJECTS,
MAX_CHAR_CLASS_LEN,
&regex_needles[needle_enum_index]->compiled_needle,
&regex_needles[needle_enum_index]->char_buf);
if (compile_result != ERROR_SUCCESS)
{
dprintf("[MEM SEARCH] Failed to compile needle");
result = ERROR_INVALID_PARAMETER;
goto done;
}

needle_enum_index++;
}
Expand Down Expand Up @@ -671,6 +701,18 @@ DWORD request_sys_process_memory_search(Remote* remote, Packet* packet)
free(regex_needles[i]->raw_needle_buffer);
}

if (regex_needles[i]->char_buf != NULL)
{
dprintf("[MEM SEARCH] Freeing char buf");
free(regex_needles[i]->char_buf);
}

if (regex_needles[i]->compiled_needle != NULL)
{
dprintf("[MEM SEARCH] Freeing compiled needle");
free(regex_needles[i]->compiled_needle);
}

dprintf("[MEM SEARCH] Freeing regex needle.");
free(regex_needles[i]);
}
Expand Down
256 changes: 121 additions & 135 deletions c/meterpreter/source/tiny-regex-c/re.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,7 @@

/* Definitions: */

#define MAX_REGEXP_OBJECTS 256 /* Max number of regex symbols in expression. */
#define MAX_CHAR_CLASS_LEN 256 /* Max length of character-class buffer in. */


enum { UNUSED, DOT, BEGIN, END, QUESTIONMARK, STAR, PLUS, CHAR, CHAR_CLASS, INV_CHAR_CLASS, DIGIT, NOT_DIGIT, ALPHA, NOT_ALPHA, WHITESPACE, NOT_WHITESPACE, /* BRANCH */ };
enum { UNUSED, DOT, BEGIN, END, QUESTIONMARK, STAR, PLUS, CHAR_RE, CHAR_CLASS, INV_CHAR_CLASS, DIGIT, NOT_DIGIT, ALPHA, NOT_ALPHA, WHITESPACE, NOT_WHITESPACE, /* BRANCH */ };

/* Private function declarations: */
static int matchpattern(regex_t* pattern, const char* text, size_t text_length, size_t text_offset, size_t* matchlength);
Expand All @@ -54,14 +50,7 @@ static int matchrange(char c, const char* str);
static int matchdot(char c);
static int ismetachar(char c);



/* Public functions: */
int re_match(const char* pattern, size_t pattern_length, const char* text, size_t text_length, size_t* matchlength)
{
return re_matchp(re_compile(pattern, pattern_length), text, text_length, matchlength);
}

int re_matchp(re_t pattern, const char* text, size_t text_length, size_t* matchlength)
{
*matchlength = 0;
Expand Down Expand Up @@ -89,146 +78,143 @@ int re_matchp(re_t pattern, const char* text, size_t text_length, size_t* matchl
return -1;
}

re_t re_compile(const char* pattern, size_t pattern_length)
int re_compile(const char* pattern, size_t pattern_length, size_t max_regex_objects, size_t max_char_class_len, re_t* out_compiled, unsigned char** out_ccl)
{
/* The sizes of the two static arrays below substantiates the static RAM usage of this module.
MAX_REGEXP_OBJECTS is the max number of symbols in the expression.
MAX_CHAR_CLASS_LEN determines the size of buffer for chars in all char-classes in the expression. */
static regex_t re_compiled[MAX_REGEXP_OBJECTS];
static unsigned char ccl_buf[MAX_CHAR_CLASS_LEN];
int ccl_bufidx = 1;

char c; /* current char in pattern */
int i = 0; /* index into pattern */
int j = 0; /* index into re_compiled */

while (i < (int)pattern_length && (j+1 < MAX_REGEXP_OBJECTS))
{
c = pattern[i];
if (out_compiled == NULL || out_ccl == NULL) { return 1; }

switch (c)
{
/* Meta-characters: */
case '^': { re_compiled[j].type = BEGIN; } break;
case '$': { re_compiled[j].type = END; } break;
case '.': { re_compiled[j].type = DOT; } break;
case '*': { re_compiled[j].type = STAR; } break;
case '+': { re_compiled[j].type = PLUS; } break;
case '?': { re_compiled[j].type = QUESTIONMARK; } break;
/* case '|': { re_compiled[j].type = BRANCH; } break; <-- not working properly */

/* Escaped character-classes (\s \w ...): */
case '\\':
{
if (i + 1 < (int)pattern_length)
{
/* Skip the escape-char '\\' */
i += 1;
/* ... and check the next */
switch (pattern[i])
{
/* Meta-character: */
case 'd': { re_compiled[j].type = DIGIT; } break;
case 'D': { re_compiled[j].type = NOT_DIGIT; } break;
case 'w': { re_compiled[j].type = ALPHA; } break;
case 'W': { re_compiled[j].type = NOT_ALPHA; } break;
case 's': { re_compiled[j].type = WHITESPACE; } break;
case 'S': { re_compiled[j].type = NOT_WHITESPACE; } break;

/* Escaped character, e.g. '.' or '$' */
default:
{
re_compiled[j].type = CHAR;
re_compiled[j].u.ch = pattern[i];
} break;
}
}
/* '\\' as last char in pattern -> invalid regular expression. */
/*
else
{
re_compiled[j].type = CHAR;
re_compiled[j].ch = pattern[i];
}
*/
} break;
int ccl_bufidx = 1;

/* Character class: */
case '[':
{
/* Remember where the char-buffer starts. */
int buf_begin = ccl_bufidx;
char c; /* current char in pattern */
int i = 0; /* index into pattern */
int j = 0; /* index into re_compiled */

while (i < (int)pattern_length && (j + 1 < MAX_REGEXP_OBJECTS))
{
c = pattern[i];

/* Look-ahead to determine if negated */
if (pattern[i+1] == '^')
switch (c)
{
re_compiled[j].type = INV_CHAR_CLASS;
i += 1; /* Increment i to avoid including '^' in the char-buffer */
if (pattern[i+1] == 0) /* incomplete pattern, missing non-zero char after '^' */
{
return 0;
}
}
else
/* Meta-characters: */
case '^': { (*out_compiled)[j].type = BEGIN; } break;
case '$': { (*out_compiled)[j].type = END; } break;
case '.': { (*out_compiled)[j].type = DOT; } break;
case '*': { (*out_compiled)[j].type = STAR; } break;
case '+': { (*out_compiled)[j].type = PLUS; } break;
case '?': { (*out_compiled)[j].type = QUESTIONMARK; } break;
/* case '|': { re_compiled[j].type = BRANCH; } break; <-- not working properly */

/* Escaped character-classes (\s \w ...): */
case '\\':
{
re_compiled[j].type = CHAR_CLASS;
}
if (i + 1 < (int)pattern_length)
{
/* Skip the escape-char '\\' */
i += 1;
/* ... and check the next */
switch (pattern[i])
{
/* Meta-character: */
case 'd': { (*out_compiled)[j].type = DIGIT; } break;
case 'D': { (*out_compiled)[j].type = NOT_DIGIT; } break;
case 'w': { (*out_compiled)[j].type = ALPHA; } break;
case 'W': { (*out_compiled)[j].type = NOT_ALPHA; } break;
case 's': { (*out_compiled)[j].type = WHITESPACE; } break;
case 'S': { (*out_compiled)[j].type = NOT_WHITESPACE; } break;

/* Escaped character, e.g. '.' or '$' */
default:
{
(*out_compiled)[j].type = CHAR_RE;
(*out_compiled)[j].u.ch = pattern[i];
} break;
}
}
/* '\\' as last char in pattern -> invalid regular expression. */
/*
else
{
(*out_compiled)[j].type = CHAR;
(*out_compiled)[j].ch = pattern[i];
}
*/
} break;

/* Copy characters inside [..] to buffer */
while ( (pattern[++i] != ']')
&& (pattern[i] != '\0')) /* Missing ] */
/* Character class: */
case '[':
{
if (pattern[i] == '\\')
{
if (ccl_bufidx >= MAX_CHAR_CLASS_LEN - 1)
/* Remember where the char-buffer starts. */
int buf_begin = ccl_bufidx;

/* Look-ahead to determine if negated */
if (pattern[i + 1] == '^')
{
//fputs("exceeded internal buffer!\n", stderr);
return 0;
(*out_compiled)[j].type = INV_CHAR_CLASS;
i += 1; /* Increment i to avoid including '^' in the char-buffer */
if (pattern[i + 1] == 0) /* incomplete pattern, missing non-zero char after '^' */
{
return 1;
}
}
if (pattern[i+1] == 0) /* incomplete pattern, missing non-zero char after '\\' */
else
{
(*out_compiled)[j].type = CHAR_CLASS;
}

/* Copy characters inside [..] to buffer */
while ((pattern[++i] != ']')
&& (i < (int)pattern_length)) /* Missing ] */
{
return 0;
if (pattern[i] == '\\')
{
if (ccl_bufidx >= MAX_CHAR_CLASS_LEN - 1)
{
//fputs("exceeded internal buffer!\n", stderr);
return 1;
}
if (pattern[i + 1] == 0) /* incomplete pattern, missing non-zero char after '\\' */
{
return 1;
}
(*out_ccl)[ccl_bufidx++] = pattern[i++];
}
else if (ccl_bufidx >= MAX_CHAR_CLASS_LEN)
{
//fputs("exceeded internal buffer!\n", stderr);
return 1;
}
(*out_ccl)[ccl_bufidx++] = pattern[i];
}
ccl_buf[ccl_bufidx++] = pattern[i++];
}
else if (ccl_bufidx >= MAX_CHAR_CLASS_LEN)
{
//fputs("exceeded internal buffer!\n", stderr);
return 0;
}
ccl_buf[ccl_bufidx++] = pattern[i];
if (ccl_bufidx >= MAX_CHAR_CLASS_LEN)
{
/* Catches cases such as [00000000000000000000000000000000000000][ */
//fputs("exceeded internal buffer!\n", stderr);
return 1;
}
/* Null-terminate string end */
(*out_ccl)[ccl_bufidx++] = 0;
(*out_compiled)[j].u.ccl = &(*out_ccl)[buf_begin];
} break;

/* Other characters: */
default:
{
(*out_compiled)[j].type = CHAR_RE;
(*out_compiled)[j].u.ch = c;
} break;
}
if (ccl_bufidx >= MAX_CHAR_CLASS_LEN)
/* no buffer-out-of-bounds access on invalid patterns - see https://github.com/kokke/tiny-regex-c/commit/1a279e04014b70b0695fba559a7c05d55e6ee90b */
if (pattern[i] == 0)
{
/* Catches cases such as [00000000000000000000000000000000000000][ */
//fputs("exceeded internal buffer!\n", stderr);
return 0;
return 1;
}
/* Null-terminate string end */
ccl_buf[ccl_bufidx++] = 0;
re_compiled[j].u.ccl = &ccl_buf[buf_begin];
} break;

/* Other characters: */
default:
{
re_compiled[j].type = CHAR;
re_compiled[j].u.ch = c;
} break;
i += 1;
j += 1;
}
/* no buffer-out-of-bounds access on invalid patterns - see https://github.com/kokke/tiny-regex-c/commit/1a279e04014b70b0695fba559a7c05d55e6ee90b */
if (pattern[i] == 0)
{
return 0;
}

i += 1;
j += 1;
}
/* 'UNUSED' is a sentinel used to indicate end-of-pattern */
re_compiled[j].type = UNUSED;
/* 'UNUSED' is a sentinel used to indicate end-of-pattern */
(*out_compiled)[j].type = UNUSED;

return (re_t) re_compiled;
return 0; // ERROR_SUCCESS
}

void re_print(regex_t* pattern)
Expand Down Expand Up @@ -260,7 +246,7 @@ void re_print(regex_t* pattern)
}
printf("]");
}
else if (pattern[i].type == CHAR)
else if (pattern[i].type == CHAR_RE)
{
printf(" '%c'", pattern[i].u.ch);
}
Expand Down
12 changes: 4 additions & 8 deletions c/meterpreter/source/tiny-regex-c/re.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,14 @@ typedef struct regex_t
/* Typedef'd pointer to get abstract datatype. */
typedef struct regex_t* re_t;


/* Compile regex string pattern to a regex_t-array. */
re_t re_compile(const char* pattern, size_t pattern_length);

#define MAX_REGEXP_OBJECTS 255 /* Max number of regex symbols in expression. */
#define MAX_CHAR_CLASS_LEN 255 /* Max length of the character-class buffer, in unsigned chars. */

/* Find matches of the compiled pattern inside text. */
int re_matchp(re_t pattern, const char* text, size_t text_length, size_t* matchlength);


/* Find matches of the txt pattern inside text (will compile automatically first). */
int re_match(const char* pattern, size_t pattern_length, const char* text, size_t text_length, size_t* matchlength);

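/* Compile a regular expression into the caller-provided buffers *out_compiled and *out_ccl
   rather than into a static buffer, so that several needles can be compiled and kept at the same time.
   Returns ERROR_SUCCESS (0) on success, else 1.
   NOTE(review): the implementation shown appears to bound its writes with MAX_REGEXP_OBJECTS and
   MAX_CHAR_CLASS_LEN rather than the max_regex_objects / max_char_class_len arguments - confirm
   that callers always pass buffers of at least those sizes. */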
int re_compile(const char* pattern, size_t pattern_length, size_t max_regex_objects, size_t max_char_class_len, re_t* out_compiled, unsigned char** out_ccl);

#ifdef __cplusplus
}
Expand Down

0 comments on commit 616d785

Please sign in to comment.