From c96d9cdef64cc6790e2d410cc1483553e7237ca0 Mon Sep 17 00:00:00 2001 From: Emery Berger Date: Fri, 12 Apr 2024 16:23:27 +0000 Subject: [PATCH] Updated, removed bad tests. --- src/chatdbg/old_stuff/gdb_print_test.py | 109 ----------------------- src/chatdbg/old_stuff/lldb_print_test.py | 108 ---------------------- test/test_coverup_13.py | 35 -------- test/test_coverup_36.py | 52 ----------- test/test_coverup_6.py | 6 +- 5 files changed, 3 insertions(+), 307 deletions(-) delete mode 100644 src/chatdbg/old_stuff/gdb_print_test.py delete mode 100644 src/chatdbg/old_stuff/lldb_print_test.py delete mode 100644 test/test_coverup_13.py delete mode 100644 test/test_coverup_36.py diff --git a/src/chatdbg/old_stuff/gdb_print_test.py b/src/chatdbg/old_stuff/gdb_print_test.py deleted file mode 100644 index 8e4f4f2..0000000 --- a/src/chatdbg/old_stuff/gdb_print_test.py +++ /dev/null @@ -1,109 +0,0 @@ -class PrintTest(gdb.Command): - """print all variables in a run while recursing through pointers, keeping track of seen addresses""" - - def __init__(self): - super().__init__("print-test", gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL, True) - - def invoke(self, arg, from_tty): - help_string = "Usage: print-test [recurse_max]\n\nrecurse_max: The maximum number of times to recurse through nested structs or pointers to pointers. Default: 3" - if arg == "--help": - print(help_string) - return - recurse_max = 3 - if arg != "": - try: - recurse_max = int(arg) - except ValueError as e: - print(f"recurse_max value could not be parsed: {e}") - return - if recurse_max < 1: - print("recurse_max value must be at least 1.") - return - frame = gdb.selected_frame() - block = gdb.block_for_pc(frame.pc()) - - all_vars = [] - addresses = {} - for symbol in block: - if symbol.is_argument or symbol.is_variable: - sym_val = frame.read_var(symbol) - # Returns python dictionary for each variable - variable = self._val_to_json( - symbol.name, sym_val, recurse_max, addresses - ) - js = json.dumps(variable, indent=4) - all_vars.append(js) - - # Print all addresses and JSON objects - # print(addresses) - for j in all_vars: - print(j) - - # Converts a gdb.Value to a JSON object - def _val_to_json(self, name, val, max_recurse, address_book): - # Store address - address_book.setdefault(str(val.address.format_string()), name) - - diction = {} - # Set var name - diction["name"] = name - # Set var type - if val.type.code is gdb.TYPE_CODE_PTR: - diction["type"] = "pointer" # Default type name is "none" - elif val.type.code is gdb.TYPE_CODE_ARRAY: - diction["type"] = "array" # Default type name is "none" - else: - diction["type"] = val.type.name - # Dereference pointers - if val.type.code is gdb.TYPE_CODE_PTR: - if val: - value = "->" - try: - deref_val = val.referenced_value() - # If dereferenced value is "seen", then get name from address book - if deref_val.address.format_string() in address_book: - diction["value"] = address_book[ - deref_val.address.format_string() - ] - else: - # Recurse up to max_recurse times - for i in range(max_recurse - 1): - if deref_val.type.code is gdb.TYPE_CODE_PTR: - value += "->" - deref_val = deref_val.referenced_value() - elif deref_val.type.code is gdb.TYPE_CODE_STRUCT: - value = self._val_to_json( - value + name, - deref_val, - max_recurse - i - 1, - address_book, - ) - break - else: - break - # Append to -> string or not, depending on type of value - if isinstance(value, dict): - diction["value"] = value - else: - diction["value"] = value + deref_val.format_string() - except Exception as e: - diction["value"] = value + "Exception" - else: - # Nullptr case, might be a better way to represent - diction["value"] = "nullptr" - # If struct, recurse through fields - elif val.type.code is gdb.TYPE_CODE_STRUCT: - fields = [] - for f in val.type.fields(): - fields.append( - self._val_to_json( - f.name, val[f.name], max_recurse - 1, address_book - ) - ) - diction["value"] = fields - else: - diction["value"] = val.format_string() - return diction - - -PrintTest() diff --git a/src/chatdbg/old_stuff/lldb_print_test.py b/src/chatdbg/old_stuff/lldb_print_test.py deleted file mode 100644 index b3d1d00..0000000 --- a/src/chatdbg/old_stuff/lldb_print_test.py +++ /dev/null @@ -1,108 +0,0 @@ -@lldb.command("print-test") -def print_test( - debugger: lldb.SBDebugger, - command: str, - result: lldb.SBCommandReturnObject, - internal_dict: dict, -) -> None: - """print all variables in a run while recursing through pointers, keeping track of seen addresses""" - - args = command.split() - recurse_max = 3 - help_string = "Usage: print-test [recurse_max]\n\nrecurse_max: The maximum number of times to recurse through nested structs or pointers to pointers. Default: 3" - if len(args) > 1 or (len(args) == 1 and args[0] == "--help"): - print(help_string) - return - elif len(args) == 1: - try: - recurse_max = int(args[0]) - except ValueError as e: - print("recurse_max value could not be parsed: %s\n" % args[0]) - return - if recurse_max < 1: - print("recurse_max value must be at least 1.\n") - return - frame = ( - lldb.debugger.GetSelectedTarget() - .GetProcess() - .GetSelectedThread() - .GetSelectedFrame() - ) - - all_vars = [] - addresses = {} - for var in frame.get_all_variables(): - # Returns python dictionary for each variable, converts to JSON - variable = _val_to_json(var, recurse_max, addresses) - js = json.dumps(variable, indent=4) - all_vars.append(js) - - # Print all addresses and JSON objects - # print(addresses) - for j in all_vars: - print(j) - return - - -def _val_to_json( - var: lldb.SBValue, - recurse_max: int, - address_book: dict, -) -> dict: - # Store address - address_book.setdefault(str(var.GetAddress()), var.GetName()) - - json = {} - json["name"] = var.GetName() - json["type"] = var.GetTypeName() - # Dereference pointers - if "*" in var.GetType().GetName(): - if var.GetValueAsUnsigned() != 0: - value = "->" - try: - deref_val = var.Dereference() - # If dereferenced value is "seen", then get name from address book - if str(deref_val.GetAddress()) in address_book: - json["value"] = address_book[str(deref_val.GetAddress())] - else: - # Recurse up to max_recurse times - for i in range(recurse_max - 1): - if "*" in deref_val.GetType().GetName(): - value += "->" - deref_val = deref_val.Dereference() - elif len(deref_val.GetType().get_fields_array()) > 0: - value = _val_to_json( - deref_val, - recurse_max - i - 1, - address_book, - ) - break - else: - break - # Append to -> string or not, depending on type of value - if isinstance(value, dict): - json["value"] = value - else: - json["value"] = ( - value + str(deref_val)[str(deref_val).find("= ") + 2 :] - ) - except Exception as e: - json["value"] = value + "Exception" - else: - json["value"] = "nullptr" - # Recurse through struct fields - elif len(var.GetType().get_fields_array()) > 0: - fields = [] - for i in range(var.GetNumChildren()): - f = var.GetChildAtIndex(i) - fields.append( - _val_to_json( - f, - recurse_max - 1, - address_book, - ) - ) - json["value"] = fields - else: - json["value"] = str(var)[str(var).find("= ") + 2 :] - return json diff --git a/test/test_coverup_13.py b/test/test_coverup_13.py deleted file mode 100644 index 63785c3..0000000 --- a/test/test_coverup_13.py +++ /dev/null @@ -1,35 +0,0 @@ -# file src/chatdbg/util/trim.py:68-128 -# lines [68, 69, 70, 71, 72, 85, 87, 88, 90, 91, 93, 97, 101, 102, 103, 104, 108, 109, 110, 112, 114, 117, 120, 124, 125, 126, 128] -# branches ['90->91', '90->93', '101->102', '101->108', '103->101', '103->104', '108->109', '108->124', '110->112', '110->113', '113->117', '113->120'] - -import pytest -from unittest.mock import MagicMock, patch -from chatdbg.util.trim import trim_messages - -# Mocking the external dependencies and global variables -litellm = MagicMock() -litellm.model_cost = { - "test_model": {"max_tokens": 100}, - "another_model": {"max_tokens": 200}, -} -litellm.token_counter = lambda model, messages: sum(len(m['content']) for m in messages) -chunkify = lambda messages, model: [([m], False) for m in messages] -sum_messages = lambda messages, model: sum(len(m['content']) for m in messages) -sum_kept_chunks = lambda chunks, model: sum(sum_messages(m, model) for m, kept in chunks if kept) -sum_all_chunks = lambda chunks, model: sum(sum_messages(m, model) for m, kept in chunks) - -# Test to cover the branch where token count is less than max_tokens -def test_trim_messages_no_trimming_required(): - messages = [ - {"role": "user", "content": "Hello"}, - {"role": "system", "content": "System message"}, - {"role": "user", "content": "How are you?"}, - ] - model = "test_model" - with patch('chatdbg.util.trim.litellm', litellm), \ - patch('chatdbg.util.trim.chunkify', chunkify), \ - patch('chatdbg.util.trim.sum_messages', sum_messages), \ - patch('chatdbg.util.trim.sum_kept_chunks', sum_kept_chunks), \ - patch('chatdbg.util.trim.sum_all_chunks', sum_all_chunks): - trimmed_messages = trim_messages(messages, model) - assert trimmed_messages == messages, "No messages should be trimmed if under max_tokens" diff --git a/test/test_coverup_36.py b/test/test_coverup_36.py deleted file mode 100644 index f16c502..0000000 --- a/test/test_coverup_36.py +++ /dev/null @@ -1,52 +0,0 @@ -# file src/chatdbg/util/trim.py:33-34 -# lines [33, 34] -# branches [] - -import pytest -from chatdbg.util.trim import sum_kept_chunks - -# Mock function to replace sum_messages -def mock_sum_messages(messages, model): - return len(messages) - -# Test function to cover the case where 'kept' is True -def test_sum_kept_chunks_with_kept_true(monkeypatch): - # Replace the original sum_messages with the mock function - monkeypatch.setattr("chatdbg.util.trim.sum_messages", mock_sum_messages) - - # Define chunks with 'kept' as True - chunks = [(['message1', 'message2'], True), (['message3'], True)] - - # Call the function under test - result = sum_kept_chunks(chunks, None) - - # Assert that the result is as expected - assert result == 3 # Because there are 3 messages in total and all are kept - -# Test function to cover the case where 'kept' is False -def test_sum_kept_chunks_with_kept_false(monkeypatch): - # Replace the original sum_messages with the mock function - monkeypatch.setattr("chatdbg.util.trim.sum_messages", mock_sum_messages) - - # Define chunks with 'kept' as False - chunks = [(['message1', 'message2'], False), (['message3'], False)] - - # Call the function under test - result = sum_kept_chunks(chunks, None) - - # Assert that the result is as expected - assert result == 0 # Because no messages are kept - -# Test function to cover the case with a mix of 'kept' True and False -def test_sum_kept_chunks_with_mixed_kept(monkeypatch): - # Replace the original sum_messages with the mock function - monkeypatch.setattr("chatdbg.util.trim.sum_messages", mock_sum_messages) - - # Define chunks with a mix of 'kept' True and False - chunks = [(['message1', 'message2'], True), (['message3'], False), (['message4', 'message5'], True)] - - # Call the function under test - result = sum_kept_chunks(chunks, None) - - # Assert that the result is as expected - assert result == 4 # Because there are 4 messages kept (2 in the first chunk and 2 in the third chunk) diff --git a/test/test_coverup_6.py b/test/test_coverup_6.py index 9d1baad..0f8694c 100644 --- a/test/test_coverup_6.py +++ b/test/test_coverup_6.py @@ -3,7 +3,7 @@ # branches ['44->45', '44->52', '45->46', '45->51'] import pytest -from chatdbg.util.trim import extract +from chatdbg.util.trim import _extract from unittest.mock import patch @pytest.fixture @@ -27,7 +27,7 @@ def test_extract_with_tool_call_ids(mock_sandwich_tokens, mock_litellm_encode): model = "model" tool_call_ids = [1, 2] - tools, other = extract(messages, model, tool_call_ids) + tools, other = _extract(messages, model, tool_call_ids) assert mock_sandwich_tokens.called assert len(tools) == 2 @@ -45,7 +45,7 @@ def test_extract_without_tool_call_ids(mock_sandwich_tokens, mock_litellm_encode model = "model" tool_call_ids = [3] - tools, other = extract(messages, model, tool_call_ids) + tools, other = _extract(messages, model, tool_call_ids) assert not mock_sandwich_tokens.called assert len(tools) == 0