diff --git a/cover_agent/CoverAgent.py b/cover_agent/CoverAgent.py index 309cd5e47..36999e683 100644 --- a/cover_agent/CoverAgent.py +++ b/cover_agent/CoverAgent.py @@ -30,6 +30,7 @@ def __init__(self, args): llm_model=args.model, api_base=args.api_base, use_report_coverage_feature_flag=args.use_report_coverage_feature_flag, + mutation_testing=args.mutation_testing, ) def _validate_paths(self): @@ -89,6 +90,8 @@ def run(self): if self.test_gen.current_coverage < (self.test_gen.desired_coverage / 100): self.test_gen.run_coverage() + self.test_gen.run_mutations() + if self.test_gen.current_coverage >= (self.test_gen.desired_coverage / 100): self.logger.info( f"Reached above target coverage of {self.test_gen.desired_coverage}% (Current Coverage: {round(self.test_gen.current_coverage * 100, 2)}%) in {iteration_count} iterations." diff --git a/cover_agent/PromptBuilder.py b/cover_agent/PromptBuilder.py index 6b81bf58d..b7205fd66 100644 --- a/cover_agent/PromptBuilder.py +++ b/cover_agent/PromptBuilder.py @@ -42,6 +42,7 @@ def __init__( additional_instructions: str = "", failed_test_runs: str = "", language: str = "python", + mutation_testing: bool = False, ): """ The `PromptBuilder` class is responsible for building a formatted prompt string by replacing placeholders with the actual content of files read during initialization. It takes in various paths and settings as parameters and provides a method to generate the prompt. @@ -72,6 +73,7 @@ def __init__( self.test_file = self._read_file(test_file_path) self.code_coverage_report = code_coverage_report self.language = language + self.mutation_testing = mutation_testing # add line numbers to each line in 'source_file'. 
start from 1 self.source_file_numbered = "\n".join( [f"{i + 1} {line}" for i, line in enumerate(self.source_file.split("\n"))] @@ -141,12 +143,20 @@ def build_prompt(self) -> dict: } environment = Environment(undefined=StrictUndefined) try: - system_prompt = environment.from_string( - get_settings().test_generation_prompt.system - ).render(variables) - user_prompt = environment.from_string( - get_settings().test_generation_prompt.user - ).render(variables) + if self.mutation_testing: + system_prompt = environment.from_string( + get_settings().mutation_test_prompt.system + ).render(variables) + user_prompt = environment.from_string( + get_settings().mutation_test_prompt.user + ).render(variables) + else: + system_prompt = environment.from_string( + get_settings().test_generation_prompt.system + ).render(variables) + user_prompt = environment.from_string( + get_settings().test_generation_prompt.user + ).render(variables) except Exception as e: logging.error(f"Error rendering prompt: {e}") return {"system": "", "user": ""} diff --git a/cover_agent/UnitTestGenerator.py b/cover_agent/UnitTestGenerator.py index 01594a8f1..9c861fc42 100644 --- a/cover_agent/UnitTestGenerator.py +++ b/cover_agent/UnitTestGenerator.py @@ -3,6 +3,7 @@ import os import re import json + from wandb.sdk.data_types.trace_tree import Trace from cover_agent.Runner import Runner @@ -14,6 +15,10 @@ from cover_agent.utils import load_yaml from cover_agent.settings.config_loader import get_settings +import subprocess + +from shlex import split + class UnitTestGenerator: def __init__( @@ -30,6 +35,7 @@ def __init__( desired_coverage: int = 90, # Default to 90% coverage if not specified additional_instructions: str = "", use_report_coverage_feature_flag: bool = False, + mutation_testing: bool = False, ): """ Initialize the UnitTestGenerator class with the provided parameters. 
@@ -65,6 +71,7 @@ def __init__( self.additional_instructions = additional_instructions self.language = self.get_code_language(source_file_path) self.use_report_coverage_feature_flag = use_report_coverage_feature_flag + self.mutation_testing = mutation_testing self.last_coverage_percentages = {} self.llm_model = llm_model @@ -79,7 +86,7 @@ def __init__( self.failed_test_runs = [] self.total_input_token_count = 0 self.total_output_token_count = 0 - + # Run coverage and build the prompt self.run_coverage() self.prompt = self.build_prompt() @@ -202,7 +209,7 @@ def run_coverage(self): "Will default to using the full coverage report. You will need to check coverage manually for each passing test." ) with open(self.code_coverage_report_path, "r") as f: - self.code_coverage_report = f.read() + self.code_coverage_report = f.read() @staticmethod def get_included_files(included_files): @@ -739,6 +746,90 @@ def to_dict(self): def to_json(self): return json.dumps(self.to_dict()) + def run_mutations(self): + self.logger.info("Running mutation tests") + + # Run mutation tests + + mutation_prompt_builder = PromptBuilder( + source_file_path=self.source_file_path, + test_file_path=self.test_file_path, + code_coverage_report=self.code_coverage_report, + included_files=self.included_files, + additional_instructions=self.additional_instructions, + failed_test_runs=self.failed_test_runs, + language=self.language, + mutation_testing=True + ) + + mutation_prompt = mutation_prompt_builder.build_prompt() + + response, prompt_token_count, response_token_count = ( + self.ai_caller.call_model(prompt=mutation_prompt) + ) + + mutation_dict = load_yaml(response) + + for mutation in mutation_dict["mutation"]: + result = self.run_mutation(mutation) + self.logger.info(f"Mutation result: {result}") + + + def run_mutation(self, mutation): + mutated_code = mutation.get("mutation", None) + line_number = mutation.get("line", None) + + + # Read the original content + with open(self.source_file_path, "r") 
as source_file: + original_content = source_file.readlines() + + # Determine the indentation level of the line at line_number + indentation = len(original_content[line_number]) - len(original_content[line_number].lstrip()) + + # Adjust the indentation of the mutated code + adjusted_mutated_code = [ + ' ' * indentation + line if line.strip() else line + for line in mutated_code.split("\n") + ] + + # Insert the mutated code at the specified spot + modified_content = ( + original_content[:line_number - 1] + + adjusted_mutated_code + + original_content[line_number:] + ) + + # Write the modified content back to the file + with open(self.source_file_path, "w") as source_file: + source_file.writelines(modified_content) + source_file.flush() + + # Step 2: Run the test using the Runner class + self.logger.info( + f'Running test with the following command: "{self.test_command}"' + ) + stdout, stderr, exit_code, time_of_test_command = Runner.run_command( + command=self.test_command, cwd=self.test_command_dir + ) + + try: + result = subprocess.run( + split(self.test_command), + text=True, + capture_output=True, + cwd=self.test_command_dir, + timeout=30, + ) + except Exception as e: + logging.error(f"Error running test command: {e}") + result = None + finally: + # Write the modified content back to the file + with open(self.source_file_path, "w") as source_file: + source_file.writelines(original_content) + source_file.flush() + return result def extract_error_message_python(fail_message): """ diff --git a/cover_agent/main.py b/cover_agent/main.py index 1a9a9f10c..fccc8e51e 100644 --- a/cover_agent/main.py +++ b/cover_agent/main.py @@ -101,6 +101,11 @@ def parse_args(): default="", help="Path to optional log database. Default: %(default)s.", ) + parser.add_argument( + "--mutation-testing", + action="store_true", + help="Setting this to True enables mutation testing. 
Default: False.", + ) return parser.parse_args() diff --git a/cover_agent/settings/config_loader.py b/cover_agent/settings/config_loader.py index 1d2622d55..138aef751 100644 --- a/cover_agent/settings/config_loader.py +++ b/cover_agent/settings/config_loader.py @@ -7,6 +7,7 @@ "language_extensions.toml", "analyze_suite_test_headers_indentation.toml", "analyze_suite_test_insert_line.toml", + "mutation_test_prompt.toml", ] diff --git a/cover_agent/settings/mutation_test_prompt.toml b/cover_agent/settings/mutation_test_prompt.toml new file mode 100644 index 000000000..61963b41d --- /dev/null +++ b/cover_agent/settings/mutation_test_prompt.toml @@ -0,0 +1,80 @@ +[mutation_test_prompt] +system="""\ +""" + +user="""\ + +You are an AI mutation testing agent tasked with mutating {{ language }} code to evaluate its robustness. + +Mutation Strategy: + +1. Logic Tweaks: + Modify conditions (e.g., 'if (a < b)' to 'if (a <= b)') + Adjust loop boundaries + Introduce minor calculation errors + Avoid drastic changes or infinite loops. + +2. Output Modifications: + Change return types or formats + Alter response structures + Return corrupted or incorrect data + +3. Method Interference: + Alter function parameters + Replace or omit key method calls + +4. Failure Injection: + Introduce exceptions or error states + Simulate system or resource failures + +5. Data Handling Faults: + Inject parsing errors + Bypass data validation + Corrupt object states + +6. Boundary Condition Testing: + Use out-of-bounds indices + Test extreme or edge-case parameters + +7. Concurrency Issues: + Simulate race conditions or deadlocks + Introduce timeouts or delays + +8. Security Vulnerabilities: + Replicate common vulnerabilities (e.g., buffer overflow, SQL injection, XSS) + Introduce authentication or authorization bypasses + + +Focus on subtle, realistic mutations that challenge the code's resilience while keeping core functionality intact. 
Prioritize scenarios likely to arise from programming errors or edge cases. + + +## Source Code to add Mutations to: {{ source_file_name }} +```{{language}} +{{ source_file_numbered }} +``` + +## Task +1. Conduct a line-by-line analysis of the source code. +2. Generate mutations for each test case. +3. Prioritize mutating function blocks and critical code sections. +4. Ensure the mutations offer meaningful insights into code quality and test coverage. +5. Present the output in order of ascending line numbers. +6. Avoid including manually inserted line numbers in the response. +7. Limit mutations to single-line changes only. + +Example output: +```yaml +source: {{source_file}} +mutation: + line: + mutation: | + +``` + +Use block scalar ('|') to format each YAML output. + +Response (should be a valid YAML, and nothing else): +```yaml + +Generate mutants that test the code's resilience while preserving core functionality. Output only in YAML format, with no additional explanations or comments. +""" \ No newline at end of file