Detailed logging using databases (#151)
* Adding initial database concept.

* Initial passing of unit tests.

* Refactored db call. Still WIP.

* Fixed db name.

* Updated DB path.

* LogDB working locally but not updating from Docker container out.

* Migrated DB inserts to Cover Agent.

* Added documentation for DB.

* Fixed UnitTestDB tests.

* Fixed default DB creation.

* Reverting test.

* Resolved empty DB path assertion.

* Added before/after tests into DB.

* Fixed arg call for validate test.
EmbeddedDevops1 authored Sep 10, 2024
1 parent 9634b99 commit e9f9467
Showing 16 changed files with 1,704 additions and 1,148 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -21,6 +21,7 @@ __pycache__
generated_prompt.md
test_results.html
run.log
*.db

# PyInstaller Generated files
/build/
11 changes: 9 additions & 2 deletions cover_agent/CoverAgent.py
@@ -7,7 +7,7 @@
from cover_agent.CustomLogger import CustomLogger
from cover_agent.ReportGenerator import ReportGenerator
from cover_agent.UnitTestGenerator import UnitTestGenerator

from cover_agent.UnitTestDB import UnitTestDB

class CoverAgent:
def __init__(self, args):
@@ -41,6 +41,10 @@ def _validate_paths(self):
raise FileNotFoundError(
f"Test file not found at {self.args.test_file_path}"
)
if not self.args.log_db_path:
# Create default DB file if not provided
self.args.log_db_path = "cover_agent_unit_test_runs.db"
self.test_db = UnitTestDB(db_connection_string=f"sqlite:///{self.args.log_db_path}")

def _duplicate_test_file(self):
if self.args.test_file_output_path != "":
@@ -73,10 +77,13 @@ def run(self):

for generated_test in generated_tests_dict.get("new_tests", []):
test_result = self.test_gen.validate_test(
generated_test, generated_tests_dict, self.args.run_tests_multiple_times
generated_test, self.args.run_tests_multiple_times
)
test_results_list.append(test_result)

# Insert the test result into the database
self.test_db.insert_attempt(test_result)

iteration_count += 1

if self.test_gen.current_coverage < (self.test_gen.desired_coverage / 100):
85 changes: 85 additions & 0 deletions cover_agent/UnitTestDB.py
@@ -0,0 +1,85 @@
from datetime import datetime
from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, scoped_session, load_only
from sqlalchemy.orm.exc import NoResultFound

Base = declarative_base()

class UnitTestGenerationAttempt(Base):
__tablename__ = 'unit_test_generation_attempts'
id = Column(Integer, primary_key=True)
run_time = Column(DateTime, default=datetime.now) # Use local time
status = Column(String)
reason = Column(Text)
exit_code = Column(Integer)
stderr = Column(Text)
stdout = Column(Text)
test_code = Column(Text)
imports = Column(Text)
original_test_file = Column(Text)
processed_test_file = Column(Text)

class UnitTestDB:
def __init__(self, db_connection_string):
self.engine = create_engine(db_connection_string)
Base.metadata.create_all(self.engine)
self.Session = scoped_session(sessionmaker(bind=self.engine))

def insert_attempt(self, test_result: dict):
with self.Session() as session:
new_attempt = UnitTestGenerationAttempt(
run_time=datetime.now(), # Use local time
status=test_result.get("status"),
reason=test_result.get("reason"),
exit_code=test_result.get("exit_code"),
stderr=test_result.get("stderr"),
stdout=test_result.get("stdout"),
test_code=test_result.get("test", {}).get("test_code", ""),
imports=test_result.get("test", {}).get("new_imports_code", ""),
original_test_file=test_result.get("original_test_file"),
processed_test_file=test_result.get("processed_test_file"),
)
session.add(new_attempt)
session.commit()
return new_attempt.id

def select_all_attempts(self):
with self.Session() as session:
return session.query(UnitTestGenerationAttempt).all()

def select_attempt(self, attempt_id):
with self.Session() as session:
try:
return session.query(UnitTestGenerationAttempt).filter_by(id=attempt_id).one()
except NoResultFound:
return None

def select_attempt_in_range(self, start: datetime, end: datetime):
with self.Session() as session:
return session.query(UnitTestGenerationAttempt).filter(
UnitTestGenerationAttempt.run_time >= start,
UnitTestGenerationAttempt.run_time <= end
).all()

def select_attempt_flat(self, attempt_id):
with self.Session() as session:
try:
result = session.query(UnitTestGenerationAttempt).filter_by(id=attempt_id).options(
load_only(
UnitTestGenerationAttempt.id,
UnitTestGenerationAttempt.run_time,
UnitTestGenerationAttempt.status,
UnitTestGenerationAttempt.reason,
UnitTestGenerationAttempt.exit_code,
UnitTestGenerationAttempt.stderr,
UnitTestGenerationAttempt.stdout,
UnitTestGenerationAttempt.test_code,
UnitTestGenerationAttempt.imports,
UnitTestGenerationAttempt.original_test_file,
UnitTestGenerationAttempt.processed_test_file,
)
).one().__dict__
return result
except NoResultFound:
return None
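For orientation, here's a minimal usage sketch of the new `UnitTestDB` class (not part of the commit; the in-memory connection string and the literal field values are illustrative placeholders):

```python
from cover_agent.UnitTestDB import UnitTestDB

# Illustrative in-memory DB; CoverAgent itself builds the connection
# string as f"sqlite:///{args.log_db_path}".
db = UnitTestDB(db_connection_string="sqlite:///:memory:")

# The keys mirror the dict that UnitTestGenerator.validate_test returns.
attempt_id = db.insert_attempt({
    "status": "FAIL",
    "reason": "assertion failed",
    "exit_code": 1,
    "stderr": "",
    "stdout": "1 failed",
    "test": {"test_code": "def test_example(): ...", "new_imports_code": ""},
    "original_test_file": "# test file before insertion",
    "processed_test_file": "# test file after insertion",
})

print(db.select_attempt_flat(attempt_id)["status"])  # -> FAIL
```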
42 changes: 35 additions & 7 deletions cover_agent/UnitTestGenerator.py
@@ -66,6 +66,7 @@ def __init__(
self.language = self.get_code_language(source_file_path)
self.use_report_coverage_feature_flag = use_report_coverage_feature_flag
self.last_coverage_percentages = {}
self.llm_model = llm_model

# Objects to instantiate
self.ai_caller = AICaller(model=llm_model, api_base=api_base)
@@ -421,13 +422,12 @@ def generate_tests(self, max_tokens=4096, dry_run=False):

return tests_dict

def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_attempts=1):
def validate_test(self, generated_test: dict, num_attempts=1):
"""
Validate a generated test by inserting it into the test file, running the test, and checking for pass/fail.
Parameters:
generated_test (dict): The generated test to validate, containing test code and additional imports.
generated_tests_dict (dict): A dictionary containing information about the generated tests.
num_attempts (int, optional): The number of attempts to run the test. Defaults to 1.
Returns:
@@ -449,6 +449,10 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at
12. Handle any exceptions that occur during the validation process, log the errors, and roll back the test file if necessary.
13. Log additional details and error messages for failed tests, and optionally, use the Trace class for detailed logging if 'WANDB_API_KEY' is present in the environment variables.
"""
# Store original content of the test file
with open(self.test_file_path, "r") as test_file:
original_content = test_file.read()

try:
# Step 0: no pre-process.
# We asked the model that each generated test should be a self-contained independent test
@@ -482,12 +486,10 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at
[delta_indent * " " + line for line in test_code.split("\n")]
)
test_code_indented = "\n" + test_code_indented.strip("\n") + "\n"
if test_code_indented and relevant_line_number_to_insert_tests_after:

if test_code_indented and relevant_line_number_to_insert_tests_after:
# Step 1: Insert the generated test to the relevant line in the test file
additional_imports_lines = ""
with open(self.test_file_path, "r") as test_file:
original_content = test_file.read() # Store original content
original_content_lines = original_content.split("\n")
test_code_lines = test_code_indented.split("\n")
# insert the test code at the relevant line
@@ -546,6 +548,8 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at
"stderr": stderr,
"stdout": stdout,
"test": generated_test,
"original_test_file": original_content,
"processed_test_file": processed_test,
}

error_message = extract_error_message_python(fail_details["stdout"])
@@ -624,6 +628,8 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at
"stderr": stderr,
"stdout": stdout,
"test": generated_test,
"original_test_file": original_content,
"processed_test_file": processed_test,
}
self.failed_test_runs.append(
{
@@ -658,6 +664,8 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at
"stderr": stderr,
"stdout": stdout,
"test": generated_test,
"original_test_file": original_content,
"processed_test_file": processed_test,
}
self.failed_test_runs.append(
{
@@ -675,7 +683,6 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at

self.current_coverage = new_percentage_covered


for key in coverage_percentages:
if key not in self.last_coverage_percentages:
self.last_coverage_percentages[key] = 0
@@ -699,6 +706,8 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at
"stderr": stderr,
"stdout": stdout,
"test": generated_test,
"original_test_file": original_content,
"processed_test_file": processed_test,
}
except Exception as e:
self.logger.error(f"Error validating test: {e}")
@@ -709,8 +718,27 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at
"stderr": str(e),
"stdout": "",
"test": generated_test,
"original_test_file": original_content,
"processed_test_file": "N/A",
}

def to_dict(self):
return {
"source_file_path": self.source_file_path,
"test_file_path": self.test_file_path,
"code_coverage_report_path": self.code_coverage_report_path,
"test_command": self.test_command,
"llm_model": self.llm_model,
"test_command_dir": self.test_command_dir,
"included_files": self.included_files,
"coverage_type": self.coverage_type,
"desired_coverage": self.desired_coverage,
"additional_instructions": self.additional_instructions,
}

def to_json(self):
return json.dumps(self.to_dict())


def extract_error_message_python(fail_message):
"""
@@ -738,4 +766,4 @@ def extract_error_message_python(fail_message):
return ""
except Exception as e:
logging.error(f"Error extracting error message: {e}")
return ""
return ""
5 changes: 5 additions & 0 deletions cover_agent/main.py
@@ -96,6 +96,11 @@ def parse_args():
action="store_true",
help="Setting this to True considers the coverage of all the files in the coverage report. This means we consider a test as good if it increases coverage for a different file other than the source file. Default: False.",
)
parser.add_argument(
"--log-db-path",
default="",
help="Path to optional log database. Default: %(default)s.",
)
return parser.parse_args()


2 changes: 1 addition & 1 deletion cover_agent/version.txt
@@ -1 +1 @@
0.1.48
0.1.49
File renamed without changes.
66 changes: 66 additions & 0 deletions docs/database_usage.md
@@ -0,0 +1,66 @@
# Using a Test Database with Cover Agent
Note: This feature is still in beta.

## Requirements
Currently, only SQLite is supported. [SQLite](https://www.sqlite.org/) writes to and reads from a local `.db` file (versus a server-based database). The long-term goal is to support any type of database that is supported by [SQLAlchemy](https://www.sqlalchemy.org/).
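Concretely, Cover Agent turns the path you give it into a SQLAlchemy connection URL (see the `CoverAgent.py` diff above), so supporting another engine later should mostly be a matter of the URL scheme. A sketch (the PostgreSQL URL is purely hypothetical):

```python
from cover_agent.UnitTestDB import UnitTestDB

# Today: a SQLite file path becomes a SQLAlchemy URL, as CoverAgent does internally.
db = UnitTestDB(db_connection_string="sqlite:///run_tests.db")

# A hypothetical future engine would only change the URL, e.g.:
# UnitTestDB(db_connection_string="postgresql://user:pass@host/dbname")
```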

You'll need SQLite installed in order to view the tables, but to get started you can just create an empty `.db` file using the `touch` command. For example:
```
touch run_tests.db
```

## Running with an external DB
You can run Cover Agent using the `--log-db-path` option. For example:
```
cover-agent \
--source-file-path "templated_tests/python_fastapi/app.py" \
--test-file-path "templated_tests/python_fastapi/test_app.py" \
--code-coverage-report-path "templated_tests/python_fastapi/coverage.xml" \
--test-command "pytest --cov=. --cov-report=xml --cov-report=term" \
--test-command-dir "templated_tests/python_fastapi" \
--coverage-type "cobertura" \
--desired-coverage 70 \
--max-iterations 10 \
--log-db-path "run_tests.db"
```

Cover Agent will create a table called `unit_test_generation_attempts` within the database.
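If you'd rather verify this from Python than from the `sqlite3` command-line tool, here is a quick sketch using only the standard library (it assumes Cover Agent has already run with `--log-db-path "run_tests.db"`):

```python
import sqlite3

# List the tables Cover Agent created in the log database.
with sqlite3.connect("run_tests.db") as conn:
    tables = conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table'"
    ).fetchall()

print(tables)  # [('unit_test_generation_attempts',)]
```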

## Integration Tests
You can run the integration test suite, passing the local `.db` file into each Docker container, with the following example command from the root of this repository:
```
LOG_DB_PATH="<full_path_to_root_folder>/run_tests.db" tests_integration/test_all.sh
```

## Observing the test data
You can look at the test results using an external database reader or the basic SQLite command-line tool:
```
sqlite3 run_tests.db
```

Once in SQLite, you can list the tables and observe that, after running some tests, a table called `unit_test_generation_attempts` has been created:
```
sqlite> .tables
unit_test_generation_attempts
```

To get the definition of our table, we can run:
```
sqlite> PRAGMA table_info(unit_test_generation_attempts);
0|id|INTEGER|1||1
1|run_time|DATETIME|0||0
2|status|VARCHAR|0||0
3|reason|TEXT|0||0
4|exit_code|INTEGER|0||0
5|stderr|TEXT|0||0
6|stdout|TEXT|0||0
7|test_code|TEXT|0||0
8|imports|TEXT|0||0
9|original_test_file|TEXT|0||0
10|processed_test_file|TEXT|0||0
```

A simple `select * from unit_test_generation_attempts;` query will display all test results (which contain embedded newlines). This can be difficult to read from the command line, so a GUI database browser will probably serve you better.
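Alternatively, the same rows can be pulled through the new `UnitTestDB` helpers instead of raw SQL (a sketch, assuming the `run_tests.db` file from the examples above):

```python
from cover_agent.UnitTestDB import UnitTestDB

db = UnitTestDB(db_connection_string="sqlite:///run_tests.db")

# Print a brief summary line for each recorded attempt.
for attempt in db.select_all_attempts():
    print(attempt.id, attempt.run_time, attempt.status)
```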

You can also filter the results to show only failed tests, for example:
```
sqlite> select * from unit_test_generation_attempts where status = 'FAIL';
```
File renamed without changes.