Detailed logging using databases (#151)
* Adding initial database concept.

* Initial passing of unit tests.

* Refactored db call. Still WIP.

* Fixed db name.

* Updated DB path.

* LogDB working locally but not updating from Docker container out.

* Migrated DB inserts to Cover Agent.

* Added documentation for DB.

* Fixed UnitTestDB tests.

* Fixed default DB creation.

* Reverting test.

* Resolved empty DB path assertion.

* Added before/after tests into DB.

* Fixed arg call for validate test.
EmbeddedDevops1 authored Sep 10, 2024
1 parent 9634b99 commit e9f9467
Showing 16 changed files with 1,704 additions and 1,148 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -21,6 +21,7 @@ __pycache__
generated_prompt.md
test_results.html
run.log
*.db

# PyInstaller Generated files
/build/
11 changes: 9 additions & 2 deletions cover_agent/CoverAgent.py
@@ -7,7 +7,7 @@
from cover_agent.CustomLogger import CustomLogger
from cover_agent.ReportGenerator import ReportGenerator
from cover_agent.UnitTestGenerator import UnitTestGenerator

from cover_agent.UnitTestDB import UnitTestDB

class CoverAgent:
def __init__(self, args):
@@ -41,6 +41,10 @@ def _validate_paths(self):
raise FileNotFoundError(
f"Test file not found at {self.args.test_file_path}"
)
if not self.args.log_db_path:
# Create default DB file if not provided
self.args.log_db_path = "cover_agent_unit_test_runs.db"
self.test_db = UnitTestDB(db_connection_string=f"sqlite:///{self.args.log_db_path}")

def _duplicate_test_file(self):
if self.args.test_file_output_path != "":
@@ -73,10 +77,13 @@ def run(self):

for generated_test in generated_tests_dict.get("new_tests", []):
test_result = self.test_gen.validate_test(
generated_test, generated_tests_dict, self.args.run_tests_multiple_times
generated_test, self.args.run_tests_multiple_times
)
test_results_list.append(test_result)

# Insert the test result into the database
self.test_db.insert_attempt(test_result)

iteration_count += 1

if self.test_gen.current_coverage < (self.test_gen.desired_coverage / 100):
85 changes: 85 additions & 0 deletions cover_agent/UnitTestDB.py
@@ -0,0 +1,85 @@
from datetime import datetime
from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, scoped_session, load_only
from sqlalchemy.orm.exc import NoResultFound

Base = declarative_base()

class UnitTestGenerationAttempt(Base):
__tablename__ = 'unit_test_generation_attempts'
id = Column(Integer, primary_key=True)
run_time = Column(DateTime, default=datetime.now) # Use local time
status = Column(String)
reason = Column(Text)
exit_code = Column(Integer)
stderr = Column(Text)
stdout = Column(Text)
test_code = Column(Text)
imports = Column(Text)
original_test_file = Column(Text)
processed_test_file = Column(Text)

class UnitTestDB:
def __init__(self, db_connection_string):
self.engine = create_engine(db_connection_string)
Base.metadata.create_all(self.engine)
self.Session = scoped_session(sessionmaker(bind=self.engine))

def insert_attempt(self, test_result: dict):
with self.Session() as session:
new_attempt = UnitTestGenerationAttempt(
run_time=datetime.now(), # Use local time
status=test_result.get("status"),
reason=test_result.get("reason"),
exit_code=test_result.get("exit_code"),
stderr=test_result.get("stderr"),
stdout=test_result.get("stdout"),
test_code=test_result.get("test", {}).get("test_code", ""),
imports=test_result.get("test", {}).get("new_imports_code", ""),
original_test_file=test_result.get("original_test_file"),
processed_test_file=test_result.get("processed_test_file"),
)
session.add(new_attempt)
session.commit()
return new_attempt.id

def select_all_attempts(self):
with self.Session() as session:
return session.query(UnitTestGenerationAttempt).all()

def select_attempt(self, attempt_id):
with self.Session() as session:
try:
return session.query(UnitTestGenerationAttempt).filter_by(id=attempt_id).one()
except NoResultFound:
return None

def select_attempt_in_range(self, start: datetime, end: datetime):
with self.Session() as session:
return session.query(UnitTestGenerationAttempt).filter(
UnitTestGenerationAttempt.run_time >= start,
UnitTestGenerationAttempt.run_time <= end
).all()

def select_attempt_flat(self, attempt_id):
with self.Session() as session:
try:
result = session.query(UnitTestGenerationAttempt).filter_by(id=attempt_id).options(
load_only(
UnitTestGenerationAttempt.id,
UnitTestGenerationAttempt.run_time,
UnitTestGenerationAttempt.status,
UnitTestGenerationAttempt.reason,
UnitTestGenerationAttempt.exit_code,
UnitTestGenerationAttempt.stderr,
UnitTestGenerationAttempt.stdout,
UnitTestGenerationAttempt.test_code,
UnitTestGenerationAttempt.imports,
UnitTestGenerationAttempt.original_test_file,
UnitTestGenerationAttempt.processed_test_file,
)
).one().__dict__
return result
except NoResultFound:
return None
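For orientation, here's a minimal usage sketch of the new `UnitTestDB` class (not part of the commit; the in-memory connection string and the literal field values are illustrative placeholders):

```python
from cover_agent.UnitTestDB import UnitTestDB

# Illustrative in-memory DB; CoverAgent itself builds the connection
# string as f"sqlite:///{args.log_db_path}".
db = UnitTestDB(db_connection_string="sqlite:///:memory:")

# The keys mirror the dict that UnitTestGenerator.validate_test returns.
attempt_id = db.insert_attempt({
    "status": "FAIL",
    "reason": "assertion failed",
    "exit_code": 1,
    "stderr": "",
    "stdout": "1 failed",
    "test": {"test_code": "def test_example(): ...", "new_imports_code": ""},
    "original_test_file": "# test file before insertion",
    "processed_test_file": "# test file after insertion",
})

print(db.select_attempt_flat(attempt_id)["status"])  # -> FAIL
```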
42 changes: 35 additions & 7 deletions cover_agent/UnitTestGenerator.py
@@ -66,6 +66,7 @@ def __init__(
self.language = self.get_code_language(source_file_path)
self.use_report_coverage_feature_flag = use_report_coverage_feature_flag
self.last_coverage_percentages = {}
self.llm_model = llm_model

# Objects to instantiate
self.ai_caller = AICaller(model=llm_model, api_base=api_base)
@@ -421,13 +422,12 @@ def generate_tests(self, max_tokens=4096, dry_run=False):

return tests_dict

def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_attempts=1):
def validate_test(self, generated_test: dict, num_attempts=1):
"""
Validate a generated test by inserting it into the test file, running the test, and checking for pass/fail.
Parameters:
generated_test (dict): The generated test to validate, containing test code and additional imports.
generated_tests_dict (dict): A dictionary containing information about the generated tests.
num_attempts (int, optional): The number of attempts to run the test. Defaults to 1.
Returns:
@@ -449,6 +449,10 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at
12. Handle any exceptions that occur during the validation process, log the errors, and roll back the test file if necessary.
13. Log additional details and error messages for failed tests, and optionally, use the Trace class for detailed logging if 'WANDB_API_KEY' is present in the environment variables.
"""
# Store original content of the test file
with open(self.test_file_path, "r") as test_file:
original_content = test_file.read()

try:
# Step 0: no pre-process.
# We asked the model that each generated test should be a self-contained independent test
@@ -482,12 +486,10 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at
[delta_indent * " " + line for line in test_code.split("\n")]
)
test_code_indented = "\n" + test_code_indented.strip("\n") + "\n"
if test_code_indented and relevant_line_number_to_insert_tests_after:

if test_code_indented and relevant_line_number_to_insert_tests_after:
# Step 1: Insert the generated test to the relevant line in the test file
additional_imports_lines = ""
with open(self.test_file_path, "r") as test_file:
original_content = test_file.read() # Store original content
original_content_lines = original_content.split("\n")
test_code_lines = test_code_indented.split("\n")
# insert the test code at the relevant line
@@ -546,6 +548,8 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at
"stderr": stderr,
"stdout": stdout,
"test": generated_test,
"original_test_file": original_content,
"processed_test_file": processed_test,
}

error_message = extract_error_message_python(fail_details["stdout"])
@@ -624,6 +628,8 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at
"stderr": stderr,
"stdout": stdout,
"test": generated_test,
"original_test_file": original_content,
"processed_test_file": processed_test,
}
self.failed_test_runs.append(
{
@@ -658,6 +664,8 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at
"stderr": stderr,
"stdout": stdout,
"test": generated_test,
"original_test_file": original_content,
"processed_test_file": processed_test,
}
self.failed_test_runs.append(
{
@@ -675,7 +683,6 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at

self.current_coverage = new_percentage_covered


for key in coverage_percentages:
if key not in self.last_coverage_percentages:
self.last_coverage_percentages[key] = 0
@@ -699,6 +706,8 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at
"stderr": stderr,
"stdout": stdout,
"test": generated_test,
"original_test_file": original_content,
"processed_test_file": processed_test,
}
except Exception as e:
self.logger.error(f"Error validating test: {e}")
@@ -709,8 +718,27 @@ def validate_test(self, generated_test: dict, generated_tests_dict: dict, num_at
"stderr": str(e),
"stdout": "",
"test": generated_test,
"original_test_file": original_content,
"processed_test_file": "N/A",
}

def to_dict(self):
return {
"source_file_path": self.source_file_path,
"test_file_path": self.test_file_path,
"code_coverage_report_path": self.code_coverage_report_path,
"test_command": self.test_command,
"llm_model": self.llm_model,
"test_command_dir": self.test_command_dir,
"included_files": self.included_files,
"coverage_type": self.coverage_type,
"desired_coverage": self.desired_coverage,
"additional_instructions": self.additional_instructions,
}

def to_json(self):
return json.dumps(self.to_dict())


def extract_error_message_python(fail_message):
"""
@@ -738,4 +766,4 @@ def extract_error_message_python(fail_message):
return ""
except Exception as e:
logging.error(f"Error extracting error message: {e}")
return ""
return ""
5 changes: 5 additions & 0 deletions cover_agent/main.py
@@ -96,6 +96,11 @@ def parse_args():
action="store_true",
help="Setting this to True considers the coverage of all the files in the coverage report. This means we consider a test as good if it increases coverage for a different file other than the source file. Default: False.",
)
parser.add_argument(
"--log-db-path",
default="",
help="Path to optional log database. Default: %(default)s.",
)
return parser.parse_args()


2 changes: 1 addition & 1 deletion cover_agent/version.txt
@@ -1 +1 @@
0.1.48
0.1.49
File renamed without changes.
66 changes: 66 additions & 0 deletions docs/database_usage.md
@@ -0,0 +1,66 @@
# Using a Test Database with Cover Agent
Note: This feature is still in beta.

## Requirements
Currently, only SQLite is supported. [SQLite](https://www.sqlite.org/) writes to and reads from a local `.db` file (versus a server-based database). The long-term goal is to support any type of database that is supported by [SQLAlchemy](https://www.sqlalchemy.org/).
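Concretely, Cover Agent turns the path you give it into a SQLAlchemy connection URL (see the `CoverAgent.py` diff above), so supporting another engine later should mostly be a matter of the URL scheme. A sketch (the PostgreSQL URL is purely hypothetical):

```python
from cover_agent.UnitTestDB import UnitTestDB

# Today: a SQLite file path becomes a SQLAlchemy URL, as CoverAgent does internally.
db = UnitTestDB(db_connection_string="sqlite:///run_tests.db")

# A hypothetical future engine would only change the URL, e.g.:
# UnitTestDB(db_connection_string="postgresql://user:pass@host/dbname")
```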

You'll need SQLite installed in order to view the tables, but to get started you can just create an empty `.db` file using the `touch` command. For example:
```
touch run_tests.db
```

## Running with an external DB
You can run Cover Agent using the `--log-db-path` option. For example:
```
cover-agent \
--source-file-path "templated_tests/python_fastapi/app.py" \
--test-file-path "templated_tests/python_fastapi/test_app.py" \
--code-coverage-report-path "templated_tests/python_fastapi/coverage.xml" \
--test-command "pytest --cov=. --cov-report=xml --cov-report=term" \
--test-command-dir "templated_tests/python_fastapi" \
--coverage-type "cobertura" \
--desired-coverage 70 \
--max-iterations 10 \
--log-db-path "run_tests.db"
```

Cover Agent will create a table called `unit_test_generation_attempts` within the database.
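If you'd rather verify this from Python than from the `sqlite3` command-line tool, here is a quick sketch using only the standard library (it assumes Cover Agent has already run with `--log-db-path "run_tests.db"`):

```python
import sqlite3

# List the tables Cover Agent created in the log database.
with sqlite3.connect("run_tests.db") as conn:
    tables = conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table'"
    ).fetchall()

print(tables)  # [('unit_test_generation_attempts',)]
```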

## Integration Tests
You can run the integration test suite, passing the local `.db` file into each Docker container, with the following example command from the root of this repository:
```
LOG_DB_PATH="<full_path_to_root_folder>/run_tests.db" tests_integration/test_all.sh
```

## Observing the test data
You can look at the test results using an external database reader or the basic SQLite command-line tool:
```
sqlite3 run_tests.db
```

Once in SQLite, you can list the tables and observe that, after running some tests, a table called `unit_test_generation_attempts` has been created:
```
sqlite> .tables
unit_test_generation_attempts
```

To get the definition of our table, we can run:
```
sqlite> PRAGMA table_info(unit_test_generation_attempts);
0|id|INTEGER|1||1
1|run_time|DATETIME|0||0
2|status|VARCHAR|0||0
3|reason|TEXT|0||0
4|exit_code|INTEGER|0||0
5|stderr|TEXT|0||0
6|stdout|TEXT|0||0
7|test_code|TEXT|0||0
8|imports|TEXT|0||0
9|original_test_file|TEXT|0||0
10|processed_test_file|TEXT|0||0
```

A simple `select * from unit_test_generation_attempts;` query will display all test results (which contain embedded newlines). This can be difficult to read from the command line, so a GUI database browser will probably serve you better.
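Alternatively, the same rows can be pulled through the new `UnitTestDB` helpers instead of raw SQL (a sketch, assuming the `run_tests.db` file from the examples above):

```python
from cover_agent.UnitTestDB import UnitTestDB

db = UnitTestDB(db_connection_string="sqlite:///run_tests.db")

# Print a brief summary line for each recorded attempt.
for attempt in db.select_all_attempts():
    print(attempt.id, attempt.run_time, attempt.status)
```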

You can also filter the results to show only failed tests, for example:
```
sqlite> select * from unit_test_generation_attempts where status = 'FAIL';
```
File renamed without changes.