Skip to content

Commit

Permalink
update unit test and add build script
Browse files Browse the repository at this point in the history
  • Loading branch information
murphycj committed Sep 30, 2024
1 parent f899b7a commit 323cdf8
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 5 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ jobs:
pip install .
cd test
../bin/agfusion download -g hg19
../bin/agfusion download -s homo_sapiens -r 95
../bin/agfusion download -s homo_sapiens -r 111
../bin/agfusion download -s mus_musculus -r 84
pyensembl install --release 84 --species mus_musculus
pyensembl install --release 75 --species homo_sapiens
pyensembl install --release 95 --species homo_sapiens
pyensembl install --release 111 --species homo_sapiens
- name: Test with pytest
run: |
cd test
Expand Down
45 changes: 45 additions & 0 deletions bin/build_dbs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""
Download and build the agfusion database for a range of releases.
"""

import subprocess


def run_command(command):
    """Echo a shell command, then execute it, failing loudly on error.

    Args:
        command: The full command line as a single string; it is handed to
            the shell (``shell=True``).

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status (a consequence of ``check=True``).
    """
    # Flush so the echo is written before the child process produces output;
    # with buffered stdout (e.g. redirected to a log) the order can otherwise
    # be reversed.
    print(f"Running: {command}", flush=True)
    subprocess.run(command, shell=True, check=True)


species = "homo_sapiens"

# Build every release from 96 through 111 (range() excludes its upper bound).
for i in range(96, 112):
    # Ask S3 whether the compressed database for this release already exists.
    s3_check_command = f"aws s3 ls s3://agfusion/agfusion.{species}.{i}.db.gz"
    # check=False is deliberate: a non-zero exit status is the signal that the
    # object is absent, so it must be inspected below rather than raised as
    # CalledProcessError (which would abort before anything gets built).
    result = subprocess.run(
        s3_check_command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=False,
    )

    if result.returncode == 0:
        print(f"File for release {i} already exists in S3. Skipping...")
        continue

    print(f"Building release {i}")

    # Install the release using pyensembl
    run_command(f"pyensembl install --release {i} --species {species}")

    # Build the agfusion database
    run_command(f"agfusion build -d . -s {species} -r {i} --pfam Pfam-A.clans.tsv")

    # Compress the database
    run_command(f"gzip agfusion.{species}.{i}.db")

    # Upload the compressed file to S3
    run_command(f"aws s3 cp agfusion.{species}.{i}.db.gz s3://agfusion")

    # Delete all files for the release from pyensembl
    run_command(f"pyensembl delete-all-files --release {i} --species {species}")
7 changes: 4 additions & 3 deletions test/test_parsers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Unit tests.
"""

import unittest
from os.path import abspath, curdir, join

Expand All @@ -15,9 +16,9 @@
db_human = database.AGFusionDB(abspath(join(curdir, "agfusion.homo_sapiens.75.db")))
db_human.build = "homo_sapiens_75"

data_human95 = pyensembl.EnsemblRelease(95, "human")
db_human95 = database.AGFusionDB(abspath(join(curdir, "agfusion.homo_sapiens.95.db")))
db_human95.build = "homo_sapiens_95"
data_human95 = pyensembl.EnsemblRelease(111, "human")
db_human95 = database.AGFusionDB(abspath(join(curdir, "agfusion.homo_sapiens.111.db")))
db_human95.build = "homo_sapiens_111"


BASEDIR = "./data/FusionsFindingAlgorithms"
Expand Down

0 comments on commit 323cdf8

Please sign in to comment.