Skip to content

Commit

Permalink
Merge pull request #28 from antgonza/mv-update_job_step
Browse files Browse the repository at this point in the history
mv qclient.update_job_step
  • Loading branch information
charles-cowart authored Feb 20, 2024
2 parents abf2c9b + e59ab12 commit e62a555
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 17 deletions.
22 changes: 19 additions & 3 deletions qp_woltka/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

from qiita_client import QiitaPlugin, QiitaCommand

from .woltka import woltka, woltka_syndna, calculate_cell_counts
from .woltka import (woltka, woltka_syndna, calculate_cell_counts,
calculate_rna_copy_counts)
from qp_woltka.util import generate_woltka_dflt_params, get_dbs, plugin_details
from os import environ

Expand Down Expand Up @@ -61,9 +62,9 @@
req_params, opt_params, outputs, dflt_param_set)
plugin.register_command(syndna_cmd)

# Cell counts
# WGS cell counts
req_params = {
'synDNA hits': ('artifact', ['BIOM']),
'SynDNA hits': ('artifact', ['BIOM']),
'Woltka per-genome': ('artifact', ['BIOM'])
}
opt_params = {
Expand All @@ -85,3 +86,18 @@
'Calculate Cell Counts', "Calculate cell counts per-genome",
calculate_cell_counts, req_params, opt_params, outputs, dflt_param_set)
plugin.register_command(calculate_cell_counts_cmd)


# MTX calculate RNA copy counts
# Registers the 'Calculate RNA Copy Counts' command with the plugin: it
# requires a single BIOM artifact ('Woltka per-gene'), takes no optional
# parameters and produces one BIOM output ('RNA copy counts').
# NOTE(review): the command name is also listed in the `valid_commands` of
# scripts/start_woltka -- keep both in sync when renaming.
req_params = {
'Woltka per-gene': ('artifact', ['BIOM'])
}
opt_params = {}
outputs = {
'RNA copy counts': 'BIOM'
}
# no optional parameters, so the only parameter set is an empty 'default'
dflt_param_set = {'default': {}}
calculate_rna_copy_counts_cmd = QiitaCommand(
'Calculate RNA Copy Counts', "Calculate RNA copy counts per-gene",
calculate_rna_copy_counts, req_params, opt_params, outputs, dflt_param_set)
plugin.register_command(calculate_rna_copy_counts_cmd)
24 changes: 20 additions & 4 deletions qp_woltka/tests/test_woltka.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from qp_woltka import plugin
from qp_woltka.woltka import (
woltka_to_array, woltka, woltka_syndna_to_array, woltka_syndna,
calculate_cell_counts)
calculate_cell_counts, calculate_rna_copy_counts)


class WoltkaTests(PluginTestCase):
Expand Down Expand Up @@ -557,7 +557,7 @@ def test_woltka_syndna_to_array(self):
self.assertCountEqual(ainfo, exp)

def test_calculate_cell_counts(self):
params = {'synDNA hits': 5, 'Woltka per-genome': 6,
params = {'SynDNA hits': 5, 'Woltka per-genome': 6,
'min_coverage': 1, 'read_length': 150,
'min_rsquared': 0.8}
job_id = 'my-job-id'
Expand All @@ -569,7 +569,7 @@ def test_calculate_cell_counts(self):
self.qclient, job_id, params, out_dir)
self.assertFalse(success)
self.assertEqual(msg, "No logs found, are you sure you selected the "
"correct artifact for 'synDNA hits'?")
"correct artifact for 'SynDNA hits'?")

# this should fail too, but now we are getting deeper into
# the validation
Expand Down Expand Up @@ -603,7 +603,7 @@ def test_calculate_cell_counts(self):
'type': "BIOM",
'name': "SynDNA Hits - Test",
'prep': pid}
params['synDNA hits'] = self.qclient.post(
params['SynDNA hits'] = self.qclient.post(
'/apitest/artifact/', data=data)['artifact']

success, ainfo, msg = calculate_cell_counts(
Expand All @@ -615,6 +615,22 @@ def test_calculate_cell_counts(self):

# Finally, adding a full test is close to impossible - too many steps.

def test_calculate_rna_copy_counts(self):
    """The command must reject an artifact that is not a per-gene table."""
    out_dir = mkdtemp()
    self._clean_up_files.append(out_dir)
    job_id = 'my-job-id'
    params = {'Woltka per-gene': 6}

    exp_msg = ("The selected 'Woltka per-gene' artifact "
               "doesn't look like one, did you select the "
               "correct file?")

    # artifact 6 is not a Woltka per-gene table, so validation must fail
    # before any computation is attempted
    success, ainfo, msg = calculate_rna_copy_counts(
        self.qclient, job_id, params, out_dir)
    self.assertFalse(success)
    self.assertEqual(msg, exp_msg)

    # Finally, adding a full test is close to impossible - too many steps.


if __name__ == '__main__':
main()
73 changes: 68 additions & 5 deletions qp_woltka/woltka.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import pandas as pd
from pysyndna import fit_linear_regression_models_for_qiita
from pysyndna import calc_ogu_cell_counts_per_g_of_sample_for_qiita
from pysyndna import calc_copies_of_ogu_orf_ssrna_per_g_sample_for_qiita

from qp_woltka.util import search_by_filename

Expand Down Expand Up @@ -570,18 +571,18 @@ def calculate_cell_counts(qclient, job_id, parameters, out_dir):
"""
error = ''
# let's get the syndna_id and prep in a single go
syndna_id = parameters['synDNA hits']
syndna_id = parameters['SynDNA hits']
syndna_files, prep = qclient.artifact_and_preparation_files(syndna_id)
if 'log' not in syndna_files.keys():
error = ("No logs found, are you sure you selected the correct "
"artifact for 'synDNA hits'?")
"artifact for 'SynDNA hits'?")
else:

lin_regress_by_sample_id_fp = [f for f in syndna_files['log']
if 'lin_regress_by_sample_id' in f]
if not lin_regress_by_sample_id_fp:
error = ("No 'lin_regress_by_sample_id' log found, are you sure "
" you selected the correct artifact for 'synDNA hits'?")
" you selected the correct artifact for 'SynDNA hits'?")
else:
lin_regress_by_sample_id_fp = lin_regress_by_sample_id_fp[0]

Expand Down Expand Up @@ -615,8 +616,8 @@ def calculate_cell_counts(qclient, job_id, parameters, out_dir):
output = calc_ogu_cell_counts_per_g_of_sample_for_qiita(
sample_info, prep, lin_regress_by_sample_id_fp,
ogu_counts_per_sample, ogu_lengths_fp,
parameters['read_length'], parameters['min_rsquared'],
parameters['min_rsquared'])
int(parameters['read_length']), float(parameters['min_coverage']),
float(parameters['min_rsquared']))
except Exception as e:
return False, None, str(e)

Expand All @@ -631,3 +632,65 @@ def calculate_cell_counts(qclient, job_id, parameters, out_dir):
'Cell counts', 'BIOM', [(biom_fp, 'biom'), (log_fp, 'log')])]

return True, ainfo, ""


def calculate_rna_copy_counts(qclient, job_id, parameters, out_dir):
    """Run calc_copies_of_ogu_orf_ssrna_per_g_sample_for_qiita

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values of the command; 'Woltka per-gene' holds the id
        of the per-gene BIOM artifact to process
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    bool, list, str
        The results of the job: the success flag, the list of ArtifactInfo
        (None on failure) and the error message (empty on success)
    """
    per_gene_id = parameters['Woltka per-gene']
    artifact = qclient.get("/qiita_db/artifacts/%s/" % per_gene_id)

    # Be tolerant of artifacts that carry no processing parameters or no
    # 'biom' files (presumably e.g. directly uploaded tables -- TODO confirm
    # against the Qiita artifact endpoint): the user should get the
    # "wrong artifact" message below instead of a TypeError/KeyError.
    aparams = artifact.get('processing_parameters') or {}
    biom_files = (artifact.get('files') or {}).get('biom') or []
    pg_fp = biom_files[0]['filepath'] if biom_files else ''

    if 'Database' not in aparams or not pg_fp.endswith('per-gene.biom'):
        error = ("The selected 'Woltka per-gene' artifact doesn't "
                 "look like one, did you select the correct file?")
        return False, None, error

    pergene = load_table(pg_fp)
    db_files = _process_database_files(aparams['Database'])
    ogu_orf_coords_fp = db_files["gene_coordinates"]

    # only the preparation info is needed here, the artifact files were
    # already retrieved above
    _, prep_info = qclient.artifact_and_preparation_files(per_gene_id)

    sample_info = qclient.get(
        '/qiita_db/prep_template/%s/data/?sample_information=true'
        % artifact['prep_information'][0])
    sample_info = pd.DataFrame.from_dict(
        sample_info['data'], orient='index')
    # the sample names come back as the index; expose them as a column
    sample_info.reset_index(names='sample_name', inplace=True)

    try:
        output, log_msgs = calc_copies_of_ogu_orf_ssrna_per_g_sample_for_qiita(
            sample_info, prep_info, pergene, ogu_orf_coords_fp)
    except Exception as e:
        # plugin boundary: report any computation failure as a job error
        return False, None, str(e)

    log_fp = f'{out_dir}/rna_copy_counts.log'
    with open(log_fp, 'w') as f:
        f.write(''.join(log_msgs))
    biom_fp = f'{out_dir}/rna_copy_counts.biom'
    with biom_open(biom_fp, 'w') as f:
        output.to_hdf5(f, f"RNA copy counts - {job_id}")

    artifacts_info = [
        ArtifactInfo(
            'RNA copy counts', 'BIOM', [(biom_fp, 'biom'), (log_fp, 'log')])]

    return True, artifacts_info, ""
11 changes: 6 additions & 5 deletions scripts/start_woltka
Original file line number Diff line number Diff line change
Expand Up @@ -32,22 +32,23 @@ def execute(url, job_id, out_dir):
command = job_info['command']
parameters = job_info['parameters']

qclient.update_job_step(
job_id, "Step 1 of 4: Collecting info and generating submission")

# these were defined in qp_woltka/__init__.py while defining the
# available commands for this plugin
valid_commands = [
'Woltka v0.1.4', 'SynDNA Woltka', 'Calculate Cell Counts']
'Woltka v0.1.4', 'SynDNA Woltka', 'Calculate Cell Counts',
'Calculate RNA Copy Counts']

# this if/elif is the current solution for
# https://github.com/qiita-spots/qiita/issues/3340
if command not in valid_commands:
raise ValueError(f'Not a valid command: "{command}"')
elif command == 'Calculate Cell Counts':
elif command in {'Calculate Cell Counts', 'Calculate RNA Copy Counts'}:
plugin(url, job_id, out_dir)
exit(0)

qclient.update_job_step(
job_id, "Step 1 of 4: Collecting info and generating submission")

artifact_id = parameters['input']
del parameters['input']
files, prep = qclient.artifact_and_preparation_files(artifact_id)
Expand Down

0 comments on commit e62a555

Please sign in to comment.