Skip to content

Commit

Permalink
Merge pull request #535 from UW-GAC/feature/phenotype-inventory-workf…
Browse files Browse the repository at this point in the history
…low-input

Generate input for the phenotype inventory workflow
  • Loading branch information
amstilp authored Apr 29, 2024
2 parents 13ae966 + f0c2327 commit 4ae379d
Show file tree
Hide file tree
Showing 8 changed files with 612 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ repos:
- id: isort

- repo: https://github.com/PyCQA/flake8
rev: 3.9.2
rev: 7.0.0
hooks:
- id: flake8
args: ['--config=setup.cfg']
Expand Down
50 changes: 50 additions & 0 deletions add_phenotype_inventory_input_example_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Temporary script to create some test data.
# Run with: python manage.py shell < add_phenotype_inventory_input_example_data.py

from anvil_consortium_manager.tests.factories import (
ManagedGroupFactory,
WorkspaceGroupSharingFactory,
)

from primed.cdsa.tests.factories import CDSAWorkspaceFactory
from primed.dbgap.tests.factories import dbGaPWorkspaceFactory
from primed.miscellaneous_workspaces.tests.factories import OpenAccessWorkspaceFactory
from primed.primed_anvil.tests.factories import StudyFactory

# The FHS study is attached to both the dbGaP and the open access workspaces below.
fhs = StudyFactory.create(short_name="FHS", full_name="Framingham Heart Study")

# dbGaP workspace whose study accession is linked to FHS.
dbgap_workspace_kwargs = {
    "dbgap_study_accession__dbgap_phs": 7,
    "dbgap_study_accession__studies": [fhs],
    "dbgap_version": 33,
    "dbgap_participant_set": 12,
    "dbgap_consent_code": 1,
    "dbgap_consent_abbreviation": "HMB",
    "workspace__name": "DBGAP_FHS_v33_p12_HMB",
}
workspace_dbgap = dbGaPWorkspaceFactory.create(**dbgap_workspace_kwargs)

# CDSA workspace; the study short name is handed to the factory.
workspace_cdsa = CDSAWorkspaceFactory.create(
    study__short_name="MESA",
    workspace__name="CDSA_MESA_HMB",
)

# Open access workspace; FHS is added as a study after the workspace exists.
workspace_open_access = OpenAccessWorkspaceFactory.create(
    workspace__name="OPEN_ACCESS_FHS",
)
workspace_open_access.studies.add(fhs)

# Share each workspace created above with the PRIMED_ALL group, in creation order.
primed_all = ManagedGroupFactory.create(name="PRIMED_ALL")
for example_workspace in (workspace_dbgap, workspace_cdsa, workspace_open_access):
    WorkspaceGroupSharingFactory.create(
        workspace=example_workspace.workspace,
        group=primed_all,
    )
100 changes: 98 additions & 2 deletions primed/primed_anvil/helpers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from itertools import groupby

import pandas as pd
from anvil_consortium_manager.models import WorkspaceGroupSharing
from django.db.models import Exists, F, OuterRef, Value
from anvil_consortium_manager.models import ManagedGroup, WorkspaceGroupSharing
from django.db.models import CharField, Exists, F, OuterRef, Value
from django.db.models.functions import Concat

from primed.cdsa.models import CDSAWorkspace
from primed.dbgap.models import dbGaPWorkspace
Expand Down Expand Up @@ -127,3 +130,96 @@ def get_summary_table_data():
# Convert to a list of dictionaries for passing to the django-tables2 table.
data = data.to_dict(orient="records")
return data


def _get_shared_workspace_study_rows(model, study_lookup, group):
    """Return one row per (workspace, study) for workspaces of `model` shared with `group`.

    Args:
        model: Workspace data model class to query (its `objects` manager is used).
        study_lookup: ORM lookup path from `model` to the study short name.
        group: Group that the workspace must be shared with.

    Returns:
        A values queryset of dictionaries with the keys "workspace_name"
        ("<billing project name>/<workspace name>") and "study_names" (the value
        found at `study_lookup` for that row).
    """
    return (
        model.objects.filter(
            # Just those that are shared with the group.
            workspace__workspacegroupsharing__group=group,
        )
        .annotate(
            workspace_name=Concat(
                F("workspace__billing_project__name"),
                Value("/"),
                F("workspace__name"),
                output_field=CharField(),
            ),
            study_names=F(study_lookup),
        )
        .values(
            "workspace_name",
            "study_names",
        )
    )


def get_workspaces_for_phenotype_inventory():
    """Get input to the primed-phenotype-inventory workflow.

    This function generates the input for the "workspaces" field of the primed-phenotype-inventory workflow. Only
    dbGaP, CDSA, and open access workspaces that have been shared with the PRIMED_ALL group are included.

    See dockstore link: https://dockstore.org/workflows/github.com/UW-GAC/primed-inventory-workflows/primed_phenotype_inventory:main?tab=info

    The "workspaces" field has the format:
    {
        "billing-project-1/workspace-1": "study1, study2",
        "billing-project-2/workspace-2": "study3",
        ...
    }

    Returns:
        A dictionary mapping "<billing project>/<workspace>" to a comma-separated string of the sorted study
        short names for that workspace. A missing study name is reported as an empty string.

    Raises:
        ManagedGroup.DoesNotExist: if there is no ManagedGroup named "PRIMED_ALL".
    """  # noqa: E501

    # primed-all group. We will need this to determine if the workspace is shared with PRIMED_ALL.
    primed_all = ManagedGroup.objects.get(name="PRIMED_ALL")

    dbgap_workspaces = _get_shared_workspace_study_rows(
        dbGaPWorkspace, "dbgap_study_accession__studies__short_name", primed_all
    )
    cdsa_workspaces = _get_shared_workspace_study_rows(CDSAWorkspace, "study__short_name", primed_all)
    open_access_workspaces = _get_shared_workspace_study_rows(OpenAccessWorkspace, "studies__short_name", primed_all)

    # Combine all querysets and process into the expected output for the AnVIL workflow.
    workspaces = dbgap_workspaces.union(cdsa_workspaces).union(open_access_workspaces)

    # groupby only merges *adjacent* rows with the same key, and the union has no guaranteed order.
    # Sort by workspace name first so that every study of a workspace lands in a single group;
    # otherwise a later group for the same workspace would overwrite the earlier one.
    rows = sorted(workspaces, key=lambda row: row["workspace_name"])

    workspace_studies = {}
    for workspace_name, workspace_rows in groupby(rows, key=lambda row: row["workspace_name"]):
        # A missing study name is None; report it as an empty string.
        study_names = sorted(row["study_names"] or "" for row in workspace_rows)
        workspace_studies[workspace_name] = ", ".join(study_names)

    return workspace_studies
Loading

0 comments on commit 4ae379d

Please sign in to comment.