Skip to content

Commit

Permalink
Merge pull request #526 from UW-GAC/feature/data-summary-cdsa-workspaces
Browse files (browse the repository at this point in the history)
Show CDSA workspaces in data summary view
  • Loading branch information
amstilp authored Apr 6, 2024
2 parents: 70cf339 + edafe17; commit 46637c0
Show file tree
Hide file tree
Showing 3 changed files with 321 additions and 43 deletions.
28 changes: 22 additions & 6 deletions primed/primed_anvil/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from anvil_consortium_manager.models import WorkspaceGroupSharing
from django.db.models import Exists, F, OuterRef, Value

from primed.cdsa.models import CDSAWorkspace
from primed.dbgap.models import dbGaPWorkspace
from primed.miscellaneous_workspaces.models import OpenAccessWorkspace

Expand Down Expand Up @@ -34,7 +35,7 @@ def get_summary_table_data():
"access_mechanism",
# Rename columns to have the same names.
workspace_name=F("workspace__name"),
study=F("dbgap_study_accession__studies__short_name"),
study_name=F("dbgap_study_accession__studies__short_name"),
data=F("available_data__name"),
)
df_dbgap = pd.DataFrame.from_dict(dbgap)
Expand All @@ -48,11 +49,25 @@ def get_summary_table_data():
"access_mechanism",
# Rename columns to have the same names.
workspace_name=F("workspace__name"),
study=F("studies__short_name"),
study_name=F("studies__short_name"),
data=F("available_data__name"),
)
df_open = pd.DataFrame.from_dict(open)

# Query for CDSAWorkspaces.
cdsa = CDSAWorkspace.objects.annotate(
access_mechanism=Value("CDSA"),
is_shared=Exists(shared),
).values(
"is_shared",
"access_mechanism",
# Rename columns to have the same names.
workspace_name=F("workspace__name"),
study_name=F("study__short_name"),
data=F("available_data__name"),
)
df_cdsa = pd.DataFrame.from_dict(cdsa)

# This union may not work with MySQL < 10.3:
# https://code.djangoproject.com/ticket/31445
# qs = dbgap.union(open)
Expand All @@ -65,20 +80,20 @@ def get_summary_table_data():
# df = pd.DataFrame.from_dict(qs)

# Instead combine in pandas.
df = pd.concat([df_dbgap, df_open])
df = pd.concat([df_cdsa, df_dbgap, df_open])

# If there are no workspaces, return an empty list.
if df.empty:
return []

# Sort by specific columns
df = df.sort_values(by=["study", "access_mechanism"])
df = df.sort_values(by=["study_name", "access_mechanism"])
# Concatenate multiple studies into a single comma-delimited string.
df = (
df.groupby(
["workspace_name", "data", "is_shared", "access_mechanism"],
dropna=False,
)["study"]
)["study_name"]
.apply(lambda x: ", ".join(x))
.reset_index()
.drop("workspace_name", axis=1)
Expand All @@ -90,7 +105,7 @@ def get_summary_table_data():
data = (
pd.pivot_table(
df,
index=["study", "is_shared", "access_mechanism"],
index=["study_name", "is_shared", "access_mechanism"],
columns=["data"],
# set this to len to count the number of workspaces instead of returning a boolean value.
aggfunc=lambda x: len(x) > 0,
Expand All @@ -100,6 +115,7 @@ def get_summary_table_data():
)
.rename_axis(columns=None)
.reset_index()
.rename(columns={"study_name": "study", "B": "c"})
)
# Remove the dummy "no_data" column if it exists.
if "no_data" in data:
Expand Down
Loading

0 comments on commit 46637c0

Please sign in to comment.