Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Show CDSA workspaces in data summary view #526

Merged
merged 2 commits into from
Apr 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 22 additions & 6 deletions primed/primed_anvil/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from anvil_consortium_manager.models import WorkspaceGroupSharing
from django.db.models import Exists, F, OuterRef, Value

from primed.cdsa.models import CDSAWorkspace
from primed.dbgap.models import dbGaPWorkspace
from primed.miscellaneous_workspaces.models import OpenAccessWorkspace

Expand Down Expand Up @@ -34,7 +35,7 @@ def get_summary_table_data():
"access_mechanism",
# Rename columns to have the same names.
workspace_name=F("workspace__name"),
study=F("dbgap_study_accession__studies__short_name"),
study_name=F("dbgap_study_accession__studies__short_name"),
data=F("available_data__name"),
)
df_dbgap = pd.DataFrame.from_dict(dbgap)
Expand All @@ -48,11 +49,25 @@ def get_summary_table_data():
"access_mechanism",
# Rename columns to have the same names.
workspace_name=F("workspace__name"),
study=F("studies__short_name"),
study_name=F("studies__short_name"),
data=F("available_data__name"),
)
df_open = pd.DataFrame.from_dict(open)

# Query for CDSAWorkspaces.
cdsa = CDSAWorkspace.objects.annotate(
access_mechanism=Value("CDSA"),
is_shared=Exists(shared),
).values(
"is_shared",
"access_mechanism",
# Rename columns to have the same names.
workspace_name=F("workspace__name"),
study_name=F("study__short_name"),
data=F("available_data__name"),
)
df_cdsa = pd.DataFrame.from_dict(cdsa)

# This union may not work with MySQL < 10.3:
# https://code.djangoproject.com/ticket/31445
# qs = dbgap.union(open)
Expand All @@ -65,20 +80,20 @@ def get_summary_table_data():
# df = pd.DataFrame.from_dict(qs)

# Instead combine in pandas.
df = pd.concat([df_dbgap, df_open])
df = pd.concat([df_cdsa, df_dbgap, df_open])

# If there are no workspaces, return an empty list.
if df.empty:
return []

# Sort by specific columns
df = df.sort_values(by=["study", "access_mechanism"])
df = df.sort_values(by=["study_name", "access_mechanism"])
# Concatenate multiple studies into a single comma-delimited string.
df = (
df.groupby(
["workspace_name", "data", "is_shared", "access_mechanism"],
dropna=False,
)["study"]
)["study_name"]
.apply(lambda x: ", ".join(x))
.reset_index()
.drop("workspace_name", axis=1)
Expand All @@ -90,7 +105,7 @@ def get_summary_table_data():
data = (
pd.pivot_table(
df,
index=["study", "is_shared", "access_mechanism"],
index=["study_name", "is_shared", "access_mechanism"],
columns=["data"],
# Set aggfunc to len to count the number of workspaces instead of returning a boolean value.
aggfunc=lambda x: len(x) > 0,
Expand All @@ -100,6 +115,7 @@ def get_summary_table_data():
)
.rename_axis(columns=None)
.reset_index()
.rename(columns={"study_name": "study", "B": "c"})
)
# Remove the dummy "no_data" column if it exists.
if "no_data" in data:
Expand Down
Loading
Loading