diff --git a/CHANGELOG.md b/CHANGELOG.md
index 908786de..1561fa6e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,11 @@
+## [1.0.9] - 2024-04-03
+
+### Added:
+- Support multiple tables
+
+### Fixed
+- Spatial elements not saved when data is not backed
+
## [1.0.8] - 2024-04-02
Hotfix: resolve issues related to `spatialdata>=1.0.0`
diff --git a/docs/tutorials/api_usage.ipynb b/docs/tutorials/api_usage.ipynb
index 390f911f..f8e13927 100644
--- a/docs/tutorials/api_usage.ipynb
+++ b/docs/tutorials/api_usage.ipynb
@@ -513,7 +513,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "Now, `sdata.table` is an `AnnData` object.\n",
+ "Now, `sdata.tables[\"table\"]` is an `AnnData` object.\n",
"- If you count the transcripts, then `adata.X` are the raw counts\n",
"- If you average the channel intensities, then `adata.X` are the channels intensities\n",
"- If you both count the transcript and average the intensities, then `adata.X` are the raw counts, and `adata.obsm[\"intensities\"]` are the channels intensities"
@@ -585,7 +585,7 @@
" \"CD3\": \"T cell\"\n",
"}\n",
"\n",
- "higher_z_score(sdata.table, marker_cell_dict)"
+ "higher_z_score(sdata.tables[\"table\"], marker_cell_dict)"
]
},
{
diff --git a/docs/tutorials/he.ipynb b/docs/tutorials/he.ipynb
index 00910b88..ce3c2a0c 100644
--- a/docs/tutorials/he.ipynb
+++ b/docs/tutorials/he.ipynb
@@ -437,7 +437,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "The resulting `GeoDataFrame` may have more columns than cells, because one cell may be inside multiple patches. We will keep only the first patch, and then save the resulting `\"cluster\"` column into the `sdata.table`."
+ "The resulting `GeoDataFrame` may have more columns than cells, because one cell may be inside multiple patches. We will keep only the first patch, and then save the resulting `\"cluster\"` column into the `sdata.tables[\"table\"]`."
]
},
{
@@ -446,7 +446,7 @@
"metadata": {},
"outputs": [],
"source": [
- "sdata.table.obs[\"cluster\"] = res_gdf[~res_gdf.index.duplicated()][\"cluster\"].values"
+ "sdata.tables[\"table\"].obs[\"cluster\"] = res_gdf[~res_gdf.index.duplicated()][\"cluster\"].values"
]
},
{
diff --git a/pyproject.toml b/pyproject.toml
index b3de3424..51b77ca6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "sopa"
-version = "1.0.8"
+version = "1.0.9"
description = "Spatial-omics pipeline and analysis"
documentation = "https://gustaveroussy.github.io/sopa"
homepage = "https://gustaveroussy.github.io/sopa"
diff --git a/sopa/_constants.py b/sopa/_constants.py
index cf1f8600..5c72c651 100644
--- a/sopa/_constants.py
+++ b/sopa/_constants.py
@@ -2,6 +2,7 @@ class SopaKeys:
CELLPOSE_BOUNDARIES = "cellpose_boundaries"
BAYSOR_BOUNDARIES = "baysor_boundaries"
PATCHES = "sopa_patches"
+ TABLE = "table"
BOUNDS = "bboxes"
PATCHES_ILOCS = "ilocs"
diff --git a/sopa/_sdata.py b/sopa/_sdata.py
index cca5e8b5..40c0fe67 100644
--- a/sopa/_sdata.py
+++ b/sopa/_sdata.py
@@ -130,13 +130,17 @@ def get_item(sdata: SpatialData, attr: str, key: str | None = None):
def get_intensities(sdata: SpatialData) -> pd.DataFrame | None:
"""Gets the intensity dataframe of shape `n_obs x n_channels`"""
- if not sdata.table.uns[SopaKeys.UNS_KEY][SopaKeys.UNS_HAS_INTENSITIES]:
+ assert SopaKeys.TABLE in sdata.tables, f"No '{SopaKeys.TABLE}' found in sdata.tables"
+
+ adata = sdata.tables[SopaKeys.TABLE]
+
+ if not adata.uns[SopaKeys.UNS_KEY][SopaKeys.UNS_HAS_INTENSITIES]:
return None
- if sdata.table.uns[SopaKeys.UNS_KEY][SopaKeys.UNS_HAS_TRANSCRIPTS]:
- return sdata.table.obsm[SopaKeys.INTENSITIES_OBSM]
+ if adata.uns[SopaKeys.UNS_KEY][SopaKeys.UNS_HAS_TRANSCRIPTS]:
+ return adata.obsm[SopaKeys.INTENSITIES_OBSM]
- return sdata.table.to_df()
+ return adata.to_df()
def iter_scales(image: MultiscaleSpatialImage) -> Iterator[xr.DataArray]:
@@ -187,6 +191,9 @@ def save_shapes(
name: str,
overwrite: bool = False,
) -> None:
+ if not sdata.is_backed():
+ return
+
elem_group = sdata._init_add_element(name=name, element_type="shapes", overwrite=overwrite)
write_shapes(
shapes=sdata.shapes[name],
@@ -200,6 +207,9 @@ def save_image(
name: str,
overwrite: bool = False,
) -> None:
+ if not sdata.is_backed():
+ return
+
elem_group = sdata._init_add_element(name=name, element_type="images", overwrite=overwrite)
write_image(
image=sdata.images[name],
@@ -213,11 +223,14 @@ def save_image(
assert elem_group.path == "images"
path = Path(elem_group.store.path) / "images" / name
image = _read_multiscale(path, raster_type="image")
- sdata._add_image_in_memory(name=name, image=image, overwrite=True)
+ sdata.images[name] = image
+
+def save_table(sdata: SpatialData, name: str):
+ if not sdata.is_backed():
+ return
-def save_table(sdata: SpatialData):
store = parse_url(sdata.path, mode="r+").store
root = zarr.group(store=store)
- elem_group = root.require_group(name="table")
- write_table(table=sdata.table, group=elem_group, name="table")
+ elem_group = root.require_group(name="tables")
+ write_table(table=sdata.tables[name], group=elem_group, name=name)
diff --git a/sopa/annotation/tangram/run.py b/sopa/annotation/tangram/run.py
index c7b6a423..85507af2 100644
--- a/sopa/annotation/tangram/run.py
+++ b/sopa/annotation/tangram/run.py
@@ -41,7 +41,9 @@ def tangram_annotate(
bag_size: Size of each bag on which tangram will be run. Use smaller bags to lower the RAM usage
max_obs_reference: Maximum number of cells used in `adata_sc` at each level. Decrease it to lower the RAM usage.
"""
- ad_sp = sdata.table
+ assert SopaKeys.TABLE in sdata.tables, f"No '{SopaKeys.TABLE}' found in sdata.tables"
+
+ ad_sp = sdata.tables[SopaKeys.TABLE]
MultiLevelAnnotation(
ad_sp,
diff --git a/sopa/cli/annotate.py b/sopa/cli/annotate.py
index 74060f11..deb028ca 100644
--- a/sopa/cli/annotate.py
+++ b/sopa/cli/annotate.py
@@ -21,17 +21,17 @@ def fluorescence(
For each cell, one z-score statistic is computed and the population with the highest z-score is attributed.
"""
- from pathlib import Path
-
+ from sopa._constants import SopaKeys
+ from sopa._sdata import save_table
from sopa.annotation.fluorescence import higher_z_score
from sopa.io.standardize import read_zarr_standardized
sdata = read_zarr_standardized(sdata_path)
- assert sdata.table is not None, "Annotation requires `sdata.table` to be not None"
+ assert SopaKeys.TABLE in sdata.tables, f"No '{SopaKeys.TABLE}' found in sdata.tables"
- higher_z_score(sdata.table, marker_cell_dict, cell_type_key)
- sdata.table.write_zarr(Path(sdata_path) / "table" / "table")
+ higher_z_score(sdata.tables[SopaKeys.TABLE], marker_cell_dict, cell_type_key)
+ save_table(sdata, SopaKeys.TABLE)
@app_annotate.command()
@@ -55,10 +55,10 @@ def tangram(
),
):
"""Tangram segmentation (i.e., uses an annotated scRNAseq reference to transfer cell-types)"""
- from pathlib import Path
-
import anndata
+ from sopa._constants import SopaKeys
+ from sopa._sdata import save_table
from sopa.annotation.tangram.run import tangram_annotate
from sopa.io.standardize import read_zarr_standardized
@@ -73,4 +73,4 @@ def tangram(
bag_size=bag_size,
max_obs_reference=max_obs_reference,
)
- sdata.table.write_zarr(Path(sdata_path) / "table" / "table")
+ save_table(sdata, SopaKeys.TABLE)
diff --git a/sopa/io/explorer/converter.py b/sopa/io/explorer/converter.py
index d0d7cbed..ceeb7576 100644
--- a/sopa/io/explorer/converter.py
+++ b/sopa/io/explorer/converter.py
@@ -7,6 +7,7 @@
import geopandas as gpd
from spatialdata import SpatialData
+from ..._constants import SopaKeys
from ..._sdata import get_boundaries, get_element, get_spatial_image, to_intrinsic
from . import (
write_cell_categories,
@@ -87,7 +88,7 @@ def write(
points_key: Name of the transcripts (key of `sdata.points`).
gene_column: Column name of the points dataframe containing the gene names.
pixel_size: Number of microns in a pixel. Invalid value can lead to inconsistent scales in the Explorer.
- layer: Layer of `sdata.table` where the gene counts are saved. If `None`, uses `sdata.table.X`.
+ layer: Layer of the AnnData table where the gene counts are saved. If `None`, uses `table.X`.
polygon_max_vertices: Maximum number of vertices for the cell polygons.
lazy: If `True`, will not load the full images in memory (except if the image memory is below `ram_threshold_gb`).
ram_threshold_gb: Threshold (in gygabytes) from which image can be loaded in memory. If `None`, the image is never loaded in memory.
@@ -100,8 +101,8 @@ def write(
image_key, image = get_spatial_image(sdata, image_key, return_key=True)
### Saving cell categories and gene counts
- if sdata.table is not None:
- adata = sdata.table
+ if SopaKeys.TABLE in sdata.tables:
+ adata = sdata.tables[SopaKeys.TABLE]
shapes_key = adata.uns["spatialdata_attrs"]["region"]
geo_df = sdata[shapes_key]
@@ -120,7 +121,7 @@ def write(
if _should_save(mode, "b") and geo_df is not None:
geo_df = to_intrinsic(sdata, geo_df, image_key)
- if sdata.table is not None:
+ if SopaKeys.TABLE in sdata.tables:
geo_df = geo_df.loc[adata.obs[adata.uns["spatialdata_attrs"]["instance_key"]]]
write_polygons(path, geo_df.geometry, polygon_max_vertices, pixel_size=pixel_size)
@@ -145,16 +146,16 @@ def write(
if _should_save(mode, "m"):
write_metadata(path, image_key, shapes_key, _get_n_obs(sdata, geo_df), pixel_size)
- if save_h5ad:
- sdata.table.write_h5ad(path / FileNames.H5AD)
+ if save_h5ad and SopaKeys.TABLE in sdata.tables:
+ sdata.tables[SopaKeys.TABLE].write_h5ad(path / FileNames.H5AD)
log.info(f"Saved files in the following directory: {path}")
log.info(f"You can open the experiment with 'open {path / FileNames.METADATA}'")
def _get_n_obs(sdata: SpatialData, geo_df: gpd.GeoDataFrame) -> int:
- if sdata.table is not None:
- return sdata.table.n_obs
+ if SopaKeys.TABLE in sdata.tables:
+ return sdata.tables[SopaKeys.TABLE].n_obs
return len(geo_df) if geo_df is not None else 0
diff --git a/sopa/io/report/generate.py b/sopa/io/report/generate.py
index 0a58ccda..71f7bc86 100644
--- a/sopa/io/report/generate.py
+++ b/sopa/io/report/generate.py
@@ -56,11 +56,12 @@ def _kdeplot_vmax_quantile(values: np.ndarray, quantile: float = 0.95):
class SectionBuilder:
def __init__(self, sdata: SpatialData):
self.sdata = sdata
+ self.adata = self.sdata.tables.get(SopaKeys.TABLE)
def _table_has(self, key, default=False):
- if SopaKeys.UNS_KEY not in self.sdata.table.uns:
+ if SopaKeys.UNS_KEY not in self.adata.uns:
return default
- return self.sdata.table.uns[SopaKeys.UNS_KEY].get(key, default)
+ return self.adata.uns[SopaKeys.UNS_KEY].get(key, default)
def general_section(self):
log.info("Writing general section")
@@ -87,7 +88,7 @@ def cell_section(self):
coord_system = get_intrinsic_cs(self.sdata, shapes_key)
fig = plt.figure()
- _kdeplot_vmax_quantile(self.sdata.table.obs[SopaKeys.AREA_OBS])
+ _kdeplot_vmax_quantile(self.adata.obs[SopaKeys.AREA_OBS])
plt.xlabel("Area (coordinate_system_unit ^ 2)")
return Section(
@@ -95,7 +96,7 @@ def cell_section(self):
[
SubSection(
"Number",
- Paragraph(f"Number of cells:
{Message(f'{self.sdata.table.n_obs} cells')}"),
+ Paragraph(f"Number of cells:
{Message(f'{self.adata.n_obs} cells')}"),
),
SubSection(
"Areas",
@@ -155,7 +156,7 @@ def transcripts_section(self):
log.info("Writing transcript section")
- mean_transcript_count = self.sdata.table.X.mean(0).A1
+ mean_transcript_count = self.adata.X.mean(0).A1
low_average = mean_transcript_count < LOW_AVERAGE_COUNT
QC_subsubsections = []
@@ -166,7 +167,7 @@ def transcripts_section(self):
)
)
QC_subsubsections.append(
- Message(", ".join(self.sdata.table.var_names[low_average]), color="danger")
+ Message(", ".join(self.adata.var_names[low_average]), color="danger")
)
fig1 = plt.figure()
@@ -174,7 +175,7 @@ def transcripts_section(self):
plt.xlabel("Count per transcript (average / cells)")
fig2 = plt.figure()
- _kdeplot_vmax_quantile(self.sdata.table.X.sum(1).A1)
+ _kdeplot_vmax_quantile(self.adata.X.sum(1).A1)
plt.xlabel("Transcript count per cell")
QC_subsubsections.append(Columns([Image(fig1), Image(fig2)]))
@@ -184,23 +185,21 @@ def transcripts_section(self):
def representation_section(self, max_obs: int = 400_000):
log.info("Writing representation section")
- adata = self.sdata.table
-
if self._table_has(SopaKeys.UNS_HAS_TRANSCRIPTS):
- sc.pp.normalize_total(adata)
- sc.pp.log1p(adata)
+ sc.pp.normalize_total(self.adata)
+ sc.pp.log1p(self.adata)
- if adata.n_obs > max_obs:
- sc.pp.subsample(adata, n_obs=max_obs)
+ if self.adata.n_obs > max_obs:
+ sc.pp.subsample(self.adata, n_obs=max_obs)
- log.info(f"Computing UMAP on {adata.n_obs} cells")
+ log.info(f"Computing UMAP on {self.adata.n_obs} cells")
- sc.pp.pca(adata)
- sc.pp.neighbors(adata)
- sc.tl.umap(adata)
+ sc.pp.pca(self.adata)
+ sc.pp.neighbors(self.adata)
+ sc.tl.umap(self.adata)
colors = self._table_has(SopaKeys.UNS_CELL_TYPES, None)
- sc.pl.umap(adata, color=colors, show=False)
+ sc.pl.umap(self.adata, color=colors, show=False)
return Section(
"Representation",
diff --git a/sopa/io/standardize.py b/sopa/io/standardize.py
index 78af74ae..1a2fbbb1 100644
--- a/sopa/io/standardize.py
+++ b/sopa/io/standardize.py
@@ -6,7 +6,7 @@
import spatialdata
from spatialdata import SpatialData
-from .._constants import VALID_DIMENSIONS
+from .._constants import VALID_DIMENSIONS, SopaKeys
from .._sdata import get_spatial_image
from ..utils.image import _check_integer_dtype
@@ -42,12 +42,12 @@ def sanity_check(sdata: SpatialData, delete_table: bool = False, warn: bool = Fa
# log.warn(f"Channel names are not strings. Converting {image_channels} to string values.")
# sdata[image_key].data = sdata[image_key].assign_coords(c=image_channels.astype(str))
- if sdata.table is not None:
+ if SopaKeys.TABLE in sdata.tables:
if delete_table:
log.info(
- "The table (i.e. `sdata.table`) will not be saved, since it will be created later by sopa"
+ f"The table `sdata.tables['{SopaKeys.TABLE}']` will not be saved, since it will be created later by sopa"
)
- del sdata.table
+ del sdata.tables[SopaKeys.TABLE]
def read_zarr_standardized(path: str, warn: bool = False) -> SpatialData:
@@ -73,8 +73,8 @@ def write_standardized(sdata: SpatialData, sdata_path: str, delete_table: bool =
sanity_check(sdata, delete_table)
assert (
- sdata.table is None
- ), "sdata.table exists. Delete it you want to use sopa, to avoid conflicts with future table generation"
+ SopaKeys.TABLE not in sdata.tables
+ ), f"sdata.tables['{SopaKeys.TABLE}'] exists. Delete it you want to use sopa, to avoid conflicts with future table generation"
if len(sdata.points) == 0:
log.warn("No transcripts found. Some tools from sopa will not be available.")
diff --git a/sopa/segmentation/aggregate.py b/sopa/segmentation/aggregate.py
index 3f02a82c..de4141b0 100644
--- a/sopa/segmentation/aggregate.py
+++ b/sopa/segmentation/aggregate.py
@@ -63,13 +63,13 @@ def __init__(
self.shapes_key = shapes_key
self.geo_df = self.sdata[shapes_key]
- if sdata.table is not None and len(self.geo_df) != sdata.table.n_obs:
- log.warn(
- f"Table already existing with {sdata.table.n_obs} obs, but aggregating on {len(self.geo_df)} cells. Deleting table."
- )
- del sdata.table
-
- self.table = sdata.table
+ self.table = None
+ if SopaKeys.TABLE in sdata.tables:
+ table = sdata.tables[SopaKeys.TABLE]
+ if len(self.geo_df) != table.n_obs:
+ log.warn("Not using existing table (aggregating on a different number of cells)")
+ else:
+ self.table = table
def standardize_table(self):
self.table.obs_names = list(map(str_cell_id, range(self.table.n_obs)))
@@ -101,10 +101,8 @@ def standardize_table(self):
instance_key=SopaKeys.INSTANCE_KEY,
)
- if self.sdata.table is not None and self.overwrite:
- del self.sdata.table
- self.sdata.tables["table"] = self.table
- save_table(self.sdata)
+ self.sdata.tables[SopaKeys.TABLE] = self.table
+ save_table(self.sdata, SopaKeys.TABLE)
def filter_cells(self, where_filter: np.ndarray):
log.info(f"Filtering {where_filter.sum()} cells")
diff --git a/sopa/segmentation/baysor/resolve.py b/sopa/segmentation/baysor/resolve.py
index a66d0190..dd7816ce 100644
--- a/sopa/segmentation/baysor/resolve.py
+++ b/sopa/segmentation/baysor/resolve.py
@@ -16,7 +16,7 @@
from tqdm import tqdm
from ..._constants import SopaKeys
-from ..._sdata import get_element, get_key, save_shapes
+from ..._sdata import get_element, get_key, save_shapes, save_table
from .. import aggregate, shapes
log = logging.getLogger(__name__)
@@ -174,12 +174,9 @@ def resolve(
sdata.shapes[SopaKeys.BAYSOR_BOUNDARIES] = geo_df
save_shapes(sdata, SopaKeys.BAYSOR_BOUNDARIES, overwrite=True)
- if sdata.table is not None:
- log.warn("Table already existing. It will be replaced by the new one.")
- del sdata.table
-
- sdata.table = table
+ sdata.tables[SopaKeys.TABLE] = table
+ save_table(sdata, SopaKeys.TABLE)
log.info(
- f"Added sdata.table, and {len(geo_df)} cell boundaries to sdata['{SopaKeys.BAYSOR_BOUNDARIES}']"
+ f"Added sdata.tables['{SopaKeys.TABLE}'], and {len(geo_df)} cell boundaries to sdata['{SopaKeys.BAYSOR_BOUNDARIES}']"
)
diff --git a/sopa/spatial/_build.py b/sopa/spatial/_build.py
index 196311eb..d5d01590 100644
--- a/sopa/spatial/_build.py
+++ b/sopa/spatial/_build.py
@@ -19,6 +19,8 @@
from sklearn.metrics.pairwise import euclidean_distances
from spatialdata import SpatialData
+from .._constants import SopaKeys
+
log = logging.getLogger(__name__)
__all__ = ["spatial_neighbors"]
@@ -40,7 +42,7 @@ def spatial_neighbors(
set_diag: Whether to set the diagonal of the spatial connectivities to `1.0`.
"""
if isinstance(adata, SpatialData):
- adata = adata.table
+ adata = adata.tables[SopaKeys.TABLE]
assert (
radius is None or len(radius) == 2
diff --git a/sopa/spatial/distance.py b/sopa/spatial/distance.py
index 211e1845..f59c8858 100644
--- a/sopa/spatial/distance.py
+++ b/sopa/spatial/distance.py
@@ -8,6 +8,7 @@
from spatialdata import SpatialData
from tqdm import tqdm
+from .._constants import SopaKeys
from ._build import _check_has_delaunay
log = logging.getLogger(__name__)
@@ -31,7 +32,7 @@ def cells_to_groups(
A `Dataframe` of shape `n_obs * n_groups`, or `None` if `key_added_prefix` was provided (in this case, the dataframe is saved in `"{key_added_prefix}{group_key}"`)
"""
if isinstance(adata, SpatialData):
- adata = adata.table
+ adata = adata.tables[SopaKeys.TABLE]
_check_has_delaunay(adata)
@@ -91,7 +92,7 @@ def mean_distance(
`DataFrame` of shape `n_groups * n_groups_target` of mean hop-distances
"""
if isinstance(adata, SpatialData):
- adata = adata.table
+ adata = adata.tables[SopaKeys.TABLE]
target_group_key = group_key if target_group_key is None else target_group_key
diff --git a/sopa/spatial/morpho.py b/sopa/spatial/morpho.py
index 84402675..47cc6e69 100644
--- a/sopa/spatial/morpho.py
+++ b/sopa/spatial/morpho.py
@@ -42,7 +42,7 @@ def geometrize_niches(
A `GeoDataFrame` with geometries for each niche component. We also compute the area/perimeter/roundness of each component.
"""
if isinstance(adata, SpatialData):
- adata = adata.table
+ adata = adata.tables[SopaKeys.TABLE]
_check_has_delaunay(adata)
data = {"geometry": [], niche_key: []}
@@ -138,7 +138,7 @@ def niches_geometry_stats(
A `DataFrame` of shape `n_niches * n_statistics`
"""
if isinstance(adata, SpatialData):
- adata = adata.table
+ adata = adata.tables[SopaKeys.TABLE]
gdf = geometrize_niches(adata, niche_key, **geometrize_niches_kwargs)
value_counts = gdf[niche_key].value_counts()
diff --git a/workflow/utils.py b/workflow/utils.py
index f78a0b0f..c45dfe69 100644
--- a/workflow/utils.py
+++ b/workflow/utils.py
@@ -47,7 +47,7 @@ def __init__(self, config: dict) -> None:
self.shapes_dir = self.sdata_path / "shapes"
self.points_dir = self.sdata_path / "points"
self.images_dir = self.sdata_path / "images"
- self.table_dir = self.sdata_path / "table"
+ self.table_dir = self.sdata_path / "tables"
# sdata.shapes
self.baysor_boundaries = self.shapes_dir / "baysor_boundaries"