Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[python] Add support for Visium v1 #3510

Merged
merged 3 commits into from
Jan 3, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 41 additions & 42 deletions apis/python/src/tiledbsoma/io/spatial/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,7 @@
import json
import warnings
from pathlib import Path
from typing import (
TYPE_CHECKING,
List,
Sequence,
Tuple,
Type,
Union,
)
from typing import TYPE_CHECKING, List, Sequence, Tuple, Type

import attrs
import numpy as np
Expand Down Expand Up @@ -104,7 +97,7 @@
@classmethod
def from_base_folder(
cls,
base_path: Union[str, Path],
base_path: str | Path,
*,
gene_expression: str | Path | None = None,
scale_factors: str | Path | None = None,
Expand Down Expand Up @@ -162,9 +155,9 @@
@classmethod
def from_spatial_folder(
cls,
spatial_dir: Union[str, Path],
spatial_dir: str | Path,
gene_expression: str | Path,
*,
gene_expression: Union[str, Path],
scale_factors: str | Path | None = None,
tissue_positions: str | Path | None = None,
fullres_image: str | Path | None = None,
Expand All @@ -189,30 +182,22 @@
try:
version = _read_visium_software_version(gene_expression)
except (KeyError, ValueError):
warnings.warn(
raise ValueError(

Check warning on line 185 in apis/python/src/tiledbsoma/io/spatial/ingest.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/io/spatial/ingest.py#L185

Added line #L185 was not covered by tests
"Unable to determine Space Ranger vesion from gene expression file."
)
major_version = version[0] if isinstance(version, tuple) else version

# Find the tissue positions file path if it wasn't supplied.
if tissue_positions is None:
major_version = version[0] if isinstance(version, tuple) else version

Check warning on line 191 in apis/python/src/tiledbsoma/io/spatial/ingest.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/io/spatial/ingest.py#L191

Added line #L191 was not covered by tests
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not use the property instead?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The class wasn't created yet.

if major_version == 1:
possible_file_names = [
"tissue_positions_list.csv",
"tissue_positions.csv",
]
else:
possible_file_names = [
"tissue_positions.csv",
"tissue_positions_list.csv",
]
for possible in possible_file_names:
tissue_positions = spatial_dir / possible
if tissue_positions.exists():
break
possible_file_name = "tissue_positions_list.csv"

Check warning on line 193 in apis/python/src/tiledbsoma/io/spatial/ingest.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/io/spatial/ingest.py#L193

Added line #L193 was not covered by tests
else:
possible_file_name = "tissue_positions.csv"
tissue_positions = spatial_dir / possible_file_name
if not tissue_positions.exists():

Check warning on line 197 in apis/python/src/tiledbsoma/io/spatial/ingest.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/io/spatial/ingest.py#L195-L197

Added lines #L195 - L197 were not covered by tests
raise OSError(
f"No tissue position file found in {spatial_dir}. Tried files: "
f"{possible_file_names}. If the file has been renamed it can be "
f"No tissue position file found in {spatial_dir}. Tried file: "
f"{possible_file_name}. If the file has been renamed it can be "
f"directly specified using argument `tissue_positions`."
)

Expand Down Expand Up @@ -247,17 +232,7 @@
lowres_image: Path | None = attrs.field(
converter=optional_path_converter, validator=optional_path_validator
)
version: int | Tuple[int, int, int] | None = attrs.field(default=None)

@version.validator
def _validate_version( # type: ignore[no-untyped-def]
self, attribute, value: int | Tuple[int, int, int] | None
) -> None:
major_version = value[0] if isinstance(value, tuple) else value
if major_version is not None and major_version != 2:
warnings.warn(
f"Support for Space Ranger version {value} has not been tests."
)
version: int | Tuple[int, int, int]

@property
def has_image(self) -> bool:
Expand All @@ -267,10 +242,14 @@
or self.lowres_image is not None
)

@property
def major_version(self) -> int:
    """Major component of the Space Ranger version.

    ``version`` may be stored either as a bare integer or as a tuple whose
    first element is the major number; both forms yield the same integer.
    """
    release = self.version
    if isinstance(release, tuple):
        # Tuple form: the leading element is the major version.
        return release[0]
    return release

Check warning on line 247 in apis/python/src/tiledbsoma/io/spatial/ingest.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/io/spatial/ingest.py#L247

Added line #L247 was not covered by tests


def from_visium(
experiment_uri: str,
input_path: Union[Path, VisiumPaths],
input_path: Path | VisiumPaths,
measurement_name: str,
scene_name: str,
*,
Expand All @@ -284,7 +263,7 @@
image_channel_first: bool = True,
ingest_mode: IngestMode = "write",
use_relative_uri: bool | None = None,
X_kind: Union[Type[SparseNDArray], Type[DenseNDArray]] = SparseNDArray,
X_kind: Type[SparseNDArray] | Type[DenseNDArray] = SparseNDArray,
registration_mapping: "ExperimentAmbientLabelMapping | None" = None,
uns_keys: Sequence[str] | None = None,
additional_metadata: "AdditionalMetadata" = None,
Expand Down Expand Up @@ -423,6 +402,20 @@
else VisiumPaths.from_base_folder(input_path, use_raw_counts=use_raw_counts)
)

# Check the version.
major_version = (

Check warning on line 406 in apis/python/src/tiledbsoma/io/spatial/ingest.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/io/spatial/ingest.py#L406

Added line #L406 was not covered by tests
input_paths.version[0]
if isinstance(input_paths.version, tuple)
else input_paths.version
)
if major_version is None:
raise ValueError("Unable to determine version number of Visium input")
if major_version not in {1, 2, 3}:
raise ValueError(

Check warning on line 414 in apis/python/src/tiledbsoma/io/spatial/ingest.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/io/spatial/ingest.py#L411-L414

Added lines #L411 - L414 were not covered by tests
f"Visium version {input_paths.version} is not supported. Expected major "
f"version 1, 2, or 3."
)

# Get JSON scale factors.
with open(
input_paths.scale_factors, mode="r", encoding="utf-8"
Expand Down Expand Up @@ -572,6 +565,7 @@
with _write_visium_spots(
loc_uri,
input_paths.tissue_positions,
input_paths.major_version,
pixels_per_spot_diameter,
obs_df,
obs_id_name,
Expand Down Expand Up @@ -686,6 +680,7 @@
def _write_visium_spots(
df_uri: str,
input_tissue_positions: Path,
major_version: int,
spot_diameter: float,
obs_df: pd.DataFrame,
id_column_name: str,
Expand All @@ -698,8 +693,12 @@
"""Creates, opens, and writes data to a ``PointCloudDataFrame`` with the spot
locations and metadata. Returns the open dataframe for writing.
"""
if major_version == 1:
names = [id_column_name, "in_tissue", "array_row", "array_col", "y", "x"]

Check warning on line 697 in apis/python/src/tiledbsoma/io/spatial/ingest.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/io/spatial/ingest.py#L696-L697

Added lines #L696 - L697 were not covered by tests
else:
names = None

Check warning on line 699 in apis/python/src/tiledbsoma/io/spatial/ingest.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/io/spatial/ingest.py#L699

Added line #L699 was not covered by tests
df = (
pd.read_csv(input_tissue_positions)
pd.read_csv(input_tissue_positions, names=names)
.rename(
columns={
"barcode": id_column_name,
Expand Down