Skip to content

Commit

Permalink
Merge pull request #6 from HiDiHlabs/dev
Browse files Browse the repository at this point in the history
Finalize first release
  • Loading branch information
niklasmueboe authored Aug 5, 2024
2 parents d3b3de0 + 7ed4395 commit a947dc9
Show file tree
Hide file tree
Showing 9 changed files with 146 additions and 80 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
ci:
autoupdate_schedule: quarterly
skip: [fmt, cargo-check]
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
Expand Down
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ ndarray-stats = { version = "0.5.1" }
num = { version = "0.4.1" }
numpy = { version = "0.21.0" }
polars = { version = "0.41", features = ["partition_by", "dtype-categorical"] }
polars-arrow = { version = "0.41" }
pyo3 = { version = "0.21.0", features = ["extension-module"] }
pyo3-polars = { version = "0.15.0" }
rayon = { version = "1.8.0" }
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ This project follows the [SemVer](https://semver.org/) guidelines for versioning

## Citations

If you are using `sainsc` for your research, please cite:

Müller-Bötticher, N., Tiesmeyer, S., Eils, R., and Ishaque, N.
"Sainsc: a computational tool for segmentation-free analysis of in-situ capture"
bioRxiv (2024) https://doi.org/10.1101/2024.08.02.603879

## License

This project is licensed under the MIT License - for details please refer to the
Expand Down
10 changes: 7 additions & 3 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,14 @@ It is easily integratable with the `scverse <https://github.com/scverse>`_
`AnnData <https://anndata.readthedocs.io/>`_ or
`SpatialData <https://spatialdata.scverse.org/>`_ format.

.. Citations
.. ---------
Citations
---------

.. tbd
If you are using ``sainsc`` for your research, please cite:

Müller-Bötticher, N., Tiesmeyer, S., Eils, R., and Ishaque, N.
"Sainsc: a computational tool for segmentation-free analysis of in-situ capture"
bioRxiv (2024) https://doi.org/10.1101/2024.08.02.603879

.. toctree::
:maxdepth: 1
Expand Down
14 changes: 6 additions & 8 deletions docs/source/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,17 @@ Installation
PyPI and ``pip``
----------------

``sainsc`` will soon be available to install from `PyPI <https://pypi.org/>`_.
To install ``sainsc`` from `PyPI <https://pypi.org/>`_ using ``pip`` just run

.. To install ``sainsc`` from `PyPI <https://pypi.org/>`_ using ``pip`` just run
.. .. code-block:: bash
.. code-block:: bash
.. pip install sainsc
pip install sainsc
.. If you want to have support for :py:mod:`spatialdata` use
If you want to have support for :py:mod:`spatialdata` use

.. .. code-block:: bash
.. code-block:: bash
.. pip install 'sainsc[spatialdata]'
pip install 'sainsc[spatialdata]'
Bioconda and ``conda``
Expand Down
6 changes: 3 additions & 3 deletions docs/source/quickstart.ipynb

Large diffs are not rendered by default.

134 changes: 71 additions & 63 deletions docs/source/usage.ipynb

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions sainsc/_utils_rust.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,15 @@ class GridCounts:
sainsc.GridCounts
"""

def as_dataframe(self) -> DataFrame:
    """
    Convert to a dataframe with 'gene', 'x', 'y', and 'count' columns.

    Returns
    -------
    polars.DataFrame
    """

def __getitem__(self, key: str) -> _CsxArray: ...
def __setitem__(self, key: str, value: _Csx): ...
def __delitem__(self, key: str): ...
Expand Down
45 changes: 42 additions & 3 deletions src/gridcounts.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::sparsearray_conversion::WrappedCsx;
use crate::utils::create_pool;
use bincode::{deserialize, serialize};
use itertools::MultiUnzip;
use itertools::Itertools;
use ndarray::Array2;
use num::Zero;
use numpy::{IntoPyArray, PyArray2, PyReadonlyArray2};
Expand All @@ -12,12 +12,13 @@ use polars::{
},
prelude::*,
};
use polars_arrow::array::{DictionaryArray, UInt32Array, Utf8Array};
use pyo3::{
exceptions::{PyKeyError, PyRuntimeError, PyValueError},
prelude::*,
types::{PyBytes, PyType},
};
use pyo3_polars::PyDataFrame;
use pyo3_polars::{error::PyPolarsErr, PyDataFrame};
use rayon::{
iter::{
IntoParallelIterator, IntoParallelRefIterator, IntoParallelRefMutIterator, ParallelIterator,
Expand All @@ -31,6 +32,7 @@ use sprs::{
use std::{
cmp::min,
collections::{HashMap, HashSet},
iter::repeat,
ops::AddAssign,
};

Expand Down Expand Up @@ -203,7 +205,7 @@ impl GridCounts {
let resolution = resolution.map(|r| r * binsize.unwrap_or(1.));

match threadpool.install(|| _from_dataframe(df.into(), binsize)) {
Err(e) => Err(PyValueError::new_err(e.to_string())),
Err(e) => Err(PyPolarsErr::from(e).into()),
Ok((counts, shape)) => Ok(Self {
counts,
shape,
Expand Down Expand Up @@ -426,4 +428,41 @@ impl GridCounts {
});
});
}

/// Flatten the per-gene sparse count matrices into one long-format dataframe.
///
/// The resulting frame has the columns `gene`, `x`, `y` and `count` (in that
/// order), with one row per entry yielded by iterating each gene's matrix.
/// Genes are visited in sorted order and the `gene` column is built as a
/// dictionary-encoded array (`u32` codes into the sorted gene names).
///
/// Note that this switches the stored matrices to CSR format first, which is
/// why the method takes `&mut self`.
///
/// # Errors
///
/// Any polars error raised while building the dictionary array, the `gene`
/// series or the final dataframe is converted through `PyPolarsErr`.
///
/// # Panics
///
/// Panics if `get_view` fails for a gene that was just listed in
/// `self.counts`; this is treated as an invariant violation.
fn as_dataframe(&mut self) -> PyResult<PyDataFrame> {
    self.to_format(CSR);

    // Sorted gene names double as the categories of the dictionary array;
    // a gene's position in this list is its code.
    let sorted_genes: Vec<_> = self.counts.keys().sorted().collect();

    // Walk every gene's matrix once, tagging each entry with the gene's code,
    // and split the stream into one vector per output column.
    let ((values, (xs, ys)), gene_codes): ((Vec<&Count>, (Vec<_>, Vec<_>)), Vec<_>) =
        sorted_genes
            .iter()
            .zip(0u32..)
            .flat_map(|(&name, code)| {
                self.get_view(name)
                    .expect("gene exists because we collected the keys above")
                    .iter_rbr()
                    .zip(repeat(code))
            })
            .multiunzip();

    let count_col = Series::from_iter(values).with_name("count");
    let x_col = Series::from_vec("x", xs);
    let y_col = Series::from_vec("y", ys);

    // Assemble the categorical gene column from codes + categories instead of
    // materialising one string per row.
    let codes = UInt32Array::from_vec(gene_codes);
    let categories = Utf8Array::<i32>::from_iter(sorted_genes.into_iter().map(Some));
    let dictionary =
        DictionaryArray::try_from_keys(codes, Box::new(categories)).map_err(PyPolarsErr::from)?;
    let gene_col = Series::from_arrow("gene", Box::new(dictionary)).map_err(PyPolarsErr::from)?;

    let frame =
        DataFrame::new(vec![gene_col, x_col, y_col, count_col]).map_err(PyPolarsErr::from)?;

    Ok(PyDataFrame(frame))
}
}

0 comments on commit a947dc9

Please sign in to comment.