Skip to content

Commit

Permalink
Fix obvious issues
Browse files Browse the repository at this point in the history
  • Loading branch information
jeffquinn-msk committed Feb 8, 2024
1 parent fda1994 commit b1ef787
Show file tree
Hide file tree
Showing 7 changed files with 25 additions and 50 deletions.
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ dependencies = [
"torchvision",
"spatialdata-io",
"matplotlib",
"autograd-minimize"
"autograd-minimize",
"scikit-learn"
]

[project.optional-dependencies]
Expand Down
11 changes: 6 additions & 5 deletions src/nuc2seg/kernel_cell_segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
import numpy as np
from scipy.special import softmax
from scipy.stats import poisson
import torch
from torch.distributions import Poisson
from autograd_minimize import minimize
from tqdm import tqdm
from nuc2seg.utils import grid_graph_edges, calc_plateaus
from sklearn.cluster import KMeans


def calculate_neighbor_weights(
Expand Down Expand Up @@ -167,10 +173,6 @@ def e_step(
def fit_fused_lasso_count_process(
counts, lam_min=1e-2, lam_max=50, n_lam=30, segmentation_threshold=0.1
):
import torch
from torch.distributions import Poisson
from autograd_minimize import minimize
from tqdm import tqdm

# Estimates of the latent rates
Rates = np.zeros((n_lam,) + counts.shape)
Expand Down Expand Up @@ -334,7 +336,6 @@ def test_fused_lasso_count_process():
)

# Initialize cell type profiles and cell type IDs via K-means
from sklearn.cluster import KMeans

kmeans = KMeans(n_clusters=n_cell_types).fit(cell_transcript_counts)
model_cell_types = kmeans.labels_ + 1
Expand Down
10 changes: 6 additions & 4 deletions src/nuc2seg/segment.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import torch
import numpy as np
import geopandas
import pandas

from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import connected_components
from scipy.special import expit, softmax

from xenium_utils import pol2cart
from xenium_utils import pol2cart, create_pixel_geodf, load_nuclei


def temp_forward(model, x, y, z):
Expand Down Expand Up @@ -262,7 +264,7 @@ def flow_graph_segmentation(
nuclei_geo_df = load_nuclei(nuclei_file)

# Find the nearest nucleus to each pixel
labels_geo_df = gpd.sjoin_nearest(
labels_geo_df = geopandas.sjoin_nearest(
labels_geo_df, nuclei_geo_df, how="left", distance_col="nucleus_distance"
)
labels_geo_df.rename(columns={"index_right": "nucleus_id_xenium"}, inplace=True)
Expand Down Expand Up @@ -314,7 +316,7 @@ def greedy_cell_segmentation(
nuclei_geo_df = load_nuclei(nuclei_file)

# Find the nearest nucleus to each pixel
labels_geo_df = gpd.sjoin_nearest(
labels_geo_df = geopandas.sjoin_nearest(
labels_geo_df, nuclei_geo_df, how="left", distance_col="nucleus_distance"
)
labels_geo_df.rename(columns={"index_right": "nucleus_id_xenium"}, inplace=True)
Expand Down Expand Up @@ -587,4 +589,4 @@ def save_cell_matrix(pixel_labels_arr, tx_geo_df, outfile):
df[gene_name] = counts[:, idx]

# Save to file
pd.DataFrame(df).to_csv(outfile, index=False)
pandas.DataFrame(df).to_csv(outfile, index=False)
3 changes: 2 additions & 1 deletion src/nuc2seg/train.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import torch
import torch.nn as nn
from nuc2seg.utils.data_loading import XeniumDataset, xenium_collate_fn
from nuc2seg.data_loading import XeniumDataset, xenium_collate_fn
from torch import optim
from torch.utils.data import DataLoader, random_split

from nuc2seg.unet_model import SparseUNet
from nuc2seg.evaluate import evaluate


def angle_loss(predictions, targets):
Expand Down
2 changes: 1 addition & 1 deletion src/nuc2seg/unet_model.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
""" Full assembly of the parts to form the complete network """

from torch.nn import Embedding

import numpy as np
from nuc2seg.unet_parts import *


Expand Down
3 changes: 1 addition & 2 deletions src/nuc2seg/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,7 @@ def calc_plateaus(beta, edges, rel_tol=1e-4, verbose=0):
for local_idx in local_check:
if (
not check_map[local_idx]
and beta[local_idx] >= min_member
and beta[local_idx] <= max_member
and min_member <= beta[local_idx] <= max_member
):
# Label this index as being checked so it's not re-checked unnecessarily
check_map[local_idx] = True
Expand Down
43 changes: 7 additions & 36 deletions src/nuc2seg/xenium_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@
import os
import sys

from scipy.special import softmax
import geopandas as gpd
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from shapely import Polygon
from scipy.special import logsumexp
from scipy.stats import poisson

logger = logging.getLogger(__name__)

Expand All @@ -20,36 +23,6 @@ def configure_logging():
)


def get_args():
parser = argparse.ArgumentParser(
description="Align transcripts and xenium cell segmentation"
)
parser.add_argument(
"--transcripts",
required=True,
help="Path to the transcripts.csv file produced by Xenium.",
)
parser.add_argument(
"--boundaries",
required=True,
help="Path to the cell_boundaries.csv file produced by Xenium.",
)
parser.add_argument(
"--sample-area",
default=None,
type=str,
help='Rectangular area to sample in "x1,x2,y1,y2 format.',
)
parser.add_argument("--output", required=True, help="Path to the output file.")
parser.add_argument(
"--boundaries-output", required=True, help="Path to the boundary shapefile."
)
parser.add_argument(
"--transcripts-output", required=True, help="Path to the transcripts shapefile."
)
return parser.parse_args()


def create_shapely_rectangle(x1, y1, x2, y2):
return Polygon([(x1, y1), (x1, y2), (x2, y2), (x2, y1)])

Expand Down Expand Up @@ -135,7 +108,6 @@ def estimate_cell_types(
tol=1e-4,
warm_start=False,
):
from scipy.special import softmax

n_nuclei, n_genes = gene_counts.shape

Expand Down Expand Up @@ -254,7 +226,6 @@ def estimate_cell_types(


def aic_bic(gene_counts, expression_profiles, prior_probs):
from scipy.special import logsumexp

n_components = expression_profiles.shape[0]
n_genes = expression_profiles.shape[1]
Expand Down Expand Up @@ -327,7 +298,7 @@ def calculate_pixel_loglikes(
# Calculate the log-likelihoods for each pixel generating a certain set of transcripts.
# NOTE: yes we could vectorize this, but that leads to memory issues.
expression_loglikes = np.zeros_like(rate_loglikes)
log_expression = np.log(all_expression_profiles)
log_expression = np.log(expression_profiles)
for i in range(expression_loglikes.shape[0]):
if i % 100 == 0:
print(i)
Expand Down Expand Up @@ -390,7 +361,7 @@ def load_and_filter_transcripts(transcripts_file: str, min_qv=20.0):
mapping = dict(zip(gene_ids, np.arange(len(gene_ids))))
tx_geo_df["gene_id"] = tx_geo_df["feature_name"].apply(lambda x: mapping.get(x, 0))

return tx_geo_df
return tx_geo_df, gene_ids


def create_pixel_geodf(x_max, y_max):
Expand All @@ -414,7 +385,7 @@ def create_pixel_geodf(x_max, y_max):
def spatial_as_sparse_arrays(
nuclei_file: str,
transcripts_file: str,
out_dir: str,
outdir: str,
pixel_stride=1,
min_qv=20.0,
foreground_nucleus_distance=1,
Expand All @@ -431,7 +402,7 @@ def spatial_as_sparse_arrays(
nuclei_geo_df = load_nuclei(nuclei_file)

# Load the transcript locations
tx_geo_df = load_and_filter_transcripts(transcripts_file, min_qv=min_qv)
tx_geo_df, gene_ids = load_and_filter_transcripts(transcripts_file, min_qv=min_qv)
n_genes = tx_geo_df["gene_id"].max() + 1

# Get the approx bounds of the image
Expand Down

0 comments on commit b1ef787

Please sign in to comment.