Skip to content

Commit

Permalink
Merge pull request #408 from elfofmaxwell/preprocessing_docstring
Browse files Browse the repository at this point in the history
Preprocessing docstring
  • Loading branch information
Xiaojieqiu authored Sep 3, 2022
2 parents f5ec039 + 6605b7a commit 1d1f5c5
Show file tree
Hide file tree
Showing 44 changed files with 1,900 additions and 1,373 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/python/black
rev: 20.8b1
rev: 22.6.0
hooks:
- id: black
args: [--line-length=120]
Expand Down
3 changes: 2 additions & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,5 @@ GitPython
KDEpy

sphinxcontrib-bibtex>=2.3
sphinx-gallery
sphinx-gallery
typing-extensions
4 changes: 2 additions & 2 deletions dynamo/estimation/csc/utils_velocity.py
Original file line number Diff line number Diff line change
Expand Up @@ -663,7 +663,7 @@ def solve_alpha_degradation(t, u, beta, intercept=False):
ym = np.mean(y)

# calculate slope
var_x = np.mean(x ** 2) - xm ** 2
var_x = np.mean(x**2) - xm**2
cov = np.sum(y.dot(x)) / n - ym * xm
k = cov / var_x

Expand Down Expand Up @@ -776,7 +776,7 @@ def concat_time_series_matrices(mats, t=None):
# ---------------------------------------------------------------------------------------------------
# negbin method related
def compute_dispersion(mX, varX):
phi = fit_linreg(mX ** 2, varX - mX, intercept=False)[0]
phi = fit_linreg(mX**2, varX - mX, intercept=False)[0]
return phi


Expand Down
16 changes: 8 additions & 8 deletions dynamo/estimation/fit_jacobian.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,23 @@


def hill_inh_func(x, A, K, n, g):
Kd = K ** n
return A * Kd / (Kd + x ** n) - g * x
Kd = K**n
return A * Kd / (Kd + x**n) - g * x


def hill_inh_grad(x, A, K, n, g):
Kd = K ** n
return -A * n * Kd * x ** (n - 1) / (Kd + x ** n) ** 2 - g
Kd = K**n
return -A * n * Kd * x ** (n - 1) / (Kd + x**n) ** 2 - g


def hill_act_func(x, A, K, n, g):
Kd = K ** n
return A * x ** n / (Kd + x ** n) - g * x
Kd = K**n
return A * x**n / (Kd + x**n) - g * x


def hill_act_grad(x, A, K, n, g):
Kd = K ** n
return A * n * Kd * x ** (n - 1) / (Kd + x ** n) ** 2 - g
Kd = K**n
return A * n * Kd * x ** (n - 1) / (Kd + x**n) ** 2 - g


def calc_mean_squared_deviation(func, x_data, y_mean, y_sigm, weighted=True):
Expand Down
12 changes: 6 additions & 6 deletions dynamo/estimation/tsc/utils_kinetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,11 +252,11 @@ def get_n_labeled(self):

def get_var_nu(self):
c = self.get_nu()
return self.x[:, self.uu] + c - c ** 2
return self.x[:, self.uu] + c - c**2

def get_var_nx(self):
c = self.get_nx()
return self.x[:, self.xx] + c - c ** 2
return self.x[:, self.xx] + c - c**2

def get_cov_ux(self):
cu = self.get_nu()
Expand Down Expand Up @@ -380,7 +380,7 @@ def get_nu(self):

def get_var_nu(self):
c = self.get_nu()
return self.x[:, self.uu] + c - c ** 2
return self.x[:, self.uu] + c - c**2

def computeKnp(self):
# parameters
Expand Down Expand Up @@ -480,11 +480,11 @@ def get_mean_s(self):

def get_var_u(self):
c = self.get_mean_u()
return self.x[:, self.uu] - c ** 2
return self.x[:, self.uu] - c**2

def get_var_s(self):
c = self.get_mean_s()
return self.x[:, self.ss] - c ** 2
return self.x[:, self.ss] - c**2

def get_cov_us(self):
cu = self.get_mean_u()
Expand Down Expand Up @@ -576,7 +576,7 @@ def get_mean_u(self):

def get_var_u(self):
c = self.get_mean_u()
return self.x[:, self.uu] - c ** 2
return self.x[:, self.uu] - c**2

def computeKnp(self):
# parameters
Expand Down
4 changes: 2 additions & 2 deletions dynamo/estimation/tsc/utils_moments.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,11 @@ def get_n_labeled(self):

def get_var_nu(self):
c = self.get_nu()
return self.x[:, self.uu] + c - c ** 2
return self.x[:, self.uu] + c - c**2

def get_var_nx(self):
c = self.get_nx()
return self.x[:, self.xx] + c - c ** 2
return self.x[:, self.xx] + c - c**2

def get_cov_ux(self):
cu = self.get_nu()
Expand Down
4 changes: 2 additions & 2 deletions dynamo/external/pearson_residual_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def _highly_variable_pearson_residuals(
stop = start + chunksize
mu = np.array(sums_cells @ sums_genes[:, start:stop] / sum_total)
X_dense = X_batch[:, start:stop].toarray()
residuals = (X_dense - mu) / np.sqrt(mu + mu ** 2 / theta)
residuals = (X_dense - mu) / np.sqrt(mu + mu**2 / theta)
residuals = np.clip(residuals, a_min=-clip, a_max=clip)
residual_gene_var[start:stop] = np.var(residuals, axis=0)

Expand Down Expand Up @@ -377,7 +377,7 @@ def compute_pearson_residuals(X, theta, clip, check_values, copy=False):

mu = np.array(sums_cells @ sums_genes / sum_total)
diff = np.array(X - mu)
residuals = diff / np.sqrt(mu + mu ** 2 / theta)
residuals = diff / np.sqrt(mu + mu**2 / theta)

# clip
residuals = np.clip(residuals, a_min=-clip, a_max=clip)
Expand Down
8 changes: 4 additions & 4 deletions dynamo/external/sctransform.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def sctransform_core(
x = model_pars["theta"].values.copy()
x[x < min_theta] = min_theta
model_pars["theta"] = x
dispersion_par = np.log10(1 + 10 ** genes_log_gmean_step1 / model_pars["theta"].values.flatten())
dispersion_par = np.log10(1 + 10**genes_log_gmean_step1 / model_pars["theta"].values.flatten())

model_pars_theta = model_pars["theta"]
model_pars = model_pars.iloc[:, model_pars.columns != "theta"].copy()
Expand Down Expand Up @@ -250,7 +250,7 @@ def sctransform_core(
)
full_model_pars[i] = kr.fit(data_predict=x_points)[0]

theta = 10 ** genes_log_gmean / (10 ** full_model_pars["dispersion"].values - 1)
theta = 10**genes_log_gmean / (10 ** full_model_pars["dispersion"].values - 1)
full_model_pars["theta"] = theta
del full_model_pars["dispersion"]

Expand All @@ -261,9 +261,9 @@ def sctransform_core(
d = X.data
x, y = X.nonzero()
mud = np.exp(full_model_pars.values[:, 0][y] + full_model_pars.values[:, 1][y] * cell_attrs["log_umi"].values[x])
vard = mud + mud ** 2 / full_model_pars["theta"].values.flatten()[y]
vard = mud + mud**2 / full_model_pars["theta"].values.flatten()[y]

X.data[:] = (d - mud) / vard ** 0.5
X.data[:] = (d - mud) / vard**0.5
X.data[X.data < 0] = 0
X.eliminate_zeros()

Expand Down
2 changes: 1 addition & 1 deletion dynamo/plot/ezplots.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def zstreamline(
"zorder": 3,
}

mass = np.sqrt((V_grid ** 2).sum(0))
mass = np.sqrt((V_grid**2).sum(0))
# velocity filtering
if min_vel_mag is not None:
min_vel_mag = np.clip(min_vel_mag, None, np.quantile(mass, 0.4))
Expand Down
6 changes: 3 additions & 3 deletions dynamo/plot/heatmaps.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def rep2(x, length_out):


def dnorm(x, u=0, sig=1):
return np.exp(-((x - u) ** 2) / (2 * sig ** 2)) / (math.sqrt(2 * math.pi) * sig)
return np.exp(-((x - u) ** 2) / (2 * sig**2)) / (math.sqrt(2 * math.pi) * sig)


def kde2d(x, y, h=None, n=25, lims=None):
Expand Down Expand Up @@ -286,7 +286,7 @@ def response(

id = 0
for gene_pairs_ind, gene_pairs in enumerate(pairs_mat):
f_ini_ind = (grid_num ** 2) * id
f_ini_ind = (grid_num**2) * id
r_ini_ind = grid_num * id

gene_pair_name = gene_pairs[0] + "->" + gene_pairs[1]
Expand Down Expand Up @@ -842,7 +842,7 @@ def causality(
id = 0
for gene_pairs_ind in range(0, len(pairs_mat)):
gene_pairs = pairs_mat[gene_pairs_ind, :]
f_ini_ind = (grid_num ** 2) * id
f_ini_ind = (grid_num**2) * id

gene_pair_name = reduce(lambda a, b: a + "->" + b, gene_pairs)

Expand Down
6 changes: 3 additions & 3 deletions dynamo/plot/scVectorField.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,7 @@ def line_integral_conv(
V_grid = V_grid_[1, :, :].T

if V_threshold is not None:
mass = np.sqrt((V_grid ** 2).sum(0))
mass = np.sqrt((V_grid**2).sum(0))
if V_threshold is not None:
V_grid[0][mass.reshape(V_grid[0].shape) < V_threshold] = np.nan

Expand Down Expand Up @@ -498,7 +498,7 @@ def line_integral_conv(
data["velocity_y"] = (velocity_y, "km/s")
data["velocity_z"] = (velocity_z, "km/s")
data["velocity_sum"] = (
np.sqrt(velocity_x ** 2 + velocity_y ** 2),
np.sqrt(velocity_x**2 + velocity_y**2),
"km/s",
)

Expand Down Expand Up @@ -1623,7 +1623,7 @@ def streamline_plot(
"integration_direction": "both",
"zorder": 3,
}
mass = np.sqrt((V_grid ** 2).sum(0))
mass = np.sqrt((V_grid**2).sum(0))
linewidth *= 2 * mass / mass[~np.isnan(mass)].max()
streamplot_kwargs.update({"linewidth": linewidth * streamline_kwargs.pop("linewidth", 1)})

Expand Down
2 changes: 1 addition & 1 deletion dynamo/plot/scatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -758,7 +758,7 @@ def _plot_basis_layer(cur_b, cur_l):
values = (
calc_1nd_moment(values, knn)[0]
if smooth in [1, True]
else calc_1nd_moment(values, knn ** smooth)[0]
else calc_1nd_moment(values, knn**smooth)[0]
)

if affine_transform_A is None or affine_transform_b is None:
Expand Down
2 changes: 1 addition & 1 deletion dynamo/plot/space.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def space(
# meaning of s in scatters:
# https://stackoverflow.com/questions/14827650/pyplot-scatter-plot-marker-size/47403507#47403507
# Note that np.sqrt(adata.shape[0]) / 16000.0 is used in pl.scatters
pointsize = pointsize ** 2 * np.sqrt(adata.shape[0]) / 16000.0
pointsize = pointsize**2 * np.sqrt(adata.shape[0]) / 16000.0

main_info("estimated point size for plotting each cell in space: %f" % (pointsize))

Expand Down
2 changes: 1 addition & 1 deletion dynamo/plot/topography.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def plot_flow_field(
u_vel[i, j], v_vel[i, j] = vecfld(np.array([uu[i, j], vv[i, j]]))

# Compute speed
speed = np.sqrt(u_vel ** 2 + v_vel ** 2)
speed = np.sqrt(u_vel**2 + v_vel**2)

# Make linewidths proportional to speed,
# with minimal line width of 0.5 and max of 3
Expand Down
2 changes: 1 addition & 1 deletion dynamo/prediction/trajectory.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def interp_X(self, num=100, **interp_kwargs):
return self.interpolate(self.interp_t(num=num), **interp_kwargs)

def integrate(self, func):
""" Calculate the integral of func along the curve. The first and last points are omitted. """
"""Calculate the integral of func along the curve. The first and last points are omitted."""
F = np.zeros(func(self.X[0]).shape)
tvec = self.calc_tangent(normalize=False)
for i in range(1, self.X.shape[0] - 1):
Expand Down
23 changes: 19 additions & 4 deletions dynamo/preprocessing/CnmfPreprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@


class CnmfPreprocessor(Preprocessor):
def __init__(self, **kwargs):
def __init__(self, **kwargs) -> None:
"""A specialized preprocessor based on cNMF. It accepts the same arguments as the base Preprocessor."""

super().__init__(**kwargs)
self.selected_K = 7
self.n_iter = 200
Expand All @@ -26,7 +28,16 @@ def __init__(self, **kwargs):
# TODO: enable parallel computing in the future. Currently cNMF only provides cmd interfaces for factorization.
self.num_worker = 1

def preprocess_adata(self, adata: AnnData):
def preprocess_adata(self, adata: AnnData) -> AnnData:
"""Preprocess the AnnData object with cNMF.
Args:
adata: an AnnData object.
Returns:
The preprocessed AnnData object.
"""

try:
from cnmf import cNMF
except Exception as e:
Expand Down Expand Up @@ -67,8 +78,12 @@ def preprocess_adata(self, adata: AnnData):
self.cnmf_obj = cnmf_obj
return adata

def k_selection_plot(self):
def k_selection_plot(self) -> None:
"""Plot the K selection curve of cNMF and save to the output folder."""

self.cnmf_obj.k_selection_plot(close_fig=False)

def cleanup_cnmf(self):
def cleanup_cnmf(self) -> None:
"""Remove the temporary folder that stores data used for cNMF."""

rmtree(self.output_dir, ignore_errors=True)
Loading

0 comments on commit 1d1f5c5

Please sign in to comment.