Skip to content

Commit

Permalink
Merge pull request #41 from flatironinstitute/dev
Browse files Browse the repository at this point in the history
v0.5.5
  • Loading branch information
asistradition authored Apr 29, 2021
2 parents 2c8350b + 4be3aab commit 0ec4df9
Show file tree
Hide file tree
Showing 12 changed files with 293 additions and 65 deletions.
11 changes: 11 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
Change Log
==========

Inferelator v0.5.5 `April 29, 2021`
-----------------------------------

New Functionality:

- Added ``.set_regression_parameters(tol=None)`` to parameterize tolerances in AMuSR regression

Code Refactoring:

- Profiled and optimized AMuSR code

Inferelator v0.5.4 `April 23, 2021`
-----------------------------------

Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
author = 'Chris Jackson'

# The full version, including alpha/beta/rc tags
release = 'v0.5.4'
release = 'v0.5.5'


# -- General configuration ---------------------------------------------------
Expand Down
21 changes: 13 additions & 8 deletions inferelator/crossvalidation_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class CrossValidationManager(object):
size_sample_stratified_column = None
size_sample_with_replacement = False
size_sample_seed = None
size_sample_only = False

# Workflow storage
_baseline_workflow = None
Expand Down Expand Up @@ -180,7 +181,8 @@ def add_grouping_dropin(self, metadata_column_name, group_size=None, seed=42):
self.dropin_max_size = group_size
self.dropin_seed = seed

def add_size_subsampling(self, size_vector, stratified_column_name=None, with_replacement=False, seed=42):
def add_size_subsampling(self, size_vector, stratified_column_name=None, with_replacement=False, seed=42,
size_sample_only=None):
"""
Resample expression data to a ratio of the original data.
Expand All @@ -206,6 +208,7 @@ def add_size_subsampling(self, size_vector, stratified_column_name=None, with_re
self.size_sample_stratified_column = stratified_column_name
self.size_sample_with_replacement = with_replacement
self.size_sample_seed = seed
self.size_sample_only = size_sample_only if size_sample_only is not None else self.size_sample_only

def run(self):
"""
Expand All @@ -222,7 +225,10 @@ def run(self):
self._check_grid_search_params_exist()

# Run base grid search
results = self._grid_search()
if self.size_sample_only:
results = []
else:
results = self._grid_search()

# Run size sampling
if self.size_sample_vector is not None:
Expand Down Expand Up @@ -382,8 +388,7 @@ def _grid_search(self, test=None, value=None, mask_function=None):
# Drop any observations which are False in the mask (if set)
if mask_function is not None:
mask = mask_function()
cv_workflow.expression_matrix.drop(cv_workflow.expression_matrix.columns[~mask], axis=1, inplace=True)
cv_workflow.meta_data.drop(cv_workflow.meta_data.index[~mask], axis=0, inplace=True)
cv_workflow.data = cv_workflow.data.get_sample_data(mask.index[mask])
n_obs = mask.sum()
else:
n_obs = cv_workflow._num_obs
Expand Down Expand Up @@ -448,7 +453,7 @@ def _check_metadata_column_exists(self, col_name):
:param col_name: str
"""

if col_name in self.workflow.meta_data.columns:
if col_name in self.workflow.data.meta_data.columns:
return True
else:
raise ValueError("Column {col} is not present in the loaded metadata".format(col=col_name))
Expand All @@ -458,7 +463,7 @@ def _dropout_cv(self):
Run grid search on all data minus one group at a time
"""

meta_data = self.workflow.meta_data.copy()
meta_data = self.workflow.data.meta_data.copy()
col = self.dropout_column
max_size = self.dropout_max_size

Expand Down Expand Up @@ -511,7 +516,7 @@ def _dropin_cv(self):
Run grid search on one group from the data at a time
"""

meta_data = self.workflow.meta_data.copy()
meta_data = self.workflow.data.meta_data.copy()
col = self.dropin_column
max_size = self.dropin_max_size

Expand Down Expand Up @@ -551,7 +556,7 @@ def _size_cv(self):

for i, size_ratio in enumerate(self.size_sample_vector):
rgen = np.random.RandomState(self.size_sample_seed + i)
meta_data = self.workflow.meta_data.copy()
meta_data = self.workflow.data.meta_data.copy()

if self.size_sample_stratified_column is not None:
strat_col = self.size_sample_stratified_column
Expand Down
5 changes: 3 additions & 2 deletions inferelator/distributed/dask_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@


def amusr_regress_dask(X, Y, priors, prior_weight, n_tasks, genes, tfs, G, remove_autoregulation=True,
lambda_Bs=None, lambda_Ss=None, Cs=None, Ss=None, regression_function=None):
lambda_Bs=None, lambda_Ss=None, Cs=None, Ss=None, regression_function=None,
tol=None, rel_tol=None):
"""
Execute multitask (AMUSR)
Expand Down Expand Up @@ -54,7 +55,7 @@ def regression_maker(j, x_df, y_list, prior, tf):

prior = format_prior(prior, gene, tasks, prior_weight, tfs=tf)
return j, regression_function(x, y, tf, tasks, gene, prior,
lambda_Bs=lambda_Bs, lambda_Ss=lambda_Ss, Cs=Cs, Ss=Ss)
lambda_Bs=lambda_Bs, lambda_Ss=lambda_Ss, Cs=Cs, Ss=Ss, tol=tol, rel_tol=rel_tol)

def response_maker(y_df, i):
y = []
Expand Down
Loading

0 comments on commit 0ec4df9

Please sign in to comment.