
Drg tools/modules cleanup #5

Open
wants to merge 11 commits into main
2 changes: 2 additions & 0 deletions .gitignore
@@ -7,3 +7,5 @@ data
**.backup
trash
archive
.venv
.*_cache
13 changes: 12 additions & 1 deletion README.md
@@ -31,7 +31,18 @@ Deep sequence-to-function models learn the relationship between genomic sequence

## Installation

Download the repository and setup conda environment.
Download the repository and set up a virtual or conda environment.

To create and activate a virtual environment, run:
```
python3 -m venv .venv
source .venv/bin/activate

pip install --upgrade pip
```

To install the dev requirements (e.g. linters and type checking), run:
`pip install -r dev_requirements.txt`

Install by navigating to the location of the local repository

4 changes: 4 additions & 0 deletions dev_requirements.txt
@@ -0,0 +1,4 @@
pytest
ruff
mypy
types-setuptools
49 changes: 30 additions & 19 deletions drg_tools/modules.py
@@ -4,17 +4,16 @@
i.e. loss functions and model layers
'''


import sys, os
import math
import numpy as np
import torch.nn as nn
import torch
import torch.nn.functional as F
from collections import OrderedDict
from einops.layers.torch import Rearrange
from fft_conv_pytorch import fft_conv
from torch import Tensor
from torch.nn.parameter import Parameter
import math
import torch.nn.functional as F
from fft_conv_pytorch import fft_conv


class EXPmax(nn.Module):
@@ -367,7 +366,7 @@ def __init__(self, reduction = 'none', log_counts = True, eps = 1, mse_ratio = 1
self.log_counts = log_counts
self.eps = eps

def forward(self, p: torch.tensor, q: torch.tensor):
def forward(self, p: torch.Tensor, q: torch.Tensor):
# bin the counts data, if mean_size = None then bin is entire length of input
if self.mean_size is None:
self.mean_size = p.size(dim = -1)
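A note on the annotation change in this and the following hunks: `torch.tensor` is a factory function, while `torch.Tensor` is the tensor class, so only the latter is a meaningful type annotation (and the one mypy understands). A minimal illustration, using a hypothetical `shift` function:

```python
import torch

# torch.Tensor is the class; torch.tensor is a function that builds instances of it.
def shift(p: torch.Tensor, q: torch.Tensor) -> torch.Tensor:
    return p - q

x = torch.zeros(2, 3)
print(isinstance(x, torch.Tensor))  # True
# isinstance(x, torch.tensor) raises a TypeError, because torch.tensor is not a type.
```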
@@ -403,7 +402,7 @@ def __init__(self, sum_axis = -1, norm_last = True, reduction = 'none', eps = 1e
self.reduction = reduction
self.eps = eps

def forward(self, p: torch.tensor, q: torch.tensor):
def forward(self, p: torch.Tensor, q: torch.Tensor):
if self.mse is not None:
if self.mean_size is None:
self.mean_size = p.size(dim = -1)
@@ -454,7 +453,7 @@ def __init__(self, reduction = 'none', log_counts = True, eps = 1, mse_ratio = 1
self.log_counts = log_counts
self.eps = eps

def forward(self, p: torch.tensor, q: torch.tensor):
def forward(self, p: torch.Tensor, q: torch.Tensor):
if self.mean_size is None:
self.mean_size = p.size(dim = -1)
if self.meanpool is None:
@@ -487,8 +486,8 @@ def forward(self, p, q):
q = q-minq.unsqueeze(-1)
q =torch.log(q+self.eps)
if self.log_prediction:
p = p-minp.unsqueeze(-1)
minp = torch.min(p,dim =-1)[0]
p = p-minp.unsqueeze(-1)
p =torch.log(p+self.eps)
return self.mse(p,q)
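The reordering above computes the per-row minimum of the prediction before subtracting it, so the shift uses an up-to-date `minp` and the values passed to `torch.log` are non-negative. A small sketch of the corrected order, with made-up numbers:

```python
import torch

eps = 1.0
p = torch.tensor([[-2.0, 0.0, 3.0]])
minp = torch.min(p, dim=-1)[0]      # compute the minimum first ...
p = p - minp.unsqueeze(-1)          # ... then shift, so every value is >= 0
p = torch.log(p + eps)
print(p)  # tensor([[0.0000, 1.0986, 1.7918]])
```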

@@ -796,7 +795,6 @@ def forward(self, x):

return pred

from einops.layers.torch import Rearrange
#from einops import rearrange
# This one is included into Padded_AvgPool1d now
class SoftmaxNorm(nn.Module):
@@ -1308,7 +1306,7 @@ def __init__(self, indim, inlen, n_kernels, l_kernels, n_layers, kernel_increase
self.convlayers['Bnorm'+str(n)] = nn.BatchNorm1d(currdim + int(concatenate_residual*dtl*(residual_after>0))*currdim)

# decide if activation function should be applied before or after convolutional layer
if act_func_before and ((~is_modified) or (n != 0)):
if act_func_before and ((not is_modified) or (n != 0)):
self.convlayers['Conv_func'+str(n)] = func_dict[activation_function]()

if long_conv:
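On the `~is_modified` fix a few lines up: `~` is bitwise NOT, which on a Python bool does integer inversion rather than logical negation, so `~is_modified` is always a truthy value and the condition never behaved as intended. A quick illustration:

```python
is_modified = False
print(~is_modified)     # -1 (truthy): bitwise NOT treats the bool as an int
print(~True)            # -2 (also truthy)
print(not is_modified)  # True: logical negation, the intended check
```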
@@ -1324,7 +1322,7 @@ def __init__(self, indim, inlen, n_kernels, l_kernels, n_layers, kernel_increase
# (non-symmetric) padding to have same padding left and right of sequence and get same sequence length
convpad = [int(np.floor((dilations[n]*(l_kernels[n]-1)+1)/2))-int((dilations[n]*(l_kernels[n]-1)+1)%2==0), int(np.floor((dilations[n]*(l_kernels[n]-1)+1)/2))]
# padded convolutional layer
concatcheck = int(concatenate_residual*dtl)*int(n%residual_after==0)*int(linear_layer==False) # check if input is concatenated output of convolution and residual or not
concatcheck = int(concatenate_residual*dtl)*int(n%residual_after==0)*int(linear_layer is False) # check if input is concatenated output of convolution and residual or not
self.convlayers['Conv'+str(n)] = Padded_Conv1d(currdim+ concatcheck*currdim, int(currdim*kernel_increase[n]), kernel_size = l_kernels[n], bias = bias, stride = strides[n], dilation = dilations[n], padding = convpad)
currlen = int(np.floor((currlen +convpad[0]+convpad[1]- dilations[n]*(l_kernels[n]-1)-1)/strides[n]+1))
# see above
@@ -1389,7 +1387,9 @@ def __init__(self, indim, inlen, n_kernels, l_kernels, n_layers, kernel_increase
self.residual_entire = Residual_convolution(resedim, currdim, resentire)
else:
self.residual_entire = None
concatcheck = int(concatenate_residual)*int(n%residual_after==0)*int(linear_layer==False) # check if input is concatenated output of convolution and residual or not

# check if input is concatenated output of convolution and residual or not
concatcheck = int(concatenate_residual)*int(n%residual_after==0)*int(linear_layer is False)

self.currdim, self.currlen = currdim+ concatcheck*currdim +int(residual_entire)*currdim, currlen

@@ -1449,7 +1449,7 @@ def __init__(self, in_channels, in_len, out_channels, kernel_size, kernel_gap, s
self.out_len = int(np.floor((in_len + padding - kernel_size)/stride +1))
# max pooling before layers are flattened to reduce dimension of output given to fully connected layer
self.pooling = None
if pooling == True:
if pooling is True:
poolstride = int(kernel_size/2)
elif pooling > 1:
poolstride = pooling
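The `pooling is True` check above (and the `linear_layer is False` checks earlier) swaps an equality comparison for an identity comparison against the bool literal. Since `pooling` can be either a bool flag or an integer pool size, this matters: `1 == True` holds in Python, while `1 is True` does not. A short illustration with a hypothetical value:

```python
pooling = 1             # an integer pool size, not the bool flag
print(pooling == True)  # True  -> 1 compares equal to True
print(pooling is True)  # False -> only the literal True passes the identity check
```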
@@ -1537,7 +1536,6 @@ def __init__(self, modellist, flatten = True):
def forward(self, x):
out = []
for m in self.modellist:
outadd = m(x)
if self.flatten:
out.append(torch.flatten(m(x), start_dim = 1, end_dim = -1))
else:
@@ -1559,8 +1558,11 @@ def __init__(self, indim, out_classes, l_kernels, cut_sites = None, strides = 1,
self.cut_sites = [cut_sites, cut_sites]
else:
self.cut_sites = cut_sites

if batch_norm:
self.Bnorm = self.nn.BatchNorm1d(currdim)
# TODO: VERIFY
# NOTE [Alyss Flynn 2024-10-21]: used gap_conv init as reference; changed self.nn.BatchNorm1d(currdim) to nn.BatchNorm1d(indim)
self.Bnorm = nn.BatchNorm1d(indim)

self.n_convolutions = n_convolutions
if n_convolutions > 1:
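On the batch-norm fix: `self.nn` does not exist on the module, and at this point in `__init__` the normalization is applied to the raw input, whose channel count is `indim`, so `nn.BatchNorm1d(indim)` is the plausible construction (as the note says, still to be verified against the original intent). A minimal shape check with assumed dimensions:

```python
import torch
import torch.nn as nn

indim, batch, length = 4, 8, 200       # assumed sizes for illustration
bnorm = nn.BatchNorm1d(indim)          # normalizes over the channel dimension
x = torch.randn(batch, indim, length)  # e.g. one-hot encoded sequences
print(bnorm(x).shape)                  # torch.Size([8, 4, 200])
```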
@@ -1603,7 +1605,7 @@ def forward(self, x):

# Interaction module creates non-linear interactions between all features by multiplying them with each other and then multiplies a weight matrix to them
class interaction_module(nn.Module):
def __init__(self, indim, outdim):
def __init__(self, indim, outdim, classes=None):
super(interaction_module, self).__init__()
self.outdim = outdim # if outdim is 1 then use softmax output
# else use RelU
@@ -1844,7 +1846,7 @@ def forward(self,x):
if self.receptive_matmul.mask.is_cuda:
devicetobe = self.qpred.get_device()
self.receptive_matmul.to('cuda:'+str(devicetobe))
attmatix = self.receptive_matmul(qpred, kpred)
attmatrix = self.receptive_matmul(qpred, kpred)
else:
qpred = qpred.transpose(-1,-2)
attmatrix = torch.matmul(qpred, kpred)
@@ -1932,9 +1934,18 @@ def forward(self, x):
# Returns a stretching and adds bias for each kernel dimension after convolution
# Also good example how write own module with any tensor multiplication and initialized parameters
class Kernel_linear(nn.Module):
def __init__(self, n_kernels: int) -> None:
# TODO: VERIFY
# NOTE [Alyss Flynn 2024-10-21]: this init only included one input `n_kernels: int`,
# but common usage appears to require two inputs => `Kernel_linear(currdim, self.kernel_thresholding)`,
# and internally it seems to require additional keyword parameters (**factory_kwargs) to pass into torch.empty.
#
# I added `kernel_thresholding: int` and `**factory_kwargs` to the init, to fix Type/NameErrors,
# but I want to verify these changes are appropriate for typical use of this class.
# Also, we should replace `**factory_kwargs` with explicit key-value pairs to avoid passing invalid args to torch.empty.
def __init__(self, n_kernels: int, kernel_thresholding: int, **factory_kwargs) -> None:
super(Kernel_linear, self).__init__()
self.n_kernels = n_kernels
self.kernel_thresholding = kernel_thresholding
self.weight = Parameter(torch.empty((1, n_kernels, 1), **factory_kwargs))
self.bias = Parameter(torch.empty(n_kernels, **factory_kwargs))
self.init_parameters()
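Following up on the note above, one way to avoid an open-ended `**factory_kwargs` is the pattern used by built-in layers such as `nn.Linear`: accept explicit `device` and `dtype` keywords and pass only those to `torch.empty`. A hedged sketch (the class name, defaults, and the forward broadcast are assumptions, not the repository's settled API):

```python
import torch
import torch.nn as nn
from torch.nn.parameter import Parameter

class KernelLinearSketch(nn.Module):
    """Per-kernel scale and bias applied after convolution (illustrative only)."""

    def __init__(self, n_kernels: int, kernel_thresholding: int,
                 device=None, dtype=None) -> None:
        super().__init__()
        factory_kwargs = {'device': device, 'dtype': dtype}  # nothing else can leak into torch.empty
        self.n_kernels = n_kernels
        self.kernel_thresholding = kernel_thresholding
        self.weight = Parameter(torch.empty((1, n_kernels, 1), **factory_kwargs))
        self.bias = Parameter(torch.empty(n_kernels, **factory_kwargs))
        nn.init.ones_(self.weight)
        nn.init.zeros_(self.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: (batch, n_kernels, length); weight and bias broadcast over batch and length
        return x * self.weight + self.bias[None, :, None]
```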
11 changes: 11 additions & 0 deletions mypy.ini
@@ -0,0 +1,11 @@
[mypy-fft_conv_pytorch.*]
ignore_missing_imports = True

[mypy-scipy.*]
ignore_missing_imports = True

[mypy-sklearn.*]
ignore_missing_imports = True

[mypy-joblib.*]
ignore_missing_imports = True
13 changes: 11 additions & 2 deletions setup.py
@@ -9,7 +9,16 @@
license='LICENSE',
description='drg_tools contains classes and functions to create and analyze sequence-to-function models.',
install_requires=[
"numpy >= 1.14.2",
"torch >= 1.9.0",
"einops == 0.8.0",
"fft_conv_pytorch == 1.2.0",
"joblib",
"logomaker>=0.8",
"matplotlib>=3.8",
"numpy >= 1.26",
"pandas>=2.2.2",
"scikit-learn>=1.4.2",
"scipy>=1.13",
"seaborn>=0.13.2",
"torch >= 2.3.1",
],
)