
Drg tools/modules cleanup #5

Open
wants to merge 11 commits into main
2 changes: 2 additions & 0 deletions .gitignore
@@ -7,3 +7,5 @@ data
**.backup
trash
archive
.venv
.*_cache
13 changes: 12 additions & 1 deletion README.md
@@ -31,7 +31,18 @@ Deep sequence-to-function models learn the relationship between genomic sequence

## Installation

Download the repository and setup conda environment.
Download the repository and set up a virtual or conda environment.

To create and activate a virtual environment, run:
```
python3 -m venv .venv
source .venv/bin/activate

pip install --upgrade pip
```

To install the dev requirements (e.g. linters and type checking), run:
`pip install -r dev_requirements.txt`

Install by navigating to the location of the local repository

4 changes: 4 additions & 0 deletions dev_requirements.txt
@@ -0,0 +1,4 @@
pytest
ruff
mypy
types-setuptools
49 changes: 30 additions & 19 deletions drg_tools/modules.py
@@ -4,17 +4,16 @@
i.e. loss functions and model layers
'''


import sys, os
import math
import numpy as np
import torch.nn as nn
import torch
import torch.nn.functional as F
from collections import OrderedDict
from einops.layers.torch import Rearrange
from fft_conv_pytorch import fft_conv
from torch import Tensor
from torch.nn.parameter import Parameter
import math
import torch.nn.functional as F
from fft_conv_pytorch import fft_conv


class EXPmax(nn.Module):
@@ -367,7 +366,7 @@ def __init__(self, reduction = 'none', log_counts = True, eps = 1, mse_ratio = 1
self.log_counts = log_counts
self.eps = eps

def forward(self, p: torch.tensor, q: torch.tensor):
def forward(self, p: torch.Tensor, q: torch.Tensor):
# bin the counts data, if mean_size = None then bin is entire length of input
if self.mean_size is None:
self.mean_size = p.size(dim = -1)
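A note on the annotation change in this and the following hunks: `torch.tensor` is a factory function, while `torch.Tensor` is the tensor class, so only the latter is a meaningful type annotation (and the one mypy understands). A minimal illustration, using a hypothetical `shift` function:

```python
import torch

# torch.Tensor is the class; torch.tensor is a function that builds instances of it.
def shift(p: torch.Tensor, q: torch.Tensor) -> torch.Tensor:
    return p - q

x = torch.zeros(2, 3)
print(isinstance(x, torch.Tensor))  # True
# isinstance(x, torch.tensor) raises a TypeError, because torch.tensor is not a type.
```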
@@ -403,7 +402,7 @@ def __init__(self, sum_axis = -1, norm_last = True, reduction = 'none', eps = 1e
self.reduction = reduction
self.eps = eps

def forward(self, p: torch.tensor, q: torch.tensor):
def forward(self, p: torch.Tensor, q: torch.Tensor):
if self.mse is not None:
if self.mean_size is None:
self.mean_size = p.size(dim = -1)
@@ -454,7 +453,7 @@ def __init__(self, reduction = 'none', log_counts = True, eps = 1, mse_ratio = 1
self.log_counts = log_counts
self.eps = eps

def forward(self, p: torch.tensor, q: torch.tensor):
def forward(self, p: torch.Tensor, q: torch.Tensor):
if self.mean_size is None:
self.mean_size = p.size(dim = -1)
if self.meanpool is None:
@@ -487,8 +486,8 @@ def forward(self, p, q):
q = q-minq.unsqueeze(-1)
q =torch.log(q+self.eps)
if self.log_prediction:
p = p-minp.unsqueeze(-1)
minp = torch.min(p,dim =-1)[0]
p = p-minp.unsqueeze(-1)
p =torch.log(p+self.eps)
return self.mse(p,q)
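The reordering above computes the per-row minimum of the prediction before subtracting it, so the shift uses an up-to-date `minp` and the values passed to `torch.log` are non-negative. A small sketch of the corrected order, with made-up numbers:

```python
import torch

eps = 1.0
p = torch.tensor([[-2.0, 0.0, 3.0]])
minp = torch.min(p, dim=-1)[0]      # compute the minimum first ...
p = p - minp.unsqueeze(-1)          # ... then shift, so every value is >= 0
p = torch.log(p + eps)
print(p)  # tensor([[0.0000, 1.0986, 1.7918]])
```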

@@ -796,7 +795,6 @@ def forward(self, x):

return pred

from einops.layers.torch import Rearrange
#from einops import rearrange
# This one is included into Padded_AvgPool1d now
class SoftmaxNorm(nn.Module):
@@ -1308,7 +1306,7 @@ def __init__(self, indim, inlen, n_kernels, l_kernels, n_layers, kernel_increase
self.convlayers['Bnorm'+str(n)] = nn.BatchNorm1d(currdim + int(concatenate_residual*dtl*(residual_after>0))*currdim)

# decide if activation function should be applied before or after convolutional layer
if act_func_before and ((~is_modified) or (n != 0)):
if act_func_before and ((not is_modified) or (n != 0)):
self.convlayers['Conv_func'+str(n)] = func_dict[activation_function]()

if long_conv:
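On the `~is_modified` fix a few lines up: `~` is bitwise NOT, which on a Python bool does integer inversion rather than logical negation, so `~is_modified` is always a truthy value and the condition never behaved as intended. A quick illustration:

```python
is_modified = False
print(~is_modified)     # -1 (truthy): bitwise NOT treats the bool as an int
print(~True)            # -2 (also truthy)
print(not is_modified)  # True: logical negation, the intended check
```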
@@ -1324,7 +1322,7 @@ def __init__(self, indim, inlen, n_kernels, l_kernels, n_layers, kernel_increase
# (non-symmetric) padding to have same padding left and right of sequence and get same sequence length
convpad = [int(np.floor((dilations[n]*(l_kernels[n]-1)+1)/2))-int((dilations[n]*(l_kernels[n]-1)+1)%2==0), int(np.floor((dilations[n]*(l_kernels[n]-1)+1)/2))]
# padded convolutional layer
concatcheck = int(concatenate_residual*dtl)*int(n%residual_after==0)*int(linear_layer==False) # check if input is concatenated output of convolution and residual or not
concatcheck = int(concatenate_residual*dtl)*int(n%residual_after==0)*int(linear_layer is False) # check if input is concatenated output of convolution and residual or not
self.convlayers['Conv'+str(n)] = Padded_Conv1d(currdim+ concatcheck*currdim, int(currdim*kernel_increase[n]), kernel_size = l_kernels[n], bias = bias, stride = strides[n], dilation = dilations[n], padding = convpad)
currlen = int(np.floor((currlen +convpad[0]+convpad[1]- dilations[n]*(l_kernels[n]-1)-1)/strides[n]+1))
# see above
@@ -1389,7 +1387,9 @@ def __init__(self, indim, inlen, n_kernels, l_kernels, n_layers, kernel_increase
self.residual_entire = Residual_convolution(resedim, currdim, resentire)
else:
self.residual_entire = None
concatcheck = int(concatenate_residual)*int(n%residual_after==0)*int(linear_layer==False) # check if input is concatenated output of convolution and residual or not

# check if input is concatenated output of convolution and residual or not
concatcheck = int(concatenate_residual)*int(n%residual_after==0)*int(linear_layer is False)

self.currdim, self.currlen = currdim+ concatcheck*currdim +int(residual_entire)*currdim, currlen

@@ -1449,7 +1449,7 @@ def __init__(self, in_channels, in_len, out_channels, kernel_size, kernel_gap, s
self.out_len = int(np.floor((in_len + padding - kernel_size)/stride +1))
# max pooling before layers are flattened to reduce dimension of output given to fully connected layer
self.pooling = None
if pooling == True:
if pooling is True:
poolstride = int(kernel_size/2)
elif pooling > 1:
poolstride = pooling
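The `pooling is True` check above (and the `linear_layer is False` checks earlier) swaps an equality comparison for an identity comparison against the bool literal. Since `pooling` can be either a bool flag or an integer pool size, this matters: `1 == True` holds in Python, while `1 is True` does not. A short illustration with a hypothetical value:

```python
pooling = 1             # an integer pool size, not the bool flag
print(pooling == True)  # True  -> 1 compares equal to True
print(pooling is True)  # False -> only the literal True passes the identity check
```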
@@ -1537,7 +1536,6 @@ def __init__(self, modellist, flatten = True):
def forward(self, x):
out = []
for m in self.modellist:
outadd = m(x)
if self.flatten:
out.append(torch.flatten(m(x), start_dim = 1, end_dim = -1))
else:
@@ -1559,8 +1558,11 @@ def __init__(self, indim, out_classes, l_kernels, cut_sites = None, strides = 1,
self.cut_sites = [cut_sites, cut_sites]
else:
self.cut_sites = cut_sites

if batch_norm:
self.Bnorm = self.nn.BatchNorm1d(currdim)
# TODO: VERIFY
# NOTE [Alyss Flynn 2024-10-21]: used gap_conv init as reference; changed self.nn.BatchNorm1d(currdim) to nn.BatchNorm1d(indim)
self.Bnorm = nn.BatchNorm1d(indim)

self.n_convolutions = n_convolutions
if n_convolutions > 1:
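On the batch-norm fix: `self.nn` does not exist on the module, and at this point in `__init__` the normalization is applied to the raw input, whose channel count is `indim`, so `nn.BatchNorm1d(indim)` is the plausible construction (as the note says, still to be verified against the original intent). A minimal shape check with assumed dimensions:

```python
import torch
import torch.nn as nn

indim, batch, length = 4, 8, 200       # assumed sizes for illustration
bnorm = nn.BatchNorm1d(indim)          # normalizes over the channel dimension
x = torch.randn(batch, indim, length)  # e.g. one-hot encoded sequences
print(bnorm(x).shape)                  # torch.Size([8, 4, 200])
```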
@@ -1603,7 +1605,7 @@ def forward(self, x):

# Interaction module creates non-linear interactions between all features by multiplying them with each other and then multiplies a weight matrix to them
class interaction_module(nn.Module):
def __init__(self, indim, outdim):
def __init__(self, indim, outdim, classes=None):
super(interaction_module, self).__init__()
self.outdim = outdim # if outdim is 1 then use softmax output
# else use RelU
@@ -1844,7 +1846,7 @@ def forward(self,x):
if self.receptive_matmul.mask.is_cuda:
devicetobe = self.qpred.get_device()
self.receptive_matmul.to('cuda:'+str(devicetobe))
attmatix = self.receptive_matmul(qpred, kpred)
attmatrix = self.receptive_matmul(qpred, kpred)
else:
qpred = qpred.transpose(-1,-2)
attmatrix = torch.matmul(qpred, kpred)
@@ -1932,9 +1934,18 @@ def forward(self, x):
# Returns a stretching and adds bias for each kernel dimension after convolution
# Also good example how write own module with any tensor multiplication and initialized parameters
class Kernel_linear(nn.Module):
def __init__(self, n_kernels: int) -> None:
# TODO: VERIFY
# NOTE [Alyss Flynn 2024-10-21]: this init only included one input `n_kernels: int`,
# but common usage appears to require two inputs => `Kernel_linear(currdim, self.kernel_thresholding)`,
# and internally it seems to require additional keyword parameters (**factory_kwargs) to pass into torch.empty.
#
# I added `kernel_thresholding: int` and `**factory_kwargs` to the init, to fix Type/NameErrors,
# but I want to verify these changes are appropriate for typical use of this class.
# Also, we should replace `**factory_kwargs` with explicit key-value pairs to avoid passing invalid args to torch.empty.
def __init__(self, n_kernels: int, kernel_thresholding: int, **factory_kwargs) -> None:
super(Kernel_linear, self).__init__()
self.n_kernels = n_kernels
self.kernel_thresholding = kernel_thresholding
self.weight = Parameter(torch.empty((1, n_kernels, 1), **factory_kwargs))
self.bias = Parameter(torch.empty(n_kernels, **factory_kwargs))
self.init_parameters()
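Following up on the note above, one way to avoid an open-ended `**factory_kwargs` is the pattern used by built-in layers such as `nn.Linear`: accept explicit `device` and `dtype` keywords and pass only those to `torch.empty`. A hedged sketch (the class name, defaults, and the forward broadcast are assumptions, not the repository's settled API):

```python
import torch
import torch.nn as nn
from torch.nn.parameter import Parameter

class KernelLinearSketch(nn.Module):
    """Per-kernel scale and bias applied after convolution (illustrative only)."""

    def __init__(self, n_kernels: int, kernel_thresholding: int,
                 device=None, dtype=None) -> None:
        super().__init__()
        factory_kwargs = {'device': device, 'dtype': dtype}  # nothing else can leak into torch.empty
        self.n_kernels = n_kernels
        self.kernel_thresholding = kernel_thresholding
        self.weight = Parameter(torch.empty((1, n_kernels, 1), **factory_kwargs))
        self.bias = Parameter(torch.empty(n_kernels, **factory_kwargs))
        nn.init.ones_(self.weight)
        nn.init.zeros_(self.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: (batch, n_kernels, length); weight and bias broadcast over batch and length
        return x * self.weight + self.bias[None, :, None]
```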
11 changes: 11 additions & 0 deletions mypy.ini
@@ -0,0 +1,11 @@
[mypy-fft_conv_pytorch.*]
ignore_missing_imports = True

[mypy-scipy.*]
ignore_missing_imports = True

[mypy-sklearn.*]
ignore_missing_imports = True

[mypy-joblib.*]
ignore_missing_imports = True
13 changes: 11 additions & 2 deletions setup.py
@@ -9,7 +9,16 @@
license='LICENSE',
description='drg_tools contains classes and functions to create and analyze sequence-to-function models.',
install_requires=[
"numpy >= 1.14.2",
"torch >= 1.9.0",
"einops == 0.8.0",
"fft_conv_pytorch == 1.2.0",
"joblib",
"logomaker>=0.8",
"matplotlib>=3.8",
"numpy >= 1.26",
"pandas>=2.2.2",
"scikit-learn>=1.4.2",
"scipy>=1.13",
"seaborn>=0.13.2",
"torch >= 2.3.1",
],
)