diff --git a/src/grads.jl b/src/grads.jl
index fdaf29e..cf51eb1 100644
--- a/src/grads.jl
+++ b/src/grads.jl
@@ -69,6 +69,53 @@ function ChainRulesCore.rrule(::typeof(T_R3_inv), x::AbstractArray{T,N}, R::Abst
     return y, T_R3_inv_pullback
 end
 
+"""
+    softmax1(x, dims = 1)
+
+Behaves like softmax, but as though there was an additional logit of zero along dims (which is excluded from the output). So the values will sum to a value between zero and 1.
+
+See https://www.evanmiller.org/attention-is-off-by-one.html
+"""
+function softmax1(x::AbstractArray{T}; dims = 1) where {T}
+    _zero = T(0)
+    max_ = max.(fast_maximum2(x; dims), _zero)
+    @fastmath out = exp.(x .- max_)
+    tmp = sum(out, dims = dims)
+    out ./ (tmp + exp.(-max_))
+end
+
+# taken from NNlib
+fast_maximum2(x::AbstractArray{T}; dims) where {T} = @fastmath reduce(max, x; dims, init = float(T)(-Inf))
+
+function ∇softmax1_data(dy::AbstractArray{T}, y::AbstractArray{S}; dims = 1) where {T,S}
+    dx = if NNlib.within_gradient(y)
+        tmp = dy .* y
+        tmp .- y .* sum(tmp; dims)
+    else
+        # This path is faster, only safe for 1st derivatives though.
+        # Was previously `∇softmax!(dx, dy, x, y; dims)` to allow CUDA overloads,
+        # but that was slow: https://github.com/FluxML/NNlibCUDA.jl/issues/30
+        out = similar(y, promote_type(T,S))  # sure to be mutable
+        out .= dy .* y
+        out .= out .- y .* sum(out; dims)
+    end
+end
+
+function ChainRulesCore.rrule(::typeof(softmax1), x; dims = 1)
+    y = softmax1(x; dims)
+    softmax_pullback(dy) = (NoTangent(), ∇softmax1_data(unthunk(dy), y; dims))
+    return y, softmax_pullback
+end
+
+
+function pre_softmax_aijh(qh::AbstractArray{T},kh::AbstractArray{T},Ti,qhp::AbstractArray{T},khp::AbstractArray{T}, bij::AbstractArray{T}, gamma_h::AbstractArray{T}) where T
+    w_C = T(sqrt(2f0/(9f0*size(qhp,3))))
+    dim_scale = T(1f0/sqrt(size(qh,1)))
+    w_L = T(1f0/sqrt(3f0))
+
+    w_L.*(dim_scale.*qhTkh(qh,kh) .+ bij .- w_C/2 .* gamma_h .* dropdims(diff_sum_glob(Ti,qhp,khp),dims=(1,3)))
+end
+
 #=
 function diff_sum_glob(T, q, k)
     bs = size(q) 
@@ -151,47 +198,4 @@ function ChainRulesCore.rrule(::typeof(qhTkh), q, k)
     end
     return qhTkh, qhTkh_pullback
 end
-=#
-"""
-softmax1(x, dims = 1)
-
-Behaves like softmax, but as though there was an additional logit of zero along dims (which is excluded from the output). So the values will sum to a value between zero and 1.
-"""
-function softmax1(x::AbstractArray{T}; dims = 1) where {T}
-    _zero = T(0)
-    max_ = max.(fast_maximum2(x; dims), _zero)
-    @fastmath out = exp.(x .- max_)
-    tmp = sum(out, dims = dims)
-    out ./ (tmp + exp.(-max_))
-end
-# Pirated/adapted from NNlib
-fast_maximum2(x::AbstractArray{T}; dims) where {T} = @fastmath reduce(max, x; dims, init = float(T)(-Inf))
-
-function ∇softmax1_data(dy::AbstractArray{T}, y::AbstractArray{S}; dims = 1) where {T,S}
-    dx = if NNlib.within_gradient(y)
-        tmp = dy .* y
-        tmp .- y .* sum(tmp; dims)
-    else
-        # This path is faster, only safe for 1st derivatives though.
-        # Was previously `∇softmax!(dx, dy, x, y; dims)` to allow CUDA overloads,
-        # but that was slow: https://github.com/FluxML/NNlibCUDA.jl/issues/30
-        out = similar(y, promote_type(T,S))  # sure to be mutable
-        out .= dy .* y
-        out .= out .- y .* sum(out; dims)
-    end
-end
-
-function ChainRulesCore.rrule(::typeof(softmax1), x; dims = 1)
-    y = softmax1(x; dims)
-    softmax_pullback(dy) = (NoTangent(), ∇softmax1_data(unthunk(dy), y; dims))
-    return y, softmax_pullback
-end
-
-
-function pre_softmax_aijh(qh::AbstractArray{T},kh::AbstractArray{T},Ti,qhp::AbstractArray{T},khp::AbstractArray{T}, bij::AbstractArray{T}, gamma_h::AbstractArray{T}) where T
-    w_C = T(sqrt(2f0/(9f0*size(qhp,3))))
-    dim_scale = T(1f0/sqrt(size(qh,1)))
-    w_L = T(1f0/sqrt(3f0))
-
-    w_L.*(dim_scale.*qhTkh(qh,kh) .+ bij .- w_C/2 .* gamma_h .* dropdims(diff_sum_glob(Ti,qhp,khp),dims=(1,3)))
-end
+=#
\ No newline at end of file
diff --git a/src/layers.jl b/src/layers.jl
index d8bd34e..9a9f58e 100644
--- a/src/layers.jl
+++ b/src/layers.jl
@@ -399,11 +399,10 @@ function IPCrossAStructureModuleLayer(settings::NamedTuple; dropout_p = 0.1, af
     return IPCrossAStructureModuleLayer(settings, layers)
 end
 
+# We could skip making this struct and just have it be a cross IPA struct. 
 """
 Self IPA Partial Structure Module initialization - single layer - adapted from AF2. 
 """
-
-# We could skip making this struct and just have it be a cross IPA struct. 
 struct IPAStructureModuleLayer
     settings::NamedTuple
     layers::NamedTuple
@@ -418,9 +417,6 @@ function (structuremodulelayer::Union{IPAStructureModuleLayer, IPCrossAStructure
     return structuremodulelayer(T, S, T, S; zij = zij, mask = mask)
 end
 
-"""
-Cross IPA Partial Structure Module - single layer - adapted from AF2. From left to right. 
-"""
 function (structuremodulelayer::Union{IPCrossAStructureModuleLayer, IPAStructureModuleLayer})(T_L, S_L, T_R, S_R; zij = nothing, mask = 0)
     settings = structuremodulelayer.settings
     if settings.c_z > 0 && zij === nothing
diff --git a/src/rotational_utils.jl b/src/rotational_utils.jl
index 2780dd3..3145d8c 100644
--- a/src/rotational_utils.jl
+++ b/src/rotational_utils.jl
@@ -1,15 +1,11 @@
-# from AF2 supplementary: Algorithm 23 Backbone update
-"""
-Takes a 3xN matrix of imaginary quaternion components, `bcd`, sets the real part to `a`, and normalizes to unit quaternions.
-"""
-function bcds2quats(bcd::AbstractMatrix{T}, a::T=T(1)) where T <: Real
+#= from AF2 supplementary: Algorithm 23 Backbone update
+Takes a 3xN matrix of imaginary quaternion components, `bcd`, sets the real part to `a`, and normalizes to unit quaternions. =#
+function bcds2quats(bcd::AbstractMatrix{T}, a::T=T(1)) where T<:Real
     norms = sqrt.(a .+ sum(abs2, bcd, dims=1))
     return vcat(a ./ norms, bcd ./ norms)
 end
 
-"""
-Takes a 4xN matrix of unit quaternions and returns a 3x3xN array of rotation matrices.
-"""
+# Takes a 4xN matrix of unit quaternions and returns a 3x3xN array of rotation matrices.
 function rotmatrix_from_quat(q::AbstractMatrix{<:Real})
     sx = 2q[1, :] .* q[2, :]
     sy = 2q[1, :] .* q[3, :]
@@ -73,12 +69,9 @@ function batched_mul_T2(x::AbstractArray{T1,N}, y::AbstractArray{T2,N}) where {T
     return reshape(z, size(z, 1), size(z, 2), batch_size...)
 end
 
-
-"""
-Applies the SE3 transformations T = (rot,trans) ∈ SE(3)^N
+#= Applies the SE3 transformations T = (rot,trans) ∈ SE(3)^N
 to N batches of m points in R3, i.e., mat ∈ R^(3 x m x N) ↦ T(mat) ∈ R^(3 x m x N).
-Note here that rotations here are represented in matrix form. 
-"""
+Note here that rotations here are represented in matrix form. =#
 function T_R3(x::AbstractArray{T}, R::AbstractArray{T}, t::AbstractArray{T}) where T
     x′ = reshape(x, 3, size(x, 2), :)
     R′ = reshape(R, 3, 3, :)
@@ -88,10 +81,8 @@ function T_R3(x::AbstractArray{T}, R::AbstractArray{T}, t::AbstractArray{T}) whe
     return y
 end
 
-"""
-Applies the group inverse of the SE3 transformations T = (R,t) ∈ SE(3)^N to N batches of m points in R3,
-such that T^-1(T*x) = T^-1(Rx+t) =  R^T(Rx+t-t) = x.
-"""
+#= Applies the group inverse of the SE3 transformations T = (R,t) ∈ SE(3)^N to N batches of m points in R3,
+such that T^-1(T*x) = T^-1(Rx+t) =  R^T(Rx+t-t) = x. =#
 function T_R3_inv(y::AbstractArray{T}, R::AbstractArray{T}, t::AbstractArray{T}) where T
     y′ = reshape(y, 3, size(y, 2), :)
     R′ = reshape(R, 3, 3, :)
@@ -101,9 +92,7 @@ function T_R3_inv(y::AbstractArray{T}, R::AbstractArray{T}, t::AbstractArray{T})
     return x
 end
 
-"""
-Returns the composition of two SE(3) transformations T_1 and T_2. If T1 = (R1,t1), and T2 = (R2,t2) then T1*T2 = (R1*R2, R1*t2 + t1).
-"""
+# Returns the composition of two SE(3) transformations T_1 and T_2. If T1 = (R1,t1), and T2 = (R2,t2) then T1*T2 = (R1*R2, R1*t2 + t1).
 function T_T(T_1, T_2)
     R1, t1 = T_1 
     R2, t2 = T_2
@@ -112,9 +101,7 @@ function T_T(T_1, T_2)
     return (new_rot,new_trans)
 end
 
-"""
-Takes a 6-dim vec and maps to a rotation matrix and translation vector, which is then applied to the input frames.
-"""
+# Takes a 6-dim vec and maps to a rotation matrix and translation vector, which is then applied to the input frames.
 function update_frame(Ti, arr)
     bcds = reshape(arr[:,1,:,:],3,:)
     rotmat = rotmatrix_from_quat(bcds2quats(bcds))  
@@ -130,9 +117,7 @@ unzip(a) = map(x->getfield.(a, x), fieldnames(eltype(a)))
 
 centroid(coords::AbstractMatrix) = vec(sum(coords; dims=2)) / size(coords, 2)
 
-"""
-Get frame from residue
-"""
+# Get rotation matrix and translation from residue
 function calculate_residue_rotation_and_translation(residue_xyz::AbstractMatrix)
     # Returns the rotation matrix and the translation of a gien residue. 
     N = residue_xyz[:, 1]
@@ -152,30 +137,29 @@ function calculate_residue_rotation_and_translation(residue_xyz::AbstractMatrix)
 end
 
 """
-Get the assosciated SE(3) frame for all residues in a prot
+    get_T(coords::Array{<:Real, 3})
+Get the assosciated SE(3) frame for all residues in a protein backbone represented as a 3x3xL array of coordinates.
 """
-function get_T(protxyz::Array{<:Real, 3}) 
-    ti = stack.(unzip([calculate_residue_rotation_and_translation(protxyz[:,:,i]) for i in axes(protxyz,3)]))
-    return (ti[1],reshape(ti[2],3,1,:))
+function get_T(coords::Array{<:Real, 3})
+    Ti = stack.(unzip([calculate_residue_rotation_and_translation(coords[:,:,i]) for i in axes(coords,3)]))
+    return (Ti[1],reshape(Ti[2],3,1,:))
 end
 
 """
-Get the assosciated SE(3) frames for all residues in a batch of prots 
+Get the associated SE(3) frames for all residues in a batch of proteins
 """
-function get_T_batch(protxyz::Array{<:Real, 4})
-    rots = zeros(3,3,size(protxyz)[3:4]...)
-    trans = zeros(3,1,size(protxyz)[3:4]...)
-    for j in axes(protxyz,4)
-        Tij = get_T(protxyz[:,:,:,j])
+function get_T_batch(coords::Array{<:Real, 4})
+    rots = zeros(3,3,size(coords)[3:4]...)
+    trans = zeros(3,1,size(coords)[3:4]...)
+    for j in axes(coords,4)
+        Tij = get_T(coords[:,:,:,j])
         rots[:,:,:,j] = Tij[1]
         trans[:,:,:,j] = Tij[2]
     end
     return (rots, trans)
 end
 
-"""
-Index into a T up to index i. 
-"""
+# Index into a T up to index i.
 function T_till(T,i)
     Tr, Tt = T[1][:,:,1:i,:], T[2][:,:,1:i,:]
     return Tr, Tt
diff --git a/test/runtests.jl b/test/runtests.jl
index e07da30..061267a 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -100,8 +100,8 @@ using ChainRulesTestUtils
 
         # Get 1 global SE(3) transformation for each batch.
         T_glob = (get_rotation(batch_size), get_translation(batch_size))
-        T_GlobL = (stack([T_glob[1] for i in 1:framesL],dims = 3), stack([T_glob[2] for i in 1:framesL], dims = 3))
-        T_GlobR = (stack([T_glob[1] for i in 1:framesR],dims = 3), stack([T_glob[2] for i in 1:framesR], dims = 3))
+        T_GlobL = (stack([T_glob[1] for i in 1:framesL],dims = 3), stack([T_glob[2] for i in 1:framesL], dims=3))
+        T_GlobR = (stack([T_glob[1] for i in 1:framesR],dims = 3), stack([T_glob[2] for i in 1:framesR], dims=3))
         
         T_newL = InvariantPointAttention.T_T(T_GlobL,T_locL)
         T_newR = InvariantPointAttention.T_T(T_GlobR,T_locR)