From bbdbcf06a408835b7d077aa9835c3fcdb4bbfe14 Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub.github@gmail.com>
Date: Fri, 19 Apr 2024 13:59:50 +0530
Subject: [PATCH 1/4] Bugfix in matrix multiplication

---
 src/fillalgebra.jl | 27 ++++++++++++---------------
 test/runtests.jl   | 12 +++++++++++-
 2 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/src/fillalgebra.jl b/src/fillalgebra.jl
index ea454fdf..a8f5e474 100644
--- a/src/fillalgebra.jl
+++ b/src/fillalgebra.jl
@@ -224,30 +224,27 @@ function mul!(C::AbstractMatrix, A::AbstractFillMatrix, B::AbstractFillMatrix, a
 end
 
 function copyfirstcol!(C)
-    @views for i in axes(C,2)[2:end]
-        C[:, i] .= C[:, 1]
-    end
-    return C
-end
-function copyfirstcol!(C::Union{Adjoint, Transpose})
-    # in this case, we copy the first row of the parent to others
-    Cp = parent(C)
-    for colind in axes(Cp, 2)
-        Cp[2:end, colind] .= Cp[1, colind]
-    end
+    @views C[:, begin+1:end] .= _firstcol(C)
     return C
 end
 
-_firstcol(C::AbstractMatrix) = view(C, :, 1)
-_firstcol(C::Union{Adjoint, Transpose}) = view(parent(C), 1, :)
+_firstcol(C::AbstractMatrix) = first(eachcol(C))
+# remove wrappers in case of transposed matrices
+function _firstcol(C::Union{Adjoint{<:Real, <:AbstractMatrix}, Transpose{<:Any, <:AbstractMatrix}})
+    view(parent(C), firstindex(parent(C),1), :)
+end
 
 function _mulfill!(C, A, B::AbstractFillMatrix, alpha, beta)
     check_matmul_sizes(C, A, B)
     if iszero(size(B,2))
         return rmul!(C, beta)
     end
-    mul!(_firstcol(C), A, view(B, :, 1), alpha, beta)
-    copyfirstcol!(C)
+    if iszero(beta)
+        mul!(_firstcol(C), A, view(B, :, firstindex(B,2)), alpha, beta)
+        copyfirstcol!(C)
+    else
+        mul!(C, A, B, alpha, beta)
+    end
     C
 end
 
diff --git a/test/runtests.jl b/test/runtests.jl
index 02992733..a598ac7a 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1701,7 +1701,7 @@ end
     @test (1:5)'E == (1.0:5)'
     @test E*E ≡ E
 
-    for T in (Float64, ComplexF64)
+    @testset for T in (Float64, ComplexF64)
         fv = T == Float64 ? Float64(1.6) : ComplexF64(1.6, 1.3)
         n  = 10
         k  = 12
@@ -1716,6 +1716,16 @@ end
         @test transpose(A)*fillmat ≈ transpose(A)*Array(fillmat)
         @test adjoint(A)*fillvec ≈ adjoint(A)*Array(fillvec)
         @test adjoint(A)*fillmat ≈ adjoint(A)*Array(fillmat)
+
+        # inplace C = F * B' * alpha + C * beta
+        F = Fill(fv, m, k)
+        A = Array(F)
+        B = rand(T, n, k)
+        C = rand(T, m, n)
+        @testset for f in (adjoint, transpose)
+            @test mul!(copy(C), F, f(B)) ≈ mul!(copy(C), A, f(B))
+            @test mul!(copy(C), F, f(B), 1.0, 2.0) ≈ mul!(copy(C), A, f(B), 1.0, 2.0)
+        end
     end
 
     @testset "non-commutative" begin

From e641472f1ccdfcf0220cef171079bc43c6382b4d Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub.github@gmail.com>
Date: Sat, 20 Apr 2024 16:41:51 +0530
Subject: [PATCH 2/4] _mulfill implementation for all matrix orderings

---
 src/fillalgebra.jl | 82 ++++++++++++++++++++++++++++++----------------
 1 file changed, 53 insertions(+), 29 deletions(-)

diff --git a/src/fillalgebra.jl b/src/fillalgebra.jl
index a8f5e474..dc945f00 100644
--- a/src/fillalgebra.jl
+++ b/src/fillalgebra.jl
@@ -212,7 +212,8 @@ for (T, f) in ((:Adjoint, :adjoint), (:Transpose, :transpose))
     end
 end
 
-function mul!(C::AbstractMatrix, A::AbstractFillMatrix, B::AbstractFillMatrix, alpha::Number, beta::Number)
+# unnecessary indirection, added for ambiguity resolution
+function _mulfill!(C::AbstractMatrix, A::AbstractFillMatrix, B::AbstractFillMatrix, alpha, beta)
     check_matmul_sizes(C, A, B)
     ABα = getindex_value(A) * getindex_value(B) * alpha * size(B,1)
     if iszero(beta)
@@ -220,56 +221,79 @@ function mul!(C::AbstractMatrix, A::AbstractFillMatrix, B::AbstractFillMatrix, a
     else
         C .= ABα .+ C .* beta
     end
-    C
+    return C
+end
+
+function mul!(C::AbstractMatrix, A::AbstractFillMatrix, B::AbstractFillMatrix, alpha::Number, beta::Number)
+    _mulfill!(C, A, B, alpha, beta)
+    return C
 end
 
 function copyfirstcol!(C)
-    @views C[:, begin+1:end] .= _firstcol(C)
+    C[:, begin+1:end] .= _firstcol(C)
     return C
 end
 
 _firstcol(C::AbstractMatrix) = first(eachcol(C))
-# remove wrappers in case of transposed matrices
-function _firstcol(C::Union{Adjoint{<:Real, <:AbstractMatrix}, Transpose{<:Any, <:AbstractMatrix}})
-    view(parent(C), firstindex(parent(C),1), :)
+
+function copyfirstrow!(C)
+    C[begin+1:end, :] .= permutedims(_firstrow(C))
+    return C
 end
+_firstrow(C::AbstractMatrix) = first(eachrow(C))
 
-function _mulfill!(C, A, B::AbstractFillMatrix, alpha, beta)
+function _mulfill!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractFillMatrix, alpha, beta)
     check_matmul_sizes(C, A, B)
-    if iszero(size(B,2))
-        return rmul!(C, beta)
-    end
+    iszero(size(B,2)) && return C # no columns in B and C, empty matrix
     if iszero(beta)
-        mul!(_firstcol(C), A, view(B, :, firstindex(B,2)), alpha, beta)
+        # the mat-vec product sums along the rows of A
+        mul!(_firstcol(C), A, _firstcol(B), alpha, beta)
         copyfirstcol!(C)
     else
-        mul!(C, A, B, alpha, beta)
+        # the mat-vec product sums along the rows of A, which produces the first column of ABα
+        # allocate a temporary column vector to store the result
+        v = A * (_firstcol(B) * alpha)
+        C .= v .+ C .* beta
+    end
+    return C
+end
+function _mulfill!(C::AbstractMatrix, A::AbstractFillMatrix, B::AbstractMatrix, alpha, beta)
+    check_matmul_sizes(C, A, B)
+    iszero(size(A,1)) && return C # no rows in A and C, empty matrix
+    Aval = getindex_value(A)
+    if iszero(beta)
+        Crow = _firstrow(C)
+        # sum along the columns of B
+        Crow .= Ref(Aval) .* sum.(eachcol(B)) .* alpha
+        copyfirstrow!(C)
+    else
+        # sum along the columns of B, and allocate the result.
+        # This is the first row of ABα
+        ABα_row = Ref(Aval) .* sum.(eachcol(B)) .* alpha
+        C .= permutedims(ABα_row) .+ C .* beta
     end
-    C
+    return C
 end
 
 function mul!(C::StridedMatrix, A::StridedMatrix, B::AbstractFillMatrix, alpha::Number, beta::Number)
     _mulfill!(C, A, B, alpha, beta)
+    return C
 end
-
-for T in (:Adjoint, :Transpose)
-    @eval function mul!(C::StridedMatrix, A::$T{<:Any, <:StridedMatrix}, B::AbstractFillMatrix, alpha::Number, beta::Number)
-        _mulfill!(C, A, B, alpha, beta)
-    end
-end
-
 function mul!(C::StridedMatrix, A::AbstractFillMatrix, B::StridedMatrix, alpha::Number, beta::Number)
-    check_matmul_sizes(C, A, B)
-    for (colC, colB) in zip(eachcol(C), eachcol(B))
-        mul!(colC, A, colB, alpha, beta)
-    end
-    C
+    _mulfill!(C, A, B, alpha, beta)
+    return C
 end
 
-for (T, f) in ((:Adjoint, :adjoint), (:Transpose, :transpose))
-    @eval function mul!(C::StridedMatrix, A::AbstractFillMatrix, B::$T{<:Any, <:StridedMatrix}, alpha::Number, beta::Number)
-        _mulfill!($f(C), $f(B), $f(A), alpha, beta)
-        C
+for T in (:Adjoint, :Transpose)
+    @eval begin
+        function mul!(C::StridedMatrix, A::$T{<:Any, <:StridedMatrix}, B::AbstractFillMatrix, alpha::Number, beta::Number)
+            _mulfill!(C, A, B, alpha, beta)
+            return C
+        end
+        function mul!(C::StridedMatrix, A::AbstractFillMatrix, B::$T{<:Any, <:StridedMatrix}, alpha::Number, beta::Number)
+            _mulfill!(C, A, B, alpha, beta)
+            return C
+        end
     end
 end
 

From 0fe575c23cfc385bd97a1f49b588bc9125bf6b06 Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub.github@gmail.com>
Date: Sat, 20 Apr 2024 19:25:11 +0530
Subject: [PATCH 3/4] Loop in copyfirstrow!

---
 src/fillalgebra.jl | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/fillalgebra.jl b/src/fillalgebra.jl
index dc945f00..10fe0f00 100644
--- a/src/fillalgebra.jl
+++ b/src/fillalgebra.jl
@@ -237,7 +237,12 @@ end
 _firstcol(C::AbstractMatrix) = first(eachcol(C))
 
 function copyfirstrow!(C)
-    C[begin+1:end, :] .= permutedims(_firstrow(C))
+    # C[begin+1:end, ind] .= permutedims(_firstrow(C))
+    # we loop here as the aliasing check isn't smart enough to
+    # detect that the two sides don't alias, and ends up materializing the RHS
+    for (ind, v) in pairs(_firstrow(C))
+        C[begin+1:end, ind] .= Ref(v)
+    end
     return C
 end
 _firstrow(C::AbstractMatrix) = first(eachrow(C))

From e1e6c1b701e692acf6f76ce013ad666cbbc0c37d Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub.github@gmail.com>
Date: Mon, 22 Apr 2024 14:44:16 +0530
Subject: [PATCH 4/4] Revert some unnecessary changes

---
 src/fillalgebra.jl | 4 +++-
 test/runtests.jl   | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/fillalgebra.jl b/src/fillalgebra.jl
index 10fe0f00..a887eca2 100644
--- a/src/fillalgebra.jl
+++ b/src/fillalgebra.jl
@@ -230,7 +230,9 @@ function mul!(C::AbstractMatrix, A::AbstractFillMatrix, B::AbstractFillMatrix, a
 end
 
 function copyfirstcol!(C)
-    C[:, begin+1:end] .= _firstcol(C)
+    @views for i in axes(C,2)[2:end]
+        C[:, i] .= C[:, 1]
+    end
     return C
 end
 
diff --git a/test/runtests.jl b/test/runtests.jl
index a598ac7a..34c77108 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1701,7 +1701,7 @@ end
     @test (1:5)'E == (1.0:5)'
     @test E*E ≡ E
 
-    @testset for T in (Float64, ComplexF64)
+    for T in (Float64, ComplexF64)
         fv = T == Float64 ? Float64(1.6) : ComplexF64(1.6, 1.3)
         n  = 10
         k  = 12