From defba13423935cec39c9c2a98748802c43cb85f3 Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub@users.noreply.github.com>
Date: Mon, 8 Aug 2022 12:28:49 +0530
Subject: [PATCH] Inplace polynomial transforms (#142)

* inplace polynomial transforms

* fix for Ultraspherical

* version bump to v0.6.5 [skip ci]
---
 Project.toml                     |  2 +-
 src/Caching/banded.jl            |  2 +-
 src/Caching/matrix.jl            | 11 +++--
 src/LinearAlgebra/helper.jl      |  2 +
 src/Operators/SubOperator.jl     |  8 ++++
 src/Operators/general/algebra.jl | 24 ++++++-----
 src/Operators/ldiv.jl            |  1 +
 src/Operators/qr.jl              | 37 +++++++++++-----
 src/Space.jl                     | 74 ++++++++++++++++----------------
 test/SpacesTest.jl               | 19 ++++++++
 10 files changed, 114 insertions(+), 66 deletions(-)

diff --git a/Project.toml b/Project.toml
index 43a5bec0..ca1fe623 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "ApproxFunBase"
 uuid = "fbd15aa5-315a-5a7d-a8a4-24992e37be05"
-version = "0.6.4"
+version = "0.6.5"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/Caching/banded.jl b/src/Caching/banded.jl
index f5602650..97809413 100644
--- a/src/Caching/banded.jl
+++ b/src/Caching/banded.jl
@@ -150,7 +150,7 @@ end
 
 ## back substitution
 # loop to avoid ambiguity with AbstractTRiangular
-for ArrTyp in (:AbstractVector, :AbstractMatrix)
+for ArrTyp in (:AbstractVector, :AbstractMatrix, :StridedVector)
     @eval function ldiv!(U::UpperTriangular{T, <:SubArray{T, 2, <:BandedMatrix{T}, Tuple{UnitRange{Int}, UnitRange{Int}}, false}},
                              u::$ArrTyp{T}) where T
         n = size(u,1)
diff --git a/src/Caching/matrix.jl b/src/Caching/matrix.jl
index 34b06f4b..d8fa0c90 100644
--- a/src/Caching/matrix.jl
+++ b/src/Caching/matrix.jl
@@ -40,9 +40,8 @@ end
 
 # Apply Householder
 
-
 function mulpars(Ac::Adjoint{T,<:QROperatorQ{QROperator{RR,Matrix{T},T},T}},
-                      B::AbstractVector{T},tolerance,maxlength) where {RR,T}
+                      B::AbstractVector{T},tolerance,maxlength, inplace::Val = Val(false)) where {RR,T}
     A = parent(Ac)
     if length(B) > A.QR.ncols
         # upper triangularize extra columns to prepare for \
@@ -52,7 +51,7 @@ function mulpars(Ac::Adjoint{T,<:QROperatorQ{QROperator{RR,Matrix{T},T},T}},
     H=A.QR.H
     M=size(H,1)
     m=length(B)
-    Y=pad(B,m+M+10)
+    Y=_pad!!(inplace)(B,m+M+10)
 
     k=1
     yp=view(Y,1:M)
@@ -86,7 +85,7 @@ end
 
 function mulpars(Ac::Adjoint{T,<:QROperatorQ{QROperator{RR,Matrix{T},T},T}},
                        B::AbstractVector{T},
-                       tolerance,maxlength) where {RR,T<:BlasFloat}
+                       tolerance,maxlength, inplace::Val = Val(false)) where {RR,T<:BlasFloat}
 
     A = parent(Ac)
 
@@ -99,7 +98,7 @@ function mulpars(Ac::Adjoint{T,<:QROperatorQ{QROperator{RR,Matrix{T},T},T}},
 
     if size(A.QR.H,1) == 1  # diagonal scaling, avoid growing
         diagv=view(A.QR.H,1,1:length(B))
-        ret=similar(B)
+        ret = inplace isa Val{true} ? B : similar(B)
         @simd for k=1:length(ret)
             @inbounds ret[k]=(1-2A.QR.H[1,k]^2)*B[k]
         end
@@ -114,7 +113,7 @@ function mulpars(Ac::Adjoint{T,<:QROperatorQ{QROperator{RR,Matrix{T},T},T}},
     sz=sizeof(T)
 
     m=length(B)
-    Y=pad(B,m+M+10)
+    Y=_pad!!(inplace)(B,m+M+10)
     y=pointer(Y)
 
     k=1
diff --git a/src/LinearAlgebra/helper.jl b/src/LinearAlgebra/helper.jl
index 19e1c7fd..56594e53 100644
--- a/src/LinearAlgebra/helper.jl
+++ b/src/LinearAlgebra/helper.jl
@@ -301,6 +301,8 @@ function pad(v::AbstractVector{T}, ::PosInfinity) where T
     end
 end
 
+_pad!!(::Val{false}) = pad
+_pad!!(::Val{true}) = pad!
 
 #TODO:padleft!
 
diff --git a/src/Operators/SubOperator.jl b/src/Operators/SubOperator.jl
index cb878541..667e058b 100644
--- a/src/Operators/SubOperator.jl
+++ b/src/Operators/SubOperator.jl
@@ -351,3 +351,11 @@ function mul_coefficients(A::SubOperator{T,B,Tuple{UnitRange{Int},UnitRange{Int}
         view(A,:,1:length(b))*b
     end
 end
+function mul_coefficients!(A::SubOperator{T,B,Tuple{UnitRange{Int},UnitRange{Int}}},b) where {T,B}
+    if size(A,2) == length(b)
+        mul!(b, AbstractMatrix(A), b)
+    else
+        mul!(b, view(A,:,1:length(b)), b)
+    end
+    return b
+end
diff --git a/src/Operators/general/algebra.jl b/src/Operators/general/algebra.jl
index 5cad928c..c6e2a9c6 100644
--- a/src/Operators/general/algebra.jl
+++ b/src/Operators/general/algebra.jl
@@ -535,18 +535,22 @@ end
 
 
 ## Operations
-function mul_coefficients(A::Operator,b)
-    n=size(b,1)
-    ret = n>0 ? mul_coefficients(view(A,FiniteRange,1:n),b) : b
-end
+for mulcoeff in [:mul_coefficients, :mul_coefficients!]
+    @eval begin
+        function $mulcoeff(A::Operator,b)
+            n=size(b,1)
+            ret = n>0 ? $mulcoeff(view(A,FiniteRange,1:n),b) : b
+        end
 
-function mul_coefficients(A::TimesOperator,b)
-    ret = b
-    for k=length(A.ops):-1:1
-        ret = mul_coefficients(A.ops[k],ret)
-    end
+        function $mulcoeff(A::TimesOperator,b)
+            ret = b
+            for k=length(A.ops):-1:1
+                ret = $mulcoeff(A.ops[k],ret)
+            end
 
-    ret
+            ret
+        end
+    end
 end
 
 
diff --git a/src/Operators/ldiv.jl b/src/Operators/ldiv.jl
index abbbb090..f7bf30b3 100644
--- a/src/Operators/ldiv.jl
+++ b/src/Operators/ldiv.jl
@@ -43,6 +43,7 @@ end
 \(A::Operator,B::MatrixFun;kwds...) = \(A,Array(B);kwds...)
 
 ldiv_coefficients(A::Operator,b;kwds...) = ldiv_coefficients(qr(A),b;kwds...)
+ldiv_coefficients!(A::Operator,b;kwds...) = ldiv_coefficients!(qr(A),b;kwds...)
 
 \(A::Operator,B::Operator) = TimesOperator(inv(A),B)
 
diff --git a/src/Operators/qr.jl b/src/Operators/qr.jl
index 77828eec..1e9fe8a4 100644
--- a/src/Operators/qr.jl
+++ b/src/Operators/qr.jl
@@ -1,7 +1,3 @@
-
-
-
-
 mutable struct QROperator{CO,MT,T} <: Operator{T}
     R_cache::CO
     H::MT # Contains the Householder reflections
@@ -147,9 +143,17 @@ det(A::Operator) = det(qr(A))
 
 mul_coefficients(At::Transpose{T,<:QROperatorQ{T}},B::AbstractVector{T}) where {T<:Real} = parent(At)'*B
 mul_coefficients(At::Transpose{T,<:QROperatorQ{T}},B::AbstractMatrix{T}) where {T<:Real} = parent(At)'*B
+mul_coefficients!(At::Transpose{T,<:QROperatorQ{T}},B::AbstractVector{T}) where {T<:Real} = mul!(B, parent(At)', B)
+mul_coefficients!(At::Transpose{T,<:QROperatorQ{T}},B::AbstractMatrix{T}) where {T<:Real} = mul!(B, parent(At)', B)
 
-mul_coefficients(Ac::Adjoint{T,<:QROperatorQ{QR,T}},B::AbstractVector{T};tolerance=eps(eltype(Ac))/10,maxlength=1000000) where {QR,T} =
-        mulpars(Ac,B,tolerance,maxlength)
+function mul_coefficients(Ac::Adjoint{T,<:QROperatorQ{QR,T}},B::AbstractVector{T};
+            tolerance=eps(eltype(Ac))/10,maxlength=1000000) where {QR,T}
+    mulpars(Ac,B,tolerance,maxlength)
+end
+function mul_coefficients!(Ac::Adjoint{T,<:QROperatorQ{QR,T}},B::AbstractVector{T};
+            tolerance=eps(eltype(Ac))/10,maxlength=1000000) where {QR,T}
+    mulpars(Ac,B,tolerance,maxlength,Val(true))
+end
 
 mul_coefficients(Ac::Adjoint{T,<:QROperatorQ{QR,T}},B::AbstractVector{V};opts...) where {QR,T,V} =
     mul_coefficients(Ac,AbstractVector{T}(B); opts...)
@@ -167,21 +171,30 @@ end
 
 
 ldiv_coefficients(A::QROperatorQ, B; opts...) = mul_coefficients(A', B; opts...)
+ldiv_coefficients!(A::QROperatorQ, B; opts...) = mul_coefficients!(A', B; opts...)
 \(A::QROperatorQ, B::Fun; opts...) = *(A', B; opts...)
 
 
 # R
-function ldiv_coefficients(R::QROperatorR, b::AbstractVector)
-    if length(b) > R.QR.ncols
+function uppertriangularview!(R, lenb)
+    if lenb > R.QR.ncols
         # upper triangularize columns
-        resizedata!(R.QR, :, length(b))
+        resizedata!(R.QR, :, lenb)
     end
-    UpperTriangular(view(R.QR.R_cache.data, 1:length(b), 1:length(b))) \ b
+    return UpperTriangular(view(R.QR.R_cache.data, 1:lenb, 1:lenb))
+end
+function ldiv_coefficients(R::QROperatorR, b::AbstractVector)
+    U = uppertriangularview!(R, length(b))
+    U \ b
+end
+function ldiv_coefficients!(R::QROperatorR, b::AbstractVector)
+    U = uppertriangularview!(R, length(b))
+    ldiv!(U, b)
 end
 
 \(R::QROperatorR,b::Fun{SequenceSpace};kwds...) =
     Fun(domainspace(R),ldiv_coefficients(R,b.coefficients;kwds...))
-\(A::QROperatorR,b::Fun;kwds...) = error("\\ not implement for $(typeof(b)) right-hand sides")
+\(A::QROperatorR,b::Fun;kwds...) = error(\, " not implement for ", typeof(b), " right-hand sides")
 
 
 # QR
@@ -189,6 +202,8 @@ end
 for TYP in (:Real,:Complex,:Number)
     @eval ldiv_coefficients(QR::QROperator{CO,MT,T},b::AbstractVector{T}; kwds...) where {CO,MT,T<:$TYP} =
         ldiv_coefficients(QR.R, mul_coefficients(QR.Q',b;kwds...))
+    @eval ldiv_coefficients!(QR::QROperator{CO,MT,T},b::AbstractVector{T}; kwds...) where {CO,MT,T<:$TYP} =
+        ldiv_coefficients!(QR.R, mul_coefficients!(QR.Q',b;kwds...))
 end
 
 
diff --git a/src/Space.jl b/src/Space.jl
index e72770b8..435ef15f 100644
--- a/src/Space.jl
+++ b/src/Space.jl
@@ -303,18 +303,22 @@ coefficients(f::AbstractVector,sp1::Space,::Type{T2}) where {T2<:Space} = coeffi
 
 ## coefficients defaults to calling Conversion, otherwise it tries to pipe through Chebyshev
 
+_mul_coefficients!!(inplace::Val{true}) = mul_coefficients!
+_mul_coefficients!!(inplace::Val{false}) = mul_coefficients
+_ldiv_coefficients!!(inplace::Val{true}) = ldiv_coefficients!
+_ldiv_coefficients!!(inplace::Val{false}) = ldiv_coefficients
 
 _Fun(v::AbstractVector, sp) = Fun(sp, v)
 _Fun(v, sp) = Fun(v, sp)
-function defaultcoefficients(f,a,b)
+function defaultcoefficients(f,a,b,inplace = Val(false))
     ct=conversion_type(a,b) # gives a space that has a banded conversion to both a and b
 
     if spacescompatible(a,b)
         f
     elseif hasconversion(a,b)
-        mul_coefficients(Conversion(a,b),f)
+        _mul_coefficients!!(inplace)(Conversion(a,b),f)
     elseif hasconversion(b,a)
-        ldiv_coefficients(Conversion(b,a),f)
+        _ldiv_coefficients!!(inplace)(Conversion(b,a),f)
     else
         csp=canonicalspace(a)
 
@@ -323,14 +327,15 @@ function defaultcoefficients(f,a,b)
         end
         if spacescompatible(a,csp) || spacescompatible(b,csp)
             # b is csp too, so we are stuck, try Fun constructor
-            coefficients(default_Fun(_Fun(f,a),b))
+            _coefficients!!(inplace)(default_Fun(_Fun(f,a),b))
         else
-            coefficients(f,a,csp,b)
+            _coefficients!!(inplace)(f,a,csp,b)
         end
     end
 end
 
 coefficients(f,a,b) = defaultcoefficients(f,a,b)
+coefficients!(f,a,b) = defaultcoefficients(f,a,b,Val(true))
 
 
 
@@ -368,50 +373,43 @@ end
 
 for Typ in (:CanonicalTransformPlan,:ICanonicalTransformPlan)
     @eval begin
-        struct $Typ{T,SP,PL,CSP} <: AbstractTransformPlan{T}
+        struct $Typ{T,SP,PL,CSP,inplace} <: AbstractTransformPlan{T}
             space::SP
             plan::PL
             canonicalspace::CSP
         end
-        $Typ(space,plan,csp) =
-            $Typ{eltype(plan),typeof(space),typeof(plan),typeof(csp)}(space,plan,csp)
-        $Typ(::Type{T},space,plan,csp) where {T} =
-            $Typ{T,typeof(space),typeof(plan),typeof(csp)}(space,plan,csp)
+        $Typ(space,plan,csp) = $Typ(space,plan,csp,Val(false))
+        $Typ(space,plan,csp,ip::Val{inplace}) where {inplace} =
+            $Typ{eltype(plan),typeof(space),typeof(plan),typeof(csp),inplace}(space,plan,csp)
     end
 end
-
+inplace(::CanonicalTransformPlan{<:Any,<:Any,<:Any,<:Any,IP}) where {IP} = IP
+inplace(::ICanonicalTransformPlan{<:Any,<:Any,<:Any,<:Any,IP}) where {IP} = IP
 
 # Canonical plan uses coefficients
-function CanonicalTransformPlan(space,v)
-    csp = canonicalspace(space)
-    CanonicalTransformPlan(eltype(v),space,plan_transform(csp,v),csp)
-end
 function checkcanonicalspace(sp)
     csp = canonicalspace(sp)
     sp == csp && error("Override for $sp")
     csp
 end
-function plan_transform(sp::Space,vals)
-    csp = checkcanonicalspace(sp)
-    CanonicalTransformPlan(sp,plan_transform(csp,vals),csp)
+_plan_transform!!(::Val{true}) = plan_transform!
+_plan_transform!!(::Val{false}) = plan_transform
+function CanonicalTransformPlan(space, v, inplace::Val = Val(false))
+    csp = checkcanonicalspace(space)
+    CanonicalTransformPlan(space, _plan_transform!!(inplace)(csp,v), csp, inplace)
 end
-
-function ICanonicalTransformPlan(space,v)
-    csp = canonicalspace(space)
-    cfs = coefficients(v,space,csp)
-    ICanonicalTransformPlan(eltype(v),space,plan_itransform(csp,cfs),csp)
+plan_transform(sp::Space,vals) = CanonicalTransformPlan(sp, vals, Val(false))
+plan_transform!(sp::Space,vals) = CanonicalTransformPlan(sp, vals, Val(true))
+
+_plan_itransform!!(::Val{true}) = plan_itransform!
+_plan_itransform!!(::Val{false}) = plan_itransform
+function ICanonicalTransformPlan(space, v, ip::Val{inplace} = Val(false)) where {inplace}
+    csp = checkcanonicalspace(space)
+    cfs = inplace ? coefficients(v,space,csp) : v
+    ICanonicalTransformPlan(space, _plan_itransform!!(ip)(csp,cfs), csp, ip)
 end
-function plan_itransform(sp::Space,v)
-    csp = checkcanonicalspace(sp)
-    cfs = coefficients(v,sp,csp)
-    ICanonicalTransformPlan(sp,plan_itransform(csp,cfs),csp)
-end
-
-
-plan_transform!(sp::Space,vals) = error("Override for $sp")
-plan_itransform!(sp::Space,cfs) = error("Override for $sp")
-
-
+plan_itransform(sp::Space,v) = ICanonicalTransformPlan(sp, v, Val(false))
+plan_itransform!(sp::Space,v) = ICanonicalTransformPlan(sp, v, Val(true))
 
 # transform converts from values at points(S,n) to coefficients
 # itransform converts from coefficients to values at points(S,n)
@@ -423,9 +421,11 @@ itransform!(S::Space,cfs) = plan_itransform!(S,cfs)*cfs
 transform!(S::Space,cfs) = plan_transform!(S,cfs)*cfs
 
 
-*(P::CanonicalTransformPlan,vals::AbstractVector) = coefficients(P.plan*vals,P.canonicalspace,P.space)
-*(P::ICanonicalTransformPlan,cfs::AbstractVector) = P.plan*coefficients(cfs,P.space,P.canonicalspace)
-
+_coefficients!!(::Val{true}) = coefficients!
+_coefficients!!(::Val{false}) = coefficients
+_mul(P::CanonicalTransformPlan, ip, vals) = _coefficients!!(ip)(P.plan * vals, P.canonicalspace, P.space)
+_mul(P::ICanonicalTransformPlan, ip, cfs) = P.plan * _coefficients!!(ip)(cfs, P.space, P.canonicalspace)
+*(P::Union{CanonicalTransformPlan, ICanonicalTransformPlan}, vals::AbstractVector) = _mul(P, Val(inplace(P)), vals)
 
 
 for OP in (:plan_transform,:plan_itransform,:plan_transform!,:plan_itransform!)
diff --git a/test/SpacesTest.jl b/test/SpacesTest.jl
index 791fe0f1..6f034d3f 100644
--- a/test/SpacesTest.jl
+++ b/test/SpacesTest.jl
@@ -177,5 +177,24 @@ using ApproxFunOrthogonalPolynomials
         f = Fun(x->x^2, Chebyshev())
         v = coefficients(f, Chebyshev(), Legendre())
         @test v ≈ coefficients(Fun(x->x^2, Legendre()))
+
+        @testset "inplace transform" begin
+            @testset for sp_c in Any[Legendre(), Chebyshev(), Jacobi(1,2), Jacobi(0.3, 2.3), 
+                    Ultraspherical(1), Ultraspherical(2)]
+                @testset for sp in Any[sp_c, NormalizedPolynomialSpace(sp_c)]
+                    v = rand(10)
+                    v2 = copy(v)
+                    @test itransform!(sp, transform!(sp, v)) ≈ v
+                    @test transform!(sp, v) ≈ transform(sp, v2)
+                    @test itransform(sp, v) ≈ v2
+                    @test itransform!(sp, v) ≈ v2
+
+                    # different vector
+                    p_fwd = ApproxFunBase.plan_transform!(sp, v)
+                    p_inv = ApproxFunBase.plan_itransform!(sp, v)
+                    @test p_inv * copy(p_fwd * copy(v)) ≈ v
+                end
+            end
+        end
     end
 end