From e1fec2fe24e4caf3e86ddba51e1d69c0beb4af23 Mon Sep 17 00:00:00 2001 From: Fredrik Bagge Carlson Date: Thu, 14 Sep 2023 16:20:35 +0200 Subject: [PATCH] add exclusion zone option --- Project.toml | 2 +- src/MatrixProfile.jl | 8 +++++--- test/runtests.jl | 11 +++++++++++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/Project.toml b/Project.toml index cd87321..9ceb334 100644 --- a/Project.toml +++ b/Project.toml @@ -19,7 +19,7 @@ Distances = "0.7, 0.8, 0.9, 0.10" LoopVectorization = "0.7.4, 0.8, 0.9, 0.10, 0.11, 0.12" ProgressMeter = "1.2, 1.3" RecipesBase = "0.8, 1.0" -SlidingDistancesBase = "0.2, 0.3" +SlidingDistancesBase = "0.2, 0.3.5" julia = "1" [extras] diff --git a/src/MatrixProfile.jl b/src/MatrixProfile.jl index 1d882a4..733c102 100644 --- a/src/MatrixProfile.jl +++ b/src/MatrixProfile.jl @@ -23,13 +23,15 @@ struct Profile{TT,TP,QT} end """ - profile = matrix_profile(T, m, [dist = ZEuclidean()]; showprogress=true) + profile = matrix_profile(T, m, [dist = ZEuclidean()]; showprogress=true, exclusion_zone = 0) Return the matrix profile and the profile indices of time series `T` with window length `m`. See fields `profile.P, profile.I`. You can also plot the profile. If `dist = ZEuclidean()` the STOMP algorithm will be used. +- `exclusion_zone` denotes an integer number of samples around the trivial match to avoid. The paper suggests using `exclusion_zone = m ÷ 4`. This is likely most beneficial for time-series dominated by low frequencies. + Reference: [Matrix profile II](https://www.cs.ucr.edu/~eamonn/STOMP_GPU_final_submission_camera_ready.pdf). """ -function matrix_profile(T::AbstractVector{<:Number}, m::Int; showprogress=true) +function matrix_profile(T::AbstractVector{<:Number}, m::Int; showprogress=true, kwargs...) n = lastlength(T) l = n-m+1 n > 2m+1 || throw(ArgumentError("Window length too long, maximum length is $((n+1)÷2)")) @@ -47,7 +49,7 @@ function matrix_profile(T::AbstractVector{<:Number}, m::Int; showprogress=true) # The expression with fastmath appears to be both more accurate and faster than both muladd and fma end QT[1] = QT₀[i] - distance_profile!(D, ZEuclidean(), QT, μ, σ, m, i) + distance_profile!(D, ZEuclidean(), QT, μ, σ, m, i; kwargs...) update_min!(P, I, D, i) showprogress && i % 5 == 0 && next!(prog) end diff --git a/test/runtests.jl b/test/runtests.jl index 51f59e7..d8d100a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -38,6 +38,17 @@ end @test m[1] < 1e-6 @test m[2] == 51 || m[2] == 112 + profileez = @inferred matrix_profile(T, length(y0), exclusion_zone=10) + Pez,Iez = profileez.P, profileez.I + @test_nowarn plot(profileez) + # plot(T, layout=2) + # plot!(P, sp=2) + + m = findmin(Pez) + @test m[1] < 1e-6 + @test m[2] == 51 || m[2] == 112 + @test all(Pez .>= P .- 1e-12) + # Test Euclidean between two series profile3 = @inferred matrix_profile(T, T, length(y0))