From 84027850946b82953d2ab7be804946f606dc4f14 Mon Sep 17 00:00:00 2001 From: Fredrik Bagge Carlson Date: Tue, 5 May 2020 10:56:29 +0800 Subject: [PATCH] work on supporting sliding distances --- examples/mixed_bag.jl | 37 +++++++++++++++++++++++++++++++++++++ src/MatrixProfile.jl | 42 ++++++++++++++++++++++-------------------- src/motifs.jl | 11 ++++++++--- src/plotting.jl | 18 +++++++++--------- test/runtests.jl | 7 ------- 5 files changed, 76 insertions(+), 39 deletions(-) create mode 100644 examples/mixed_bag.jl diff --git a/examples/mixed_bag.jl b/examples/mixed_bag.jl new file mode 100644 index 0000000..7e23b85 --- /dev/null +++ b/examples/mixed_bag.jl @@ -0,0 +1,37 @@ +using MatrixProfile +# Download data from https://sites.google.com/site/snippetfinderinfo/home/MixedBag.zip +datapath = "/tmp/MixedBag/" +cd(datapath) + +## Some data +T = parse.(Int, split(join(Char.(read("01911m_02019m_III_7680_200.txt"))), ',')) +profile, snips, Cfracs = snippets(T, 3, 100, 50) +plot(plot(snips, layout = (1, 3), size = (800, 200)), plot(profile, snips, legend = false)) + +## Power demand +T = parse.(Float32, readlines("Powerdemand_12_4500_200.txt")) +profile, snips, Cfracs = snippets(T, 4, 24, 12) +plot(plot(snips, size = (800, 200), xrotation=45), plot(profile, snips, legend = false, link=:none)) + +profile = matrix_profile(T, 24) +mot = motifs(profile, 4, 3, 48) +plot(profile, mot, legend=false) +plot(mot) + + +## Robot dog +T = parse.(Float32, readlines("RoboticDogActivityY_64_4000_400.txt")) +profile, snips, Cfracs = snippets(T, 4, 100, 10) +plot(plot(snips, size = (800, 200), xrotation=45), plot(profile, snips, legend = false, link=:none)) + +profile = matrix_profile(T, 100) +mot = motifs(profile, 4, 2, 200) +plot(profile, mot, legend=false) +plot(mot) + +using DynamicAxisWarping + +profile = matrix_profile(T, 100, DTWDistance(DTW(3))) +mot = motifs(profile, 4, 2, 200) +plot(profile, mot, legend=false) +plot(mot) diff --git a/src/MatrixProfile.jl b/src/MatrixProfile.jl index 645ebeb..bdd2200 100644 --- a/src/MatrixProfile.jl +++ b/src/MatrixProfile.jl @@ -27,7 +27,7 @@ Return the matrix profile and the profile indices of time series `T` with window Reference: [Matrix profile II](https://www.cs.ucr.edu/~eamonn/STOMP_GPU_final_submission_camera_ready.pdf). """ function matrix_profile(T::AbstractVector{<:Number}, m::Int; showprogress=true) - n = length(T) + n = lastlength(T) l = n-m+1 n > 2m+1 || throw(ArgumentError("Window length too long, maximum length is $((n+1)÷2)")) μ,σ = running_mean_std(T, m) @@ -46,7 +46,7 @@ function matrix_profile(T::AbstractVector{<:Number}, m::Int; showprogress=true) QT[1] = QT₀[i] distance_profile!(D, QT, μ, σ, m, i) update_min!(P, I, D, i) - showprogress && i % 10 == 0 && next!(prog) + showprogress && i % 5 == 0 && next!(prog) end Profile(T, P, I, m, nothing) end @@ -55,7 +55,7 @@ end function matrix_profile(A::AbstractVector{<:Number}, T::AbstractVector{<:Number}, m::Int; showprogress=true) n = length(A) l = n-m+1 - lT = length(T)-m+1 + lT = lastlength(T)-m+1 # n > 2m+1 || throw(ArgumentError("Window length too long, maximum length is $((n+1)÷2)")) μT,σT = running_mean_std(T, m) μA,σA = running_mean_std(A, m) @@ -73,7 +73,7 @@ function matrix_profile(A::AbstractVector{<:Number}, T::AbstractVector{<:Number} QT[1] = dot(getwindow(A, m, i), getwindow(T,m,1)) #QT₀[i] distance_profile!(D, QT, μA, σA, μT, σT, m, i) update_min!(P, I, D, i, false) - showprogress && i % 10 == 0 && next!(prog) + showprogress && i % 5 == 0 && next!(prog) end Profile(T, P, I, m, A) end @@ -111,7 +111,9 @@ distance_profile(QT::AbstractVector{S}, μA, σA, μT, σT, m::Int) where {S<:Nu function update_min!(P, I, D, i, sym=true) - @inbounds for j in eachindex(P,I,D) + n = lastlength(P) + @assert n == lastlength(I) == lastlength(D) "Lengths are not consistent, $(eachindex(P,I,D))" + @inbounds for j in 1:n sym && j == i && continue if D[j] < P[j] P[j] = D[j] @@ -165,7 +167,6 @@ end -## General data and distance =================================================== """ distance_profile(Q, T) @@ -184,13 +185,14 @@ function distance_profile!(D::AbstractVector{S}, Q::AbstractVector{S}, T::Abstra end distance_profile(Q, T) = distance_profile!(similar(T, length(T)-length(Q)+1), Q, T) +## General data and distance =================================================== function matrix_profile(T, m::Int, dist; showprogress=true) - n = length(T) + n = lastlength(T) l = n-m+1 # n > 2m+1 || throw(ArgumentError("Window length too long, maximum length is $((n+1)÷2)")) P = distance_profile(dist, getwindow(T,m,1), T) - P[1] = typemax(eltype(P)) + P[1] = typemax(floattype(P)) D = similar(P) I = ones(Int, l) prog = Progress((l - 1) ÷ 10, dt=1, desc="Matrix profile", barglyphs = BarGlyphs("[=> ]"), color=:blue) @@ -198,15 +200,15 @@ function matrix_profile(T, m::Int, dist; showprogress=true) Ti = getwindow(T,m,i) distance_profile!(D, dist, Ti, T) update_min!(P, I, D, i) - showprogress && i % 10 == 0 && next!(prog) + showprogress && i % 5 == 0 && next!(prog) end Profile(T, P, I, m, nothing) end function matrix_profile(A::AbstractArray{S}, B::AbstractArray{S}, m::Int, dist; showprogress=true) where S - n = length(A) + n = lastlength(A) l = n-m+1 - lT = length(B)-m+1 + lT = lastlength(B)-m+1 n > 2m+1 || throw(ArgumentError("Window length too long, maximum length is $((n+1)÷2)")) P = distance_profile(dist, getwindow(A,m,1), B) # P[1] = typemax(eltype(P)) @@ -217,7 +219,7 @@ function matrix_profile(A::AbstractArray{S}, B::AbstractArray{S}, m::Int, dist; Ai = getwindow(A,m,i) distance_profile!(D, dist, Ai, B) update_min!(P, I, D, i, false) - showprogress && i % 10 == 0 && next!(prog) + showprogress && i % 5 == 0 && next!(prog) end Profile(B, P, I, m, A) end @@ -263,7 +265,7 @@ All MP distance profiles between subsequences of length `S` in `T` using interna """ function mpdist_profile(T::AbstractVector,S::Int, m::Int) S >= m || throw(ArgumentError("S should be > m")) - n = length(T) + n = lastlength(T) pad = S * ceil(Int, n / S) - n T = [T;zeros(pad)] @showprogress 1 "MP dist profile" map(1:S:n-S) do i @@ -282,8 +284,8 @@ function mpdist_profile(A::AbstractVector, B::AbstractVector, m::Int) # % A the longer time series # % B the shorter time series th = 0.05 - l1 = length(A) - l2 = length(B) + l1 = lastlength(A) + l2 = lastlength(B) if l1 < l2 A, B = B, A end @@ -296,8 +298,8 @@ function mpdist_profile(A::AbstractVector, B::AbstractVector, m::Int) moving_means[:,i] = moving_mean(D[:,i], cols) end - l = length(A)-length(B)+1 - N_right_marginal = length(B)-m+1 + l = lastlength(A)-lastlength(B)+1 + N_right_marginal = lastlength(B)-m+1 left_marginal = zeros(N_right_marginal) map(1:l) do i @@ -310,8 +312,8 @@ end function mpdistmat(A::AbstractVector, B::AbstractVector, m::Int) - N = length(B)-m +1 - D = similar(A, length(A)-m+1, N) + N = lastlength(B)-m +1 + D = similar(A, lastlength(A)-m+1, N) for i = 1:N D[:,i] = distance_profile(getwindow(B, m, i), A) end @@ -335,7 +337,7 @@ function snippets(T, k, S, m = max(S÷10, 4)) snippet_profiles = similar(D, k) for j = 1:k minA = Inf - for i = 1:n÷S + for i = 1:length(D) A = sum(min.(D[i], Q)) if A < minA minA = A diff --git a/src/motifs.jl b/src/motifs.jl index 6e7fcb3..fa5d5e5 100644 --- a/src/motifs.jl +++ b/src/motifs.jl @@ -27,21 +27,26 @@ seqs(m::Array{<:Subsequence}) = getfield.(m, :seq) seqlength(m::Array{<:Subsequence}) = length(m[1].seq) subseqtype(m::Array{<:Subsequence}) = subseqtype(m[1]) -function motifs(p::Profile, k, r, th = 0, found_motifs = Motif[]) +function motifs(p::Profile, k, r, th = p.m, found_motifs = Motif[]; dist = nothing) length(found_motifs) == k && return found_motifs m = p.m P = copy(p.P) remove_found_motifs!(P, found_motifs, th) d, i = findmin(P) - distance_profile!(P, getwindow(p.T, m, i), p.T) + if dist === nothing + distance_profile!(P, getwindow(p.T, m, i), p.T) + else + distance_profile!(P, dist, getwindow(p.T, m, i), p.T) + end remove_found_motifs!(P, found_motifs, th) onsets = findall(<=((d + 1e-5)*r), P) i ∈ onsets || push!(onsets, i) sort!(onsets) push!(found_motifs, Motif(Subsequence(p, onsets, "Motif"))) - motifs(p::Profile, k, r, th, found_motifs) + motifs(p::Profile, k, r, th, found_motifs; dist=dist) end + function Subsequence(p::Profile, onsets, type::String = "Subsequence") m = p.m inds = 0:m-1 diff --git a/src/plotting.jl b/src/plotting.jl index 16097d7..aedd1e5 100644 --- a/src/plotting.jl +++ b/src/plotting.jl @@ -8,7 +8,7 @@ _append_inf(x) = vec([x; fill(Inf, 1, size(x,2))]) title --> "T" label --> "T" subplot --> 1 - p.T + p.T isa AbstractVector{<:Number} ? p.T : reduce(hcat, p.T)' end @series begin title --> "Matrix profile" @@ -42,19 +42,19 @@ end end -@recipe function plot(motifs::SubSeqType) +@recipe function plot(sequences::SubSeqType) - motifs isa Vector || (motifs = [motifs]) - layout --> length(motifs) - for (j,m) in enumerate(motifs) + sequences isa Vector || (sequences = [sequences]) + layout --> length(sequences) + for (j,s) in enumerate(sequences) @series begin legend --> false group := j - label --> string(subseqtype(m), " ", j) - title --> string(subseqtype(m), " ", j) + label --> string(subseqtype(s), " ", j) + title --> string(subseqtype(s), " ", j) subplot := j - inds = 0:seqlength(m)-1 - _append_inf(onsets(m)' .+ inds), _append_inf(reduce(hcat, seqs(m))) + inds = 0:seqlength(s)-1 + _append_inf(onsets(s)' .+ inds), _append_inf(reduce(hcat, seqs(s))) end end end diff --git a/test/runtests.jl b/test/runtests.jl index 5aada1a..d8d88eb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -180,13 +180,6 @@ end end -## Test snippets on long series -# path = "/tmp/MixedBag/01911m_02019m_III_7680_200.txt" -# T = parse.(Int, split(join(Char.(read(path))), ',')) -# profile, mot, Cfracs = MatrixProfile.snippets(T, 3, 100, 50) -# plot(profile, mot, legend=false) -# plot(mot, layout=(1,3), size=(800,200)) - # Benchmark