From 466cabb5c75432c60f8394a56c5a9ab021ff5bfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Theodor=20Bj=C3=B6rk?= <51053891+Glowster@users.noreply.github.com> Date: Tue, 25 Jun 2024 10:03:16 +0200 Subject: [PATCH 1/7] Fixed comment AbstractTreeNode.jl --- src/core/nodes/AbstractTreeNode.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/nodes/AbstractTreeNode.jl b/src/core/nodes/AbstractTreeNode.jl index 072916e..f2ef521 100644 --- a/src/core/nodes/AbstractTreeNode.jl +++ b/src/core/nodes/AbstractTreeNode.jl @@ -456,7 +456,7 @@ function ladderize!(tree::T) where {T<:AbstractTreeNode} end end -# Creates a dictionary of all the child counts (including the node itself) which can then be used by ladderize to sort the nodes +# Creates a dictionary of all the child counts which can then be used by ladderize to sort the nodes function countchildren(tree::T) where {T<:AbstractTreeNode} # Initialize the dictionary to store the number of children for each node children_count = Dict{T, Int}() From 3491e7780b6afcfec6c5340c02854c5d905aa706 Mon Sep 17 00:00:00 2001 From: Maximilian Danielsson Date: Wed, 24 Jul 2024 16:01:46 +0200 Subject: [PATCH 2/7] Add leaf_name_transform kwarg to populate_tree --- src/utils/misc.jl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/utils/misc.jl b/src/utils/misc.jl index 24d7e33..114f249 100644 --- a/src/utils/misc.jl +++ b/src/utils/misc.jl @@ -82,6 +82,7 @@ function populate_tree!( data; init_all_messages = true, tolerate_missing = 1, #0 = error if missing; 1 = warn and set to missing data; 2 = set to missing data + leaf_name_transform = x -> x ) if init_all_messages internal_message_init!(tree, starting_message) @@ -90,8 +91,8 @@ function populate_tree!( end name_dic = Dict(zip(names, 1:length(names))) for n in getleaflist(tree) - if haskey(name_dic, n.name) - populate_message!(n.message, data[name_dic[n.name]]) + if haskey(name_dic, leaf_name_transform(n.name)) + populate_message!(n.message, data[name_dic[leaf_name_transform(n.name)]]) else warn_str = n.name * " on tree but not found in names." if tolerate_missing == 0 @@ -107,7 +108,7 @@ end """ - populate_tree!(tree::FelNode, starting_message, names, data; init_all_messages = true, tolerate_missing = 1) + populate_tree!(tree::FelNode, starting_message, names, data; init_all_messages = true, tolerate_missing = 1, leaf_name_transform = x -> x) Takes a tree, and a `starting_message` (which will serve as the memory template for populating messages all over the tree). `starting_message` can be a message (ie. a vector of Partitions), but will also work with a single Partition (although the tree) @@ -117,6 +118,7 @@ When a leaf on the tree has a name that doesn't match anything in `names`, then - `tolerate_missing = 0`, an error will be thrown - `tolerate_missing = 1`, a warning will be thrown, and the message will be set to the uninformative message (requires identity!(::Partition) to be defined) - `tolerate_missing = 2`, the message will be set to the uninformative message, without warnings (requires identity!(::Partition) to be defined) +A renaming function that can eg. strip tags from the tree when matching leaf names with `names` can be passed to `leaf_name_transform` """ function populate_tree!( tree::FelNode, @@ -125,6 +127,7 @@ function populate_tree!( data; init_all_messages = true, tolerate_missing = 1, + leaf_name_transform = x -> x ) populate_tree!( tree, @@ -133,6 +136,7 @@ function populate_tree!( data, init_all_messages = init_all_messages, tolerate_missing = tolerate_missing, + leaf_name_transform = leaf_name_transform ) end From 9515055f64d7d045169c18ef55569893d520ed0a Mon Sep 17 00:00:00 2001 From: murrellb Date: Wed, 31 Jul 2024 15:00:14 +0200 Subject: [PATCH 3/7] Untested commit - interpolated discrete model --- src/models/discrete_models/discrete_models.jl | 1 + .../interpolated_discrete_model.jl | 142 ++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 src/models/discrete_models/interpolated_discrete_model.jl diff --git a/src/models/discrete_models/discrete_models.jl b/src/models/discrete_models/discrete_models.jl index 45d07dc..9517e29 100644 --- a/src/models/discrete_models/discrete_models.jl +++ b/src/models/discrete_models/discrete_models.jl @@ -3,4 +3,5 @@ include("codon_models.jl") include("GeneralCTMC.jl") include("DiagonalizedCTMC.jl") include("PiQ.jl") +include("interpolated_discrete_model.jl") include("utils/utils.jl") diff --git a/src/models/discrete_models/interpolated_discrete_model.jl b/src/models/discrete_models/interpolated_discrete_model.jl new file mode 100644 index 0000000..9588375 --- /dev/null +++ b/src/models/discrete_models/interpolated_discrete_model.jl @@ -0,0 +1,142 @@ +#InterpolatedDiscreteModel works by storing a number of P matrices, and the "t" values to which they correspond +#For a requested t, the returned P matrix is interpolated between it's two neighbours + +function check_eq_P(P) + return maximum(std(P,dims = 1)) +end + +mutable struct InterpolatedDiscreteModel <: DiscreteStateModel + tvec::Vector{Float64} + Pvec::Array{Float64,3} #Now a tensor. Pvec[:,:,i] is the ith P matrix. + #Generator must be something that returns a P matrix with T as the only argument + function InterpolatedDiscreteModel(siz::Int64, generator, tvec::Vector{Float64}) + @assert tvec[1] == 0.0 + @assert issorted(tvec) + Pvec = zeros(siz,siz,length(tvec)) + for (i,t) in enumerate(tvec) + Pvec[:,:,i] .= generator(t) + end + cp = check_eq_P(Pvec[:,:,end]) + if cp > 10^-9 + @warn "Max std dev of last P matrix is $(cp). Far from equilibrium - extend range" + end + new(tvec,Pvec) + end + #Alternative constructor where you directly specify the tensor of P matrices for each "t" + function InterpolatedDiscreteModel(Pvec::Array{Float64,3}, tvec::Vector{Float64}) + @assert tvec[1] == 0.0 + @assert issorted(tvec) + cp = check_eq_P(Pvec[:,:,end]) + if cp > 10^-9 + @warn "Max std dev of last P matrix is $(cp). Far from equilibrium - extend range" + end + new(tvec,Pvec) + end +end + + +#The keys in d must be the boundaries of the ranges for which we would index into +function range_index(v::Float64,ts::Vector{Float64}) + p = searchsortedlast(ts,v) # index of the last key less than or equal to v + return p,p+1 +end + +function interp_weight(tup,p) + @assert tup[1] <= p && p <= tup[2] + w = (p - tup[1]) ./ (tup[2] - tup[1]) + return 1-w,w +end + +#We could maybe hang a destintion "matrix" on the model, and this would store the +#interpolated P to that, saving new allocations. +function matrix_interpolate(t_query,ts,Ps) + inds = range_index(t_query,ts) + if inds[2] > length(ts) + return Ps[:,:,end] + end + w = interp_weight((ts[inds[1]],ts[inds[2]]),t_query) + approxP = w[1].*Ps[:,:,inds[1]] .+ w[2].*Ps[:,:,inds[2]] + return approxP +end + +function backward!( + dest::DiscretePartition, + source::DiscretePartition, + model::InterpolatedDiscreteModel, + node::FelNode) + P = matrix_interpolate(node.branchlength, model.tvec, model.Pvec) + mul!(dest.state, P, source.state) + dest.scaling .= source.scaling +end + +function forward!( + dest::DiscretePartition, + source::DiscretePartition, + model::InterpolatedDiscreteModel, + node::FelNode) + P = matrix_interpolate(node.branchlength, model.tvec, model.Pvec) + dest.state .= (source.state'*P)' + dest.scaling .= source.scaling +end + +function eq_freq(model::InterpolatedDiscreteModel) + model.Pvec[1,:,end] +end + +#step: Higher numbers mean smaller jumps +#cap: After this many points it starts doubling +function t_sequence(t::Float64,n::Int64; step = 2 ,cap = n - 10) + ts = zeros(n) + ts[1] = 0.0 #Note setting the first one to the zero + ts[2] = t + c = 2 + for i in 3:n + ts[i] = ts[i-1]+ts[c] + if mod(i,step)==0 + c += 1 + elseif i > cap + c = i + end + end + return ts +end + +function matrix_sequence(Q::Array{Float64,2},t::Float64,n::Int64; step = 2 ,cap = n - 10) + P = exp(Q .* t) + Ps = zeros(size(P)[1],size(P)[2],n) #Big stack of matrices + Ps[:,:,1] .= Diagonal(ones(size(P)[1])) #Note setting the first one to the identity + Ps[:,:,2] .= P + c = 2 + for i in 3:n + Ps[:,:,i] .= Ps[:,:,i-1]*Ps[:,:,c] + if mod(i,step)==0 + c += 1 + elseif i > cap + c = i + end + end + return Ps +end + +#This will take an existing InterpolatedDiscreteModel, and effectively scale all the "t" values in e^Qt. +function rescale!(m::InterpolatedDiscreteModel, factor::Float64) + m.tvec .= m.tvec ./ factor +end + + +#This is literally just a single P matrix. Maybe some uses, but likely for testing speed bounds +mutable struct PModel <: DiscreteStateModel + P::Array{Float64,2} +end +function backward!( + dest::DiscretePartition, source::DiscretePartition, + model::PModel, node::FelNode) + mul!(dest.state, model.P, source.state) + dest.scaling .= source.scaling +end +function forward!( + dest::DiscretePartition, source::DiscretePartition, + model::PModel, node::FelNode) + dest.state .= (source.state'*model.P)' + dest.scaling .= source.scaling +end From fec6974e3ede60ea4d69277e2c5008f99a443860 Mon Sep 17 00:00:00 2001 From: anton083 Date: Thu, 1 Aug 2024 01:32:40 +0200 Subject: [PATCH 4/7] Bump --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 0db2b18..89fe585 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MolecularEvolution" uuid = "9f975960-e239-4209-8aa0-3d3ad5a82892" authors = ["Ben Murrell and contributors"] -version = "0.1.0" +version = "0.2.0" [deps] Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" From 44a060d13ba3034efc2e8b9228e5ec77dd69b64c Mon Sep 17 00:00:00 2001 From: anton083 Date: Thu, 1 Aug 2024 01:36:17 +0200 Subject: [PATCH 5/7] Set LinearAlgebra compat to 1 --- Project.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 89fe585..f90e134 100644 --- a/Project.toml +++ b/Project.toml @@ -10,7 +10,8 @@ Requires = "ae029012-a4dd-5104-9daa-d747884805df" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] -julia = "1.6" Distributions = "0.25" +LinearAlgebra = "1" Requires = "1.3" StatsBase = "0.34" +julia = "1.6" From 64efb0a3a04354c1453ecdcad6453182b0c5698c Mon Sep 17 00:00:00 2001 From: Maximilian Danielsson Date: Mon, 5 Aug 2024 10:31:34 +0200 Subject: [PATCH 6/7] Type getnodelist and co. return arrays --- src/core/nodes/AbstractTreeNode.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/nodes/AbstractTreeNode.jl b/src/core/nodes/AbstractTreeNode.jl index f2ef521..936e92d 100644 --- a/src/core/nodes/AbstractTreeNode.jl +++ b/src/core/nodes/AbstractTreeNode.jl @@ -348,7 +348,7 @@ end export getnodelist function getnodelist(node::T) where {T<:AbstractTreeNode} - nodelist = [] + nodelist = T[] nodes = [node] while nodes != [] node = pop!(nodes) @@ -391,7 +391,7 @@ end export getnonleaflist function getnonleaflist(node::T) where {T<:AbstractTreeNode} - nonleaflist = [] + nonleaflist = T[] nodes = [node] while nodes != [] node = pop!(nodes) @@ -407,7 +407,7 @@ end export getleaflist function getleaflist(node::T) where {T<:AbstractTreeNode} - leaflist = [] + leaflist = T[] nodes = [node] while nodes != [] node = pop!(nodes) From 540bdf24baa31822870643cc00cc9cc0c02f33d4 Mon Sep 17 00:00:00 2001 From: Anton Oresten Date: Mon, 5 Aug 2024 15:06:38 +0200 Subject: [PATCH 7/7] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index f90e134..d48237a 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MolecularEvolution" uuid = "9f975960-e239-4209-8aa0-3d3ad5a82892" authors = ["Ben Murrell and contributors"] -version = "0.2.0" +version = "0.2.1" [deps] Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"