diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 8ffa5e46..61483216 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -6,9 +6,15 @@ on: - main - fergus/docs +concurrency: + # cancels when a PR gets updated + group: ${{ github.head_ref || github.run_id }}-${{ github.actor }} + cancel-in-progress: true + jobs: docs: name: Build and publish + timeout-minutes: 30 runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e0fece8b..9cd84fcf 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -14,10 +14,16 @@ on: - 'src/**.jl' - 'test/**.jl' +concurrency: + # cancels when a PR gets updated + group: ${{ github.head_ref || github.run_id }}-${{ github.actor }} + cancel-in-progress: true + jobs: smoke-test: name: Julia ${{ matrix.version }} - ${{ matrix.os }} + timeout-minutes: 30 runs-on: ${{ matrix.os }} strategy: matrix: diff --git a/docs/src/why-and-how.md b/docs/src/why-and-how.md index a202daa2..847ede66 100644 --- a/docs/src/why-and-how.md +++ b/docs/src/why-and-how.md @@ -46,7 +46,8 @@ sum(total_flux) Doing so would allow us to only pre-allocate 2 flux arrays, instead of 4 when using the in-place variants: ```@example model_invocation -flux1, flux2 = make_fluxes(model, energy, 2) +fluxes = SpectralFitting.construct_objective_cache(model, energy) +flux1, flux2 = eachcol(fluxes) invokemodel!(flux1, energy, XS_PowerLaw()) invokemodel!(flux2, energy, XS_PowerLaw(a=FitParam(3.0))) @@ -63,39 +64,12 @@ sum(flux1) It is precisely this re-writing that SpectralFitting performs via [`@generated`](https://docs.julialang.org/en/v1/manual/metaprogramming/#Generated-functions) functions. We can inspect the code used to generate the invocation body after defining a [`CompositeModel`](@ref): ```@example model_invocation -fluxes = (flux1, flux2) - model = XS_PhotoelectricAbsorption() * ( XS_PowerLaw() + XS_PowerLaw(a=FitParam(3.0)) + XS_BlackBody() ) -params = get_value.(get_params(model)) - -SpectralFitting.__generated_model_call!(fluxes, energy, typeof(model), params) -nothing # hide -``` - -```@raw html -
begin
-    @inbounds let (flux1, flux2) = fluxes
-        var"##K#356" = params[1]
-        var"##a#357" = params[2]
-        var"##K#358" = params[3]
-        var"##a#359" = params[4]
-        var"##K#360" = params[5]
-        var"##T#361" = params[6]
-        var"##ηH#362" = params[7]
-        invokemodel!(flux1, energy, XS_PowerLaw, var"##K#356", var"##a#357")
-        invokemodel!(flux2, energy, XS_PowerLaw, var"##K#358", var"##a#359")
-        @. flux1 = flux1 + flux2
-        invokemodel!(flux2, energy, XS_BlackBody, var"##K#360", var"##T#361")
-        @. flux1 = flux1 + flux2
-        invokemodel!(flux2, energy, XS_PhotoelectricAbsorption, var"##ηH#362")
-        @. flux1 = flux1 * flux2
-        return flux1
-    end
-end
-
+params = get_value.(SpectralFitting.model_parameters_tuple(model)) +SpectralFitting.FunctionGeneration.generated_model_call!(typeof(fluxes), typeof(energy), typeof(model), typeof(params)) ``` This generated function also takes care of some other things for us, such as unpacking parameters (optionally unpacking frozen parameters separately), and ensuring any closure are passed to [`invokemodel`](@ref) if a model needs them (e.g., [`SurrogateSpectralModel`](@ref)). diff --git a/src/datasets/datasets.jl b/src/datasets/datasets.jl index 049b9cac..5ee7c6c1 100644 --- a/src/datasets/datasets.jl +++ b/src/datasets/datasets.jl @@ -94,6 +94,7 @@ specified by the caller. - [`make_objective`](@ref) - [`make_domain_variance`](@ref) - [`make_model_domain`](@ref) +- [`make_ouput_domain`](@ref) """ abstract type AbstractDataset end @@ -128,13 +129,26 @@ make_objective_variance(layout::AbstractDataLayout, dataset::AbstractDataset) = """ make_model_domain -Returns the array used as the domain for the modelling +Returns the array used as the domain for the modelling. This is paired with [`make_domain_variance`](@ref) """ make_model_domain(layout::AbstractDataLayout, dataset::AbstractDataset) = error("Layout $(layout) is not implemented for $(typeof(dataset))") make_domain_variance(layout::AbstractDataLayout, dataset::AbstractDataset) = error("Layout $(layout) is not implemented for $(typeof(dataset))") +""" + make_output_domain + +Returns the array used as the output domain. That is, in cases where the model +input and output map to different domains, the input domain is said to be the +model domain, the input domain is said to be the model domain. + +The distinction is mainly used for the purposes of simulating data and for +visualising data. +""" +make_output_domain(layout::AbstractDataLayout, dataset::AbstractDataset) = + error("Layout $(layout) is not implemented for $(typeof(dataset))") + function objective_transformer(layout::AbstractDataLayout, dataset::AbstractDataset) @warn "Using default objective transformer!" _DEFAULT_TRANSFORMER() @@ -160,6 +174,7 @@ Must support the same API, but may also have some query methods for specific int abstract type AbstractMultiDataset <: AbstractDataset end export make_model_domain, + make_output_domain, make_domain_variance, make_objective, make_objective_variance, diff --git a/src/datasets/injectivedata.jl b/src/datasets/injectivedata.jl index 497e8594..c37bf1e5 100644 --- a/src/datasets/injectivedata.jl +++ b/src/datasets/injectivedata.jl @@ -35,6 +35,9 @@ make_objective(::ContiguouslyBinned, dataset::InjectiveData) = make_model_domain(::OneToOne, dataset::InjectiveData) = dataset.domain make_objective(::OneToOne, dataset::InjectiveData) = dataset.codomain[dataset.data_mask] +make_output_domain(layout::AbstractDataLayout, dataset::InjectiveData) = + make_model_domain(layout, dataset) + function make_objective_variance( ::AbstractDataLayout, dataset::InjectiveData{V}, @@ -42,8 +45,8 @@ function make_objective_variance( if !isnothing(dataset.codomain_variance) dataset.codomain_variance[dataset.data_mask] else - # todo: i dunno just something - 1e-8 .* dataset.codomain[dataset.data_mask] + # TODO: i dunno just something + ones(eltype(V), count(dataset.data_mask)) end end diff --git a/src/datasets/ogip.jl b/src/datasets/ogip.jl index 68d8bbec..957931d1 100644 --- a/src/datasets/ogip.jl +++ b/src/datasets/ogip.jl @@ -209,8 +209,14 @@ function _build_reponse_matrix( channels::RMFChannels, T::Type, ) - R = spzeros(T, header.num_channels, length(rmf.bins_low)) - build_response_matrix!(R, rmf.f_chan, rmf.n_chan, rmf.matrix, header.first_channel) + R = build_response_matrix( + rmf.f_chan, + rmf.n_chan, + rmf.matrix, + header.num_channels, + header.first_channel, + T, + ) SpectralFitting.ResponseMatrix( R, channels.channels, @@ -221,6 +227,47 @@ function _build_reponse_matrix( ) end +function build_response_matrix( + f_chan::Vector, + n_chan::Vector, + matrix_rows::Vector, + num_cols::Int, + first_channel, + T::Type, +) + ptrs = Int[1] + indices = Int[] + matrix = Float64[] + + prev = first(ptrs) + + for i in eachindex(f_chan) + M = matrix_rows[i] + + row_len = 0 + for (f, n) in zip(f_chan[i], n_chan[i]) + if n == 0 + # advance row + break + end + first = (f - first_channel) + 1 + # append all of the indices + for j = 0:n-1 + push!(indices, j + first) + end + append!(matrix, M[row_len+1:row_len+n]) + row_len += n + end + + next = row_len + prev + push!(ptrs, next) + prev = next + end + + SparseArrays.SparseMatrixCSC{T,Int}(num_cols, length(f_chan), ptrs, indices, matrix) +end + +# TODO: marked for refactoring (unused) function build_response_matrix!( R, f_chan::Vector, @@ -351,19 +398,19 @@ function read_filename(header, entry, parent, exts...) if haskey(header, entry) path::String = strip(header[entry]) if path == "NONE" - return missing + return nothing end name = find_file(data_directory, path, parent_name, exts) - if !ismissing(name) + if !isnothing(name) return name end end - missing + nothing end function find_file(dir, name, parent, extensions) if length(name) == 0 - return missing + return nothing elseif match(r"%match%", name) !== nothing base = splitext(parent)[1] for ext in extensions @@ -373,9 +420,9 @@ function find_file(dir, name, parent, extensions) end end @warn "Missing! Could not find file '%match%': tried $extensions" - return missing + return nothing elseif match(r"^none\b", name) !== nothing - return missing + return nothing end joinpath(dir, name) end diff --git a/src/datasets/ogipdataset.jl b/src/datasets/ogipdataset.jl index 882c073a..4be6480e 100644 --- a/src/datasets/ogipdataset.jl +++ b/src/datasets/ogipdataset.jl @@ -10,9 +10,9 @@ end function load_ogip_dataset( spec_path; hdu = 2, - background = missing, - response = missing, - ancillary = missing, + background = nothing, + response = nothing, + ancillary = nothing, kwargs..., ) paths = SpectralDataPaths( diff --git a/src/datasets/response.jl b/src/datasets/response.jl index 0b1603c1..e39fa53b 100644 --- a/src/datasets/response.jl +++ b/src/datasets/response.jl @@ -8,6 +8,12 @@ mutable struct ResponseMatrix{T} bins_high::Vector{T} end +""" + response_energy(response::ResponseMatrix) + +Get the contiguously binned energy corresponding to the *input domain* of the +response matrix. This is equivalent to the model domain. +""" function response_energy(resp::ResponseMatrix{T}) where {T} E = zeros(T, length(resp.bins_low) + 1) E[1:end-1] .= resp.bins_low @@ -15,6 +21,19 @@ function response_energy(resp::ResponseMatrix{T}) where {T} E end +""" + folded_energy(response::ResponseMatrix) + +Get the contiguously binned energy corresponding to the *output (folded) domain* +of the response matrix. That is, the channel energies as used by the spectrum. +""" +function folded_energy(resp::ResponseMatrix{T}) where {T} + E = zeros(T, length(resp.channel_bins_low) + 1) + E[1:end-1] .= resp.channel_bins_low + E[end] = resp.channel_bins_high[end] + E +end + function regroup!(resp::ResponseMatrix{T}, grouping) where {T} itt = GroupingIterator(grouping) new_matrix = zeros(T, (length(itt), size(resp.matrix, 2))) diff --git a/src/datasets/spectraldata.jl b/src/datasets/spectraldata.jl index 583d21f2..35e068be 100644 --- a/src/datasets/spectraldata.jl +++ b/src/datasets/spectraldata.jl @@ -1,8 +1,8 @@ struct SpectralDataPaths - spectrum::Union{Missing,String} - background::Union{Missing,String} - response::Union{Missing,String} - ancillary::Union{Missing,String} + spectrum::Union{Nothing,String} + background::Union{Nothing,String} + response::Union{Nothing,String} + ancillary::Union{Nothing,String} end function Base.show(io::IO, ::MIME"text/plain", @nospecialize(paths::SpectralDataPaths)) @@ -16,27 +16,27 @@ function Base.show(io::IO, ::MIME"text/plain", @nospecialize(paths::SpectralData end function SpectralDataPaths(; - spectrum = missing, - background = missing, - response = missing, - ancillary = missing, + spectrum = nothing, + background = nothing, + response = nothing, + ancillary = nothing, ) SpectralDataPaths(spectrum, background, response, ancillary) end function SpectralDataPaths( spec_path; - background = missing, - response = missing, - ancillary = missing, + background = nothing, + response = nothing, + ancillary = nothing, ) background_path, response_path, ancillary_path = OGIP.read_paths_from_spectrum(spec_path) SpectralDataPaths( spectrum = spec_path, - background = ismissing(background) ? background_path : background, - response = ismissing(response) ? response_path : response, - ancillary = ismissing(ancillary) ? ancillary_path : ancillary, + background = isnothing(background) ? background_path : background, + response = isnothing(response) ? response_path : response, + ancillary = isnothing(ancillary) ? ancillary_path : ancillary, ) end @@ -44,9 +44,9 @@ mutable struct SpectralData{T} <: AbstractDataset spectrum::Spectrum{T} response::ResponseMatrix{T} # background is optional - background::Union{Missing,Spectrum{T}} + background::Union{Nothing,Spectrum{T}} # ancillary response is optionally, may also have already been folded into response - ancillary::Union{Missing,AncillaryResponse{T}} + ancillary::Union{Nothing,AncillaryResponse{T}} energy_low::Vector{T} # energy translated from the response channels energy_high::Vector{T} # energy translated from the response channels @@ -57,15 +57,36 @@ end # constructor -SpectralData(paths::SpectralDataPaths; kwargs...) = _dataset_from_ogip(paths; kwargs...) +function SpectralData(paths::SpectralDataPaths; kwargs...) + spec, resp, back, anc = _read_all_ogip(paths; kwargs...) + + # convert everything to rates + if spec.units == u"counts" + spec.units = u"counts / s" + @. spec.data /= spec.exposure_time + if !isnothing(spec.errors) + @. spec.errors /= spec.exposure_time + end + end + + if !isnothing(back) && back.units == u"counts" + back.units = u"counts / s" + @. back.data /= back.exposure_time + if !isnothing(back.errors) + @. back.errors /= back.exposure_time + end + end + + SpectralData(spec, resp; background = back, ancillary = anc) +end function SpectralData( spectrum::Spectrum, response::ResponseMatrix; # try to match the domains of the response matrix to the data match_domains = true, - background = missing, - ancillary = missing, + background = nothing, + ancillary = nothing, ) domain = _make_domain_vector(spectrum, response) energy_low, energy_high = _make_energy_vector(spectrum, response) @@ -109,6 +130,8 @@ function make_objective_variance(layout::AbstractDataLayout, dataset::SpectralDa end make_model_domain(::ContiguouslyBinned, dataset::SpectralData) = dataset.domain +make_output_domain(::ContiguouslyBinned, dataset::SpectralData) = + folded_energy(dataset.response) restrict_domain!(dataset::SpectralData, low, high) = restrict_domain!(dataset, i -> high > i > low) @@ -126,18 +149,7 @@ function restrict_domain!(dataset::SpectralData, condition) dataset end -function objective_transformer( - layout::ContiguouslyBinned, - dataset::SpectralData{T}, -) where {T} - R_folded = if has_ancillary(dataset) - sparse(fold_ancillary(dataset.response, dataset.ancillary)) - else - dataset.response.matrix - end - R = R_folded[dataset.data_mask, :] - ΔE = bin_widths(dataset) - E = response_energy(dataset.response) +function _fold_transformer(T::Type, layout, R, ΔE, E) cache = DiffCache(construct_objective_cache(layout, T, length(E), 1)) function _transformer!!(energy, flux) f = rebin_if_different_domains!(get_tmp(cache, flux), E, energy, flux) @@ -152,10 +164,26 @@ function objective_transformer( _transformer!! end +function objective_transformer( + layout::ContiguouslyBinned, + dataset::SpectralData{T}, +) where {T} + R_folded = if has_ancillary(dataset) + sparse(fold_ancillary(dataset.response, dataset.ancillary)) + else + dataset.response.matrix + end + R = R_folded[dataset.data_mask, :] + ΔE = bin_widths(dataset) + model_domain = response_energy(dataset.response) + _fold_transformer(T, layout, R, ΔE, model_domain) +end + + unmasked_bin_widths(dataset::SpectralData) = dataset.energy_high .- dataset.energy_low bin_widths(dataset::SpectralData) = unmasked_bin_widths(dataset)[dataset.data_mask] -has_background(dataset::SpectralData) = !ismissing(dataset.background) -has_ancillary(dataset::SpectralData) = !ismissing(dataset.ancillary) +has_background(dataset::SpectralData) = !isnothing(dataset.background) +has_ancillary(dataset::SpectralData) = !isnothing(dataset.ancillary) function drop_bad_channels!(dataset::SpectralData) indices = findall(!=(GOOD_QUALITY), dataset.spectrum.quality) @@ -235,7 +263,7 @@ function subtract_background!(dataset::SpectralData) error("No background to subtract. Did you already subtract the background?") end subtract_background!(dataset.spectrum, dataset.background) - dataset.background = missing + dataset.background = nothing dataset end @@ -258,44 +286,44 @@ function rebin_if_different_domains!(output, data_domain, model_domain, input) output end -function _dataset_from_ogip(paths::SpectralDataPaths) - spec = OGIP.read_spectrum(paths.spectrum) - back = if !ismissing(paths.background) +function _read_all_ogip(paths::SpectralDataPaths; forgiving = false) + spec = if !isnothing(paths.spectrum) + OGIP.read_spectrum(paths.spectrum) + else + if !forgiving + @warn "No spectrum file found." + end + nothing + end + back = if !isnothing(paths.background) OGIP.read_background(paths.background) else - @warn "No background file found." - missing + if !forgiving + @warn "No background file found." + end + nothing end - resp = if !ismissing(paths.response) + resp = if !isnothing(paths.response) OGIP.read_rmf(paths.response) else - throw( - "No response file found in the header. Response must be specified with the keyword `response=PATH`.", - ) + if !forgiving + throw( + "No response file found in the header. Response must be specified with the keyword `response=PATH`.", + ) + else + nothing + end end - ancillary = if !ismissing(paths.ancillary) + ancillary = if !isnothing(paths.ancillary) OGIP.read_ancillary_response(paths.ancillary) else - @warn "No ancillary file found." - missing - end - - # convert everything to rates - if spec.units == u"counts" - spec.units = u"counts / s" - @. spec.data /= spec.exposure_time - if !ismissing(spec.errors) - @. spec.errors /= spec.exposure_time + if !forgiving + @warn "No ancillary file found." end + nothing end - if !ismissing(back) && back.units == u"counts" - back.units = u"counts / s" - @. back.data /= back.exposure_time - if !ismissing(back.errors) - @. back.errors /= back.exposure_time - end - end - SpectralData(spec, resp; background = back, ancillary = ancillary) + + (spec, resp, back, ancillary) end function _make_domain_vector(::Spectrum, resp::ResponseMatrix{T}) where {T} @@ -344,6 +372,10 @@ macro _forward_SpectralData_api(args) T, field = args.args quote SpectralFitting.supports_contiguosly_binned(t::Type{<:$(T)}) = true + SpectralFitting.make_output_domain( + layout::SpectralFitting.AbstractDataLayout, + t::$(T), + ) = SpectralFitting.make_output_domain(layout, getfield(t, $(field))) SpectralFitting.make_model_domain( layout::SpectralFitting.AbstractDataLayout, t::$(T), @@ -454,7 +486,7 @@ function _printinfo(io, data::SpectralData{T}) where {T} print( io, Crayons.Crayon(foreground = :dark_gray), - "Background: missing", + "Background: nothing", Crayons.Crayon(reset = true), "\n", ) @@ -473,7 +505,7 @@ function _printinfo(io, data::SpectralData{T}) where {T} print( io, Crayons.Crayon(foreground = :dark_gray), - "Ancillary: missing", + "Ancillary: nothing", Crayons.Crayon(reset = true), "\n", ) diff --git a/src/datasets/spectrum.jl b/src/datasets/spectrum.jl index e9a1f432..a0a0e03a 100644 --- a/src/datasets/spectrum.jl +++ b/src/datasets/spectrum.jl @@ -12,7 +12,7 @@ mutable struct Spectrum{T} <: AbstractDataset area_scale::T error_statistics::SpectralFitting.ErrorStatistics.T - errors::Union{Missing,Vector{T}} + errors::Union{Nothing,Vector{T}} systematic_error::T telescope_name::String @@ -45,6 +45,11 @@ function make_model_domain(::ContiguouslyBinned, dataset::Spectrum) dataset.channels end +function make_output_domain(::ContiguouslyBinned, dataset::Spectrum) + @warn "Spectrum doesn't know the energy values by default. Domain is channels. Proceed only if you know what you are doing." + dataset.channels +end + isgrouped(spectrum::Spectrum) = all(==(1), spectrum.grouping) regroup!(spectrum::Spectrum) = regroup!(spectrum, spectrum.grouping) @@ -59,7 +64,7 @@ function regroup!(spectrum::Spectrum{T}, grouping) where {T} spectrum.channels[grp[1]] = grp[1] regroup_vector!(spectrum.data, grp) regroup_quality_vector!(spectrum.quality, grp) - if !ismissing(spectrum.errors) + if !isnothing(spectrum.errors) vs = spectrum.data[grp[1]] if spectrum.units == u"counts" spectrum.errors[grp[1]] = count_error(vs, 1.0) @@ -95,7 +100,7 @@ function drop_channels!(spectrum::Spectrum, indices) deleteat!(spectrum.data, indices) deleteat!(spectrum.quality, indices) deleteat!(spectrum.grouping, indices) - if !ismissing(spectrum.errors) + if !isnothing(spectrum.errors) deleteat!(spectrum.errors, indices) end length(indices) diff --git a/src/fitting/cache.jl b/src/fitting/cache.jl index 9003306e..1155675b 100644 --- a/src/fitting/cache.jl +++ b/src/fitting/cache.jl @@ -83,7 +83,8 @@ end struct FittingConfig{ImplType,CacheType,P,D,O} cache::CacheType parameters::P - domain::D + model_domain::D + output_domain::D objective::O variance::O covariance::O @@ -91,29 +92,47 @@ struct FittingConfig{ImplType,CacheType,P,D,O} impl::AbstractSpectralModelImplementation, cache::C, params::P, - domain::D, + model_domain::D, + output_domain::D, objective::O, variance::O; covariance::O = inv.(variance), ) where {C,P,D,O} - new{typeof(impl),C,P,D,O}(cache, params, domain, objective, variance, covariance) + new{typeof(impl),C,P,D,O}( + cache, + params, + model_domain, + output_domain, + objective, + variance, + covariance, + ) end end function FittingConfig(model::AbstractSpectralModel, dataset::AbstractDataset) layout = common_support(model, dataset) - domain = make_model_domain(layout, dataset) + model_domain = make_model_domain(layout, dataset) + output_domain = make_output_domain(layout, dataset) objective = make_objective(layout, dataset) variance = make_objective_variance(layout, dataset) params = _allocate_free_parameters(model) cache = SpectralCache( layout, model, - domain, + model_domain, objective, objective_transformer(layout, dataset), ) - FittingConfig(implementation(model), cache, params, domain, objective, variance) + FittingConfig( + implementation(model), + cache, + params, + model_domain, + output_domain, + objective, + variance, + ) end function _f_objective(config::FittingConfig) @@ -128,7 +147,7 @@ function finalize( statistic = ChiSquared(), σparams = nothing, ) - y = _f_objective(config)(config.domain, params) + y = _f_objective(config)(config.model_domain, params) chi2 = measure(statistic, config.objective, y, config.variance) FittingResult(chi2, params, σparams, config) end diff --git a/src/fitting/methods.jl b/src/fitting/methods.jl index 418981bd..48ceb1ab 100644 --- a/src/fitting/methods.jl +++ b/src/fitting/methods.jl @@ -71,7 +71,7 @@ function fit( ) lsq_result = _lsq_fit( _f_objective(config), - config.domain, + config.model_domain, config.objective, config.covariance, config.parameters, @@ -124,7 +124,7 @@ function fit( # build problem and solve opt_f = Optimization.OptimizationFunction{false}(objective, _autodiff) # todo: something is broken with passing the boundaries - opt_prob = Optimization.OptimizationProblem{false}(opt_f, u0, config.domain) + opt_prob = Optimization.OptimizationProblem{false}(opt_f, u0, config.model_domain) sol = Optimization.solve(opt_prob, optim_alg; method_kwargs...) finalize(config, sol.u; statistic = statistic) end diff --git a/src/fitting/multi-cache.jl b/src/fitting/multi-cache.jl index 1686f9c3..332bb74d 100644 --- a/src/fitting/multi-cache.jl +++ b/src/fitting/multi-cache.jl @@ -2,6 +2,7 @@ struct MultiModelCache{K,N,CacheTypes<:Tuple,ParameterMappingType} <: AbstractFi caches::CacheTypes all_outputs::K domain_mapping::NTuple{N,Int} + output_domain_mapping::NTuple{N,Int} objective_mapping::NTuple{N,Int} parameter_mapping::ParameterMappingType end @@ -51,6 +52,8 @@ _build_objective_mapping(layout::AbstractDataLayout, dataset::FittableMultiDatas _build_mapping_length(i -> make_objective(layout, i), dataset.d) _build_domain_mapping(layout::AbstractDataLayout, dataset::FittableMultiDataset) = _build_mapping_length(i -> make_model_domain(layout, i), dataset.d) +_build_output_domain_mapping(layout::AbstractDataLayout, dataset::FittableMultiDataset) = + _build_mapping_length(i -> make_output_domain(layout, i), dataset.d) function FittingConfig(prob::FittingProblem) impl = @@ -62,6 +65,7 @@ function FittingConfig(prob::FittingProblem) variances = map(d -> make_objective_variance(layout, d), prob.data.d) # build index mappings for pulling out the data domains, domain_mapping = _build_domain_mapping(layout, prob.data) + output_domains, output_domain_mapping = _build_output_domain_mapping(layout, prob.data) objectives, objective_mapping = _build_objective_mapping(layout, prob.data) parameters, parameter_mapping = _build_parameter_mapping(prob.model, prob.bindings) @@ -85,6 +89,7 @@ function FittingConfig(prob::FittingProblem) caches, DiffCache(similar(all_objectives)), domain_mapping, + output_domain_mapping, objective_mapping, parameter_mapping, ) @@ -93,6 +98,7 @@ function FittingConfig(prob::FittingProblem) cache, parameters, reduce(vcat, domains), + reduce(vcat, output_domains), all_objectives, reduce(vcat, variances), ) @@ -104,7 +110,7 @@ function finalize( statistic = ChiSquared(), σparams = nothing, ) where {Impl} - domain = config.domain + domain = config.model_domain cache = config.cache results = map(enumerate(cache.caches)) do (i, ch) p = @views params[cache.parameter_mapping[i]] diff --git a/src/fitting/result.jl b/src/fitting/result.jl index 16606101..84d856b0 100644 --- a/src/fitting/result.jl +++ b/src/fitting/result.jl @@ -68,14 +68,14 @@ measure(stat::AbstractStatistic, slice::FittingResult, args...) = function invoke_result(result::FittingResult, u) @assert length(u) == length(result.u) - _invoke_and_transform!(result.config.cache, result.config.domain, u) + _invoke_and_transform!(result.config.cache, result.config.model_domain, u) end function Base.getindex(result::FittingResult, i) if i == 1 FittingResultSlice( result.config.cache, - result.config.domain, + result.config.model_domain, result.config.objective, result.config.variance, result.u, @@ -114,7 +114,7 @@ function Base.getindex(result::MultiFittingResult, i::Int) o_start, o_end = _get_range(result.config.cache.objective_mapping, i) @views FittingResultSlice( cache, - result.config.domain[d_start:d_end], + result.config.model_domain[d_start:d_end], result.config.objective[o_start:o_end], result.config.variance[o_start:o_end], u, diff --git a/src/julia-models/additive.jl b/src/julia-models/additive.jl index ccc294de..07e53a3b 100644 --- a/src/julia-models/additive.jl +++ b/src/julia-models/additive.jl @@ -110,4 +110,24 @@ end end end -export PowerLaw, BlackBody +struct GaussianLine{T} <: AbstractSpectralModel{T,Additive} + "Normalisation." + K::T + "Mean" + μ::T + "Standard deviation" + σ::T +end +function GaussianLine(; K = FitParam(1.0), μ = FitParam(6.4), σ = FitParam(1.0)) + GaussianLine{typeof(K)}(K, μ, σ) +end +@inline function invoke!(flux, energy, model::GaussianLine) + let μ = model.μ, σ = model.σ + integration_kernel!(flux, energy) do Elow, δE + E = Elow + δE / 2 + δE * inv(σ * √(2π)) * exp(-1 * (E - μ)^2 / (2 * σ^2)) + end + end +end + +export PowerLaw, BlackBody, GaussianLine diff --git a/src/julia-models/convolutional.jl b/src/julia-models/convolutional.jl index af03e700..78c98b85 100644 --- a/src/julia-models/convolutional.jl +++ b/src/julia-models/convolutional.jl @@ -24,8 +24,6 @@ function invoke!(flux, energy, model::Log10Flux) lastindex(energy) - 1, ) - @show ilow, ihigh - total_e_flux = zero(eltype(flux)) # low bin straddle diff --git a/src/plotting-recipes.jl b/src/plotting-recipes.jl index 918f9415..0bf7a306 100644 --- a/src/plotting-recipes.jl +++ b/src/plotting-recipes.jl @@ -51,7 +51,7 @@ end @recipe function _plotting_func(dataset::AbstractDataset, result::FittingResult) label --> "fit" seriestype --> :stepmid - y = _f_objective(result.config)(result.config.domain, result.u) + y = _f_objective(result.config)(result.config.model_domain, result.u) x = plotting_domain(dataset) if length(y) != length(x) error( @@ -139,8 +139,6 @@ end ) end - println("Debug: Creating a residual plot") - data = r.args[1] x = plotting_domain(data) # at the moment I don't understand why the following line is necessary diff --git a/src/simulate.jl b/src/simulate.jl index 0434b212..4da29ad7 100644 --- a/src/simulate.jl +++ b/src/simulate.jl @@ -1,5 +1,7 @@ + mutable struct SimulatedSpectrum{T,F} <: AbstractDataset - domain::Vector{T} + model_domain::Vector{T} + output_domain::Vector{T} data::Vector{T} errors::Vector{T} units::Union{Nothing,SpectralUnits.RateOrCount} @@ -19,19 +21,26 @@ function make_objective_variance(::ContiguouslyBinned, dataset::SimulatedSpectru dataset.errors .^ 2 end -function make_model_domain(::ContiguouslyBinned, dataset::SimulatedSpectrum) - dataset.domain -end +make_model_domain(::ContiguouslyBinned, dataset::SimulatedSpectrum) = dataset.model_domain +make_output_domain(::ContiguouslyBinned, dataset::SimulatedSpectrum) = dataset.output_domain + +objective_transformer(::ContiguouslyBinned, dataset::SimulatedSpectrum) = + dataset.transformer!! -bin_widths(dataset::SimulatedSpectrum) = diff(dataset.domain) -plotting_domain(dataset::SimulatedSpectrum) = dataset.domain[1:end-1] .+ bin_widths(dataset) +bin_widths(dataset::SimulatedSpectrum) = diff(dataset.output_domain) +plotting_domain(dataset::SimulatedSpectrum) = + dataset.output_domain[1:end-1] .+ (bin_widths(dataset) ./ 2) objective_units(dataset::SimulatedSpectrum) = dataset.units function _printinfo(io::IO, spectrum::SimulatedSpectrum) dmin, dmax = prettyfloat.(extrema(spectrum.data)) - descr = """SimulatedSpectrum: + xmin, xmax = prettyfloat.(extrema(spectrum.model_domain)) + omin, omax = prettyfloat.(extrema(spectrum.output_domain)) + descr = """SimulatedSpectrum with $(length(spectrum.data)) channels: Units : $(spectrum.units) . Data (min/max) : ($dmin, $dmax) + . Domain (min/max) : ($xmin, $xmax) + . Out Dmn. (min/max) : ($omin, $omax) """ print(io, descr) end @@ -39,33 +48,92 @@ end function simulate!( config::FittingConfig, p; - simulate_distribution = Distributions.Normal, + simulate_distribution = Distributions.Poisson, rng = Random.default_rng(), + exposure_time = 1e5, ) - config.objective .= _invoke_and_transform!(config.cache, config.domain, p) + config.objective .= _invoke_and_transform!(config.cache, config.model_domain, p) for (i, m) in enumerate(config.objective) - distr = simulate_distribution(m, sqrt(config.variance[i])) - config.objective[i] = rand(rng, distr) + distr = simulate_distribution(m * exposure_time) + count = rand(rng, distr) + config.objective[i] = count / exposure_time + config.variance[i] = sqrt(abs(count)) / exposure_time end end -function simulate(prob::FittingProblem; seed = abs(randint()), kwargs...) - kw, conf = _unpack_fitting_configuration(prob; kwargs...) +function simulate!(conf::FittingConfig; seed = abs(rand(Int)), kwargs...) rng = Random.default_rng(seed) Random.seed!(rng, seed) - simulate!(conf, get_value.(conf.parameters); rng = rng, kw...) + simulate!(conf, get_value.(conf.parameters); rng = rng, kwargs...) SimulatedSpectrum( - conf.domain, + conf.model_domain, + conf.output_domain, conf.objective, - sqrt.(conf.variance), - nothing, + # variance has already been sqrt'd + conf.variance, + u"counts / (s * keV)", conf.cache.transformer!!, seed, ) end +function _make_simulation_fitting_config( + model::AbstractSpectralModel, + response::ResponseMatrix{T}, + ancillary; + layout = ContiguouslyBinned(), + input_domain = response_energy(response), + output_domain = folded_energy(response), + kwargs..., +) where {T} + if !supports(layout, model) + throw("Model must support desired layout for simulation.") + end + + R = if !isnothing(ancillary) + fold_ancillary(response, ancillary) + else + response.matrix + end + + ΔE = diff(output_domain) + + objective = zeros(eltype(output_domain), length(output_domain) - 1) + variance = ones(eltype(objective), size(objective)) + + cache = SpectralCache( + layout, + model, + input_domain, + objective, + _fold_transformer(T, layout, R, ΔE, input_domain), + ) + + conf = FittingConfig( + implementation(model), + cache, + _allocate_free_parameters(model), + input_domain, + output_domain, + objective, + variance, + ) + kwargs, conf +end + +function simulate( + model::AbstractSpectralModel, + response::ResponseMatrix, + ancillary::Union{Nothing,<:AncillaryResponse} = nothing; + kwargs..., +) + kw, conf = _make_simulation_fitting_config(model, response, ancillary; kwargs...) + simulate!(conf; kw...) +end + function simulate(model::AbstractSpectralModel, dataset::AbstractDataset; kwargs...) - simulate(FittingProblem(model => dataset); kwargs...) + kw, conf = _unpack_fitting_configuration(FittingProblem(model => dataset); kwargs...) + simulate!(conf; kw...) end diff --git a/test/fitting/test-cache.jl b/test/fitting/test-cache.jl index 71418ca4..e202c752 100644 --- a/test/fitting/test-cache.jl +++ b/test/fitting/test-cache.jl @@ -12,7 +12,7 @@ config = SpectralFitting.FittingConfig(model, dummy_data) f = SpectralFitting._f_objective(config) params = get_value.(config.parameters) -domain = config.domain +domain = config.model_domain result = f(domain, params) # todo: currently we still allocate cus extracting the frozen parameters diff --git a/test/fitting/test-fit-simple-dataset.jl b/test/fitting/test-fit-simple-dataset.jl index d8943b87..3b0a5cdd 100644 --- a/test/fitting/test-fit-simple-dataset.jl +++ b/test/fitting/test-fit-simple-dataset.jl @@ -43,7 +43,7 @@ model = PowerLaw() prob = FittingProblem(model => data) result = fit(prob, LevenbergMarquadt()) -@test result.u[2] ≈ 6.1 atol = 0.1 +@test result.u[2] ≈ 6.2 atol = 0.1 # fitting a contiguously binned dataset with some masked bins x = 10 .^ collect(range(-1, 2, 10)) diff --git a/test/models/test-model-consistency.jl b/test/models/test-model-consistency.jl index f5c07848..107ad4e8 100644 --- a/test/models/test-model-consistency.jl +++ b/test/models/test-model-consistency.jl @@ -22,9 +22,14 @@ end end - y = ones(Float64, 10) x = collect(range(0.0, 5.0, length(y) + 1)) output_xs = invokemodel!(y, x, XS_CalculateFlux()) |> copy output_jl = invokemodel!(y, x, SpectralFitting.Log10Flux()) |> copy -@test output_xs ≈ output_jl atol = 1e-8 \ No newline at end of file +@test output_xs ≈ output_jl atol = 1e-8 + + +x = 10 .^ collect(range(log10(4), log10(10.0), 100)) +f1 = invokemodel(x, XS_Gaussian()) +f2 = invokemodel(x, GaussianLine()) +@test f1 ≈ f2 atol = 1e-4 diff --git a/test/runtests.jl b/test/runtests.jl index c35e2f59..02436b38 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -114,6 +114,17 @@ end end end +@testset "simulation" verbose = true begin + include("simulation/test-simulation.jl") + if has_test_dir + @testset "sample-data" begin + include("simulation/test-sample-data-sim.jl") + end + else + @warn "Skipping simulating real observatory tests." + end +end + using Aqua # little bit of aqua diff --git a/test/simulation/test-sample-data-sim.jl b/test/simulation/test-sample-data-sim.jl new file mode 100644 index 00000000..a1ee214a --- /dev/null +++ b/test/simulation/test-sample-data-sim.jl @@ -0,0 +1,21 @@ +using Test, SpectralFitting + +# path to the data directory +data1 = SpectralFitting.XmmData( + SpectralFitting.XmmEPIC(), + joinpath(testdir, "xmm/pn_spec_grp.fits"), +) + + +# smoke test +model = GaussianLine() + PowerLaw(a = FitParam(0.2)) +sim = simulate( + model, + data1.data.response, + data1.data.ancillary; + seed = 8, + exposure_time = 1e1, +) + +# TODO: add a fit. can't do it at the moment as the simulated datasets don't +# support masking the model domain, so we have singular values at high energies diff --git a/test/simulation/test-simulation.jl b/test/simulation/test-simulation.jl new file mode 100644 index 00000000..46f5c0e9 --- /dev/null +++ b/test/simulation/test-simulation.jl @@ -0,0 +1,20 @@ +using Test, SpectralFitting + +include("../dummies.jl") + +dummy_data = make_dummy_dataset((E) -> (E^(-3.0)); units = u"counts / (s * keV)") +model = PowerLaw() + +sim_data = simulate(model, dummy_data; seed = 2, exposure_time = 1e5) + +@test sum(sim_data.data) ≈ 4.897 atol = 1e-2 + +prob = FittingProblem(model => sim_data) +result = fit(prob, LevenbergMarquadt()) + +@test result.u ≈ [1.0, 2.0] atol = 1e-2 + + +# test the simulation api +sim_data2 = simulate(model, dummy_data.response; seed = 2) +@test sum(sim_data2.data) ≈ 4.897 atol = 1e-2