Merge pull request #100 from fjebaker/fergus/more-sim

More simulation work
fjebaker · May 14, 2024 · f54f999 · f54f999
2 parents f29caa4 + 119d88e
commit f54f999
Show file tree

Hide file tree

Showing 24 changed files with 425 additions and 152 deletions.
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -6,9 +6,15 @@ on:
       - main
       - fergus/docs
 
+concurrency:
+  # cancels when a PR gets updated
+  group: ${{ github.head_ref || github.run_id }}-${{ github.actor }}
+  cancel-in-progress: true
+
 jobs:
   docs:
     name: Build and publish
+    timeout-minutes: 30
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -14,10 +14,16 @@ on:
       - 'src/**.jl'
       - 'test/**.jl'
 
+concurrency:
+  # cancels when a PR gets updated
+  group: ${{ github.head_ref || github.run_id }}-${{ github.actor }}
+  cancel-in-progress: true
+
 jobs:
 
   smoke-test:
     name: Julia ${{ matrix.version }} - ${{ matrix.os }}
+    timeout-minutes: 30
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:

diff --git a/docs/src/why-and-how.md b/docs/src/why-and-how.md
@@ -46,7 +46,8 @@ sum(total_flux)
 Doing so would allow us to only pre-allocate 2 flux arrays, instead of 4 when using the in-place variants:
 
 ```@example model_invocation
-flux1, flux2 = make_fluxes(model, energy, 2)
+fluxes = SpectralFitting.construct_objective_cache(model, energy)
+flux1, flux2 = eachcol(fluxes)
 
 invokemodel!(flux1, energy, XS_PowerLaw())
 invokemodel!(flux2, energy, XS_PowerLaw(a=FitParam(3.0)))
@@ -63,39 +64,12 @@ sum(flux1)
 It is precisely this re-writing that SpectralFitting performs via [`@generated`](https://docs.julialang.org/en/v1/manual/metaprogramming/#Generated-functions) functions. We can inspect the code used to generate the invocation body after defining a [`CompositeModel`](@ref):
 
 ```@example model_invocation
-fluxes = (flux1, flux2)
-
 model = XS_PhotoelectricAbsorption() * (
     XS_PowerLaw() + XS_PowerLaw(a=FitParam(3.0)) + XS_BlackBody()
 )
 
-params = get_value.(get_params(model))
-
-SpectralFitting.__generated_model_call!(fluxes, energy, typeof(model), params)
-nothing # hide
-```
-
-```@raw html
-<pre class="documenter-example-output"><code class="nohighlight hljs ansi">begin
-    @inbounds let (flux1, flux2) = fluxes
-        var"##K#356" = params[1]
-        var"##a#357" = params[2]
-        var"##K#358" = params[3]
-        var"##a#359" = params[4]
-        var"##K#360" = params[5]
-        var"##T#361" = params[6]
-        var"##ηH#362" = params[7]
-        invokemodel!(flux1, energy, XS_PowerLaw, var"##K#356", var"##a#357")
-        invokemodel!(flux2, energy, XS_PowerLaw, var"##K#358", var"##a#359")
-        @. flux1 = flux1 + flux2
-        invokemodel!(flux2, energy, XS_BlackBody, var"##K#360", var"##T#361")
-        @. flux1 = flux1 + flux2
-        invokemodel!(flux2, energy, XS_PhotoelectricAbsorption, var"##ηH#362")
-        @. flux1 = flux1 * flux2
-        return flux1
-    end
-end
-</code><button class="copy-button fas fa-copy"></button></pre>
+params = get_value.(SpectralFitting.model_parameters_tuple(model))
+SpectralFitting.FunctionGeneration.generated_model_call!(typeof(fluxes), typeof(energy), typeof(model), typeof(params))
 ```
 
 This generated function also takes care of some other things for us, such as unpacking parameters (optionally unpacking frozen parameters separately), and ensuring any closure are passed to [`invokemodel`](@ref) if a model needs them (e.g., [`SurrogateSpectralModel`](@ref)).

diff --git a/src/datasets/datasets.jl b/src/datasets/datasets.jl
@@ -94,6 +94,7 @@ specified by the caller.
 - [`make_objective`](@ref)
 - [`make_domain_variance`](@ref)
 - [`make_model_domain`](@ref)
+- [`make_ouput_domain`](@ref)
 
 """
 abstract type AbstractDataset end
@@ -128,13 +129,26 @@ make_objective_variance(layout::AbstractDataLayout, dataset::AbstractDataset) =
 """
     make_model_domain
 
-Returns the array used as the domain for the modelling
+Returns the array used as the domain for the modelling. This is paired with [`make_domain_variance`](@ref)
 """
 make_model_domain(layout::AbstractDataLayout, dataset::AbstractDataset) =
     error("Layout $(layout) is not implemented for $(typeof(dataset))")
 make_domain_variance(layout::AbstractDataLayout, dataset::AbstractDataset) =
     error("Layout $(layout) is not implemented for $(typeof(dataset))")
 
+"""
+    make_output_domain
+
+Returns the array used as the output domain. That is, in cases where the model
+input and output map to different domains, the input domain is said to be the
+model domain, the input domain is said to be the model domain. 
+
+The distinction is mainly used for the purposes of simulating data and for
+visualising data.
+"""
+make_output_domain(layout::AbstractDataLayout, dataset::AbstractDataset) =
+    error("Layout $(layout) is not implemented for $(typeof(dataset))")
+
 function objective_transformer(layout::AbstractDataLayout, dataset::AbstractDataset)
     @warn "Using default objective transformer!"
     _DEFAULT_TRANSFORMER()
@@ -160,6 +174,7 @@ Must support the same API, but may also have some query methods for specific int
 abstract type AbstractMultiDataset <: AbstractDataset end
 
 export make_model_domain,
+    make_output_domain,
     make_domain_variance,
     make_objective,
     make_objective_variance,

diff --git a/src/datasets/injectivedata.jl b/src/datasets/injectivedata.jl
@@ -35,15 +35,18 @@ make_objective(::ContiguouslyBinned, dataset::InjectiveData) =
 make_model_domain(::OneToOne, dataset::InjectiveData) = dataset.domain
 make_objective(::OneToOne, dataset::InjectiveData) = dataset.codomain[dataset.data_mask]
 
+make_output_domain(layout::AbstractDataLayout, dataset::InjectiveData) =
+    make_model_domain(layout, dataset)
+
 function make_objective_variance(
     ::AbstractDataLayout,
     dataset::InjectiveData{V},
 )::V where {V}
     if !isnothing(dataset.codomain_variance)
         dataset.codomain_variance[dataset.data_mask]
     else
-        # todo: i dunno just something
-        1e-8 .* dataset.codomain[dataset.data_mask]
+        # TODO: i dunno just something
+        ones(eltype(V), count(dataset.data_mask))
     end
 end
 

diff --git a/src/datasets/ogip.jl b/src/datasets/ogip.jl
@@ -209,8 +209,14 @@ function _build_reponse_matrix(
     channels::RMFChannels,
     T::Type,
 )
-    R = spzeros(T, header.num_channels, length(rmf.bins_low))
-    build_response_matrix!(R, rmf.f_chan, rmf.n_chan, rmf.matrix, header.first_channel)
+    R = build_response_matrix(
+        rmf.f_chan,
+        rmf.n_chan,
+        rmf.matrix,
+        header.num_channels,
+        header.first_channel,
+        T,
+    )
     SpectralFitting.ResponseMatrix(
         R,
         channels.channels,
@@ -221,6 +227,47 @@ function _build_reponse_matrix(
     )
 end
 
+function build_response_matrix(
+    f_chan::Vector,
+    n_chan::Vector,
+    matrix_rows::Vector,
+    num_cols::Int,
+    first_channel,
+    T::Type,
+)
+    ptrs = Int[1]
+    indices = Int[]
+    matrix = Float64[]
+
+    prev = first(ptrs)
+
+    for i in eachindex(f_chan)
+        M = matrix_rows[i]
+
+        row_len = 0
+        for (f, n) in zip(f_chan[i], n_chan[i])
+            if n == 0
+                # advance row
+                break
+            end
+            first = (f - first_channel) + 1
+            # append all of the indices
+            for j = 0:n-1
+                push!(indices, j + first)
+            end
+            append!(matrix, M[row_len+1:row_len+n])
+            row_len += n
+        end
+
+        next = row_len + prev
+        push!(ptrs, next)
+        prev = next
+    end
+
+    SparseArrays.SparseMatrixCSC{T,Int}(num_cols, length(f_chan), ptrs, indices, matrix)
+end
+
+# TODO: marked for refactoring (unused)
 function build_response_matrix!(
     R,
     f_chan::Vector,
@@ -351,19 +398,19 @@ function read_filename(header, entry, parent, exts...)
     if haskey(header, entry)
         path::String = strip(header[entry])
         if path == "NONE"
-            return missing
+            return nothing
         end
         name = find_file(data_directory, path, parent_name, exts)
-        if !ismissing(name)
+        if !isnothing(name)
             return name
         end
     end
-    missing
+    nothing
 end
 
 function find_file(dir, name, parent, extensions)
     if length(name) == 0
-        return missing
+        return nothing
     elseif match(r"%match%", name) !== nothing
         base = splitext(parent)[1]
         for ext in extensions
@@ -373,9 +420,9 @@ function find_file(dir, name, parent, extensions)
             end
         end
         @warn "Missing! Could not find file '%match%': tried $extensions"
-        return missing
+        return nothing
     elseif match(r"^none\b", name) !== nothing
-        return missing
+        return nothing
     end
     joinpath(dir, name)
 end

diff --git a/src/datasets/ogipdataset.jl b/src/datasets/ogipdataset.jl
@@ -10,9 +10,9 @@ end
 function load_ogip_dataset(
     spec_path;
     hdu = 2,
-    background = missing,
-    response = missing,
-    ancillary = missing,
+    background = nothing,
+    response = nothing,
+    ancillary = nothing,
     kwargs...,
 )
     paths = SpectralDataPaths(

diff --git a/src/datasets/response.jl b/src/datasets/response.jl
@@ -8,13 +8,32 @@ mutable struct ResponseMatrix{T}
     bins_high::Vector{T}
 end
 
+"""
+    response_energy(response::ResponseMatrix)
+
+Get the contiguously binned energy corresponding to the *input domain* of the
+response matrix. This is equivalent to the model domain.
+"""
 function response_energy(resp::ResponseMatrix{T}) where {T}
     E = zeros(T, length(resp.bins_low) + 1)
     E[1:end-1] .= resp.bins_low
     E[end] = resp.bins_high[end]
     E
 end
 
+"""
+    folded_energy(response::ResponseMatrix)
+
+Get the contiguously binned energy corresponding to the *output (folded) domain*
+of the response matrix. That is, the channel energies as used by the spectrum.
+"""
+function folded_energy(resp::ResponseMatrix{T}) where {T}
+    E = zeros(T, length(resp.channel_bins_low) + 1)
+    E[1:end-1] .= resp.channel_bins_low
+    E[end] = resp.channel_bins_high[end]
+    E
+end
+
 function regroup!(resp::ResponseMatrix{T}, grouping) where {T}
     itt = GroupingIterator(grouping)
     new_matrix = zeros(T, (length(itt), size(resp.matrix, 2)))