simonsobs · HTJense · Nov 13, 2023 · Nov 13, 2023 · Nov 13, 2023 · Nov 13, 2023
diff --git a/.gitignore b/.gitignore
@@ -102,3 +102,6 @@ venv.bak/
 
 # mypy
 .mypy_cache/
+
+# fg marginalization output
+scripts/fgmarge/
diff --git a/docs/cmbonly.rst b/docs/cmbonly.rst
@@ -0,0 +1,13 @@
+CMBonly (Primary CMB)
+=====
+
+.. automodule:: soliket.cmbonly.cmbonly
+
+CMB-only Likelihood
+-------------------
+
+.. autoclass:: soliket.cmbonly.CMBonly
+    :exclude-members: initialize
+    :members:
+    :private-members:
+    :show-inheritance:
diff --git a/docs/fgmarge.rst b/docs/fgmarge.rst
@@ -0,0 +1,93 @@
+Foreground Marginalization (Primary CMB)
+======
+
+.. automodule:: soliket.mflike.foreground_marginalization
+
+Performing a Foreground Marginalization
+---------------------------------------
+
+To perform a foreground marginalization, the script
+``scripts/foreground_marginalization.py`` is provided. It performs a Gibbs sampling of
+the CMB bandpowers and foreground parameters, where the former is estimated directly from
+the data, and the latter is estimated with the Metropolis-Hastings algorithm.
+
+To perform a foreground marginalization, simply run the script. It will write the data
+to a couple of plain text files:
+
+* ``leff.txt`` contains the l ranges of the extracted bandpower bins.
+* ``samples.txt`` contains the weights, log-posterior, and values of foreground parameters
+of the MH chain.
+* ``progress.txt`` contains convergence statistics as the chain progresses.
+* ``covmat.txt`` contains the latest estimate for the foreground parameter covariance.
+* ``extracted.txt`` contains the individual samples of the extracted CMB bandpowers.
+
+There are a variety of parameters that can be modified by the user if you so desire, or
+customized if you want to apply the code to a different dataset.
+
+
+Customizing the dataset
+^^^^^^^^^^^^^^^^^^^^^^^
+
+The code uses the ``soliket.ForegroundMarginalizer`` class, which is a slightly customized
+version of ``soliket.MFLike`` that adds a few utility functions for the gibbs sampling. If
+you wish to customize the dataset or foreground/systematics model that is used, you can
+do so similar to the customization options that exist in ``MFLike``. You can provide the
+``ForegroundMarginalizer`` with:
+
+* ``bandpass`` a ``soliket.BandPass`` class that performs the bandpass integration.
+* ``foregrounds`` a ``soliket.Foreground`` model that computes the foreground model.
+* ``theoryforge`` a ``soliket.TheoryForge_MFLike`` class that combines the previous two
+and includes instrumental systematics into the data vector.
+
+
+Customizing the parameter sampling
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The parameters are split into two groups: sampled parameters and fixed parameters. By
+default, the former are sampled from a multivariate gaussian distribution that is
+re-estimated every few steps; the latter are kept fixed throughout the entire chain.
+
+If you want to add your own parameter, you can do so by:
+
+1. Adding the name of the parameter to the ``param_names`` list;
+2. Adding the starting point of the parameter to the ``starting_point`` array;
+3. Providing an initial proposal covariance to the ``proposal`` array;
+4. You can optionally provide a hard prior range in the ``param_ranges`` array, or you
+can include a log-prior function in the ``logprior()`` function.
+
+
+By default, the code starts with a diagonal covariance matrix with very small step sizes,
+with the initial point in a region of parameter space where we expect the chain to
+converge to. This is done so that the chain initially accepts a lot of steps, and it can
+better re-estimate the proposal matrix as it goes. If you want to, you can load your own
+matrix into the ``proposal`` matrix, for example, by doing::
+
+    covmat = np.loadtxt("covmat.txt")
+    proposal = np.linalg.cholesky(covmat, lower = True)
+
+The ``update_proposal_every`` parameter is the number of accepted steps in the chain
+between two (attempted) updates of the proposal matrix. You can set this number to zero
+if you want to disable this.
+
+
+Putting the output in a SACC file
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``data_to_sacc.py`` script reads in the outputs from the foreground marginalization
+script and puts it into a sacc file, that can be directly read by the ``soliket.CMBonly``
+likelihood.
+
+By default, this script will ignore the first 30% of your chain (which is usually good
+enough to ensure a decent burn-in period). It will assume that the 145x145 binning
+window is optimal for the spectra.
+
+
+Foreground Marginalizer
+-----------------------
+
+.. autoclass:: soliket.mflike.ForegroundMarginalizer
+    :exclude-members: initialize
+    :members:
+    :private-members:
+    :show-inheritance:
+
diff --git a/docs/index.rst b/docs/index.rst
@@ -42,6 +42,8 @@ The pages here describe how to install and run SOLikeT, and document the functio
    :maxdepth: 1
 
    mflike
+   fgmarge
+   cmbonly
    lensing
    clusters
    xcorr
@@ -76,4 +78,4 @@ The pages here describe how to install and run SOLikeT, and document the functio
 
 .. |Codecov| image:: https://codecov.io/gh/simonsobs/SOLikeT/branch/master/graph/badge.svg?token=ND945EQDWR
    :target: https://codecov.io/gh/simonsobs/SOLikeT
-   :alt: Test Coverage
+   :alt: Test Coverage
diff --git a/scripts/cmbonly.yaml b/scripts/cmbonly.yaml
@@ -0,0 +1,94 @@
+output: chains/cmbonly
+debug: false
+
+sampler:
+  mcmc:
+    max_tries: 1000000
+
+likelihood:
+  soliket.CMBonly:
+    input_file: so_cmb_sacc_00000.fits
+    stop_at_error: True
+
+theory:
+  camb:
+    extra_args:
+      lens_potential_accuracy: 8
+      lens_margin: 2050
+      min_l_logl_sampling: 6000
+      kmax: 10.0
+      k_per_logint: 130
+      lSampleBoost: 1.2
+      DoLateRadTruncation: false
+
+params:
+  # Cosmology
+  ombh2:
+    prior:
+      min: 0.015
+      max: 0.030
+    ref:
+      dist: norm
+      loc: 0.022
+      scale: 0.002
+    proposal: 1e-3
+    latex: \Omega_b h^2
+  omch2:
+    prior:
+      min: 0.09
+      max: 0.15
+    ref:
+      dist: norm
+      loc: 0.12
+      scale: 0.02
+    proposal: 1e-2
+    latex: \Omega_c h^2
+  logA:
+    prior:
+      min: 2.5
+      max: 3.5
+    ref:
+      dist: norm
+      loc: 3.0
+      scale: 0.1
+    proposal: 1e-2
+    latex: \log(10^{10} A_s)
+  ns:
+    prior:
+      min: 0.85
+      max: 1.05
+    ref:
+      dist: norm
+      loc: 0.96
+      scale: 0.05
+    proposal: 1e-3
+    latex: n_s
+  h:
+    prior:
+      min: 0.4
+      max: 1.0
+    ref:
+      dist: norm
+      loc: 0.67
+      scale: 0.1
+    proposal: 1e-3
+    latex: h
+  tau:
+    prior:
+      min: 0.02
+      max: 0.20
+    ref:
+      dist: norm
+      loc: 0.0544
+      scale: 0.0073
+    proposal: 7e-3
+    latex: \tau_\mathrm{reio}
+  H0:
+    derived: True
+    value: "lambda h: 100.0 * h"
+  As:
+    derived: True
+    value: "lambda logA: 1e-10 * np.exp(logA)"
+
+prior:
+  tau_priors: "lambda tau: stats.norm.logpdf(tau, loc = 5.44e-2, scale = 7.3e-3)"
diff --git a/scripts/data_to_sacc.py b/scripts/data_to_sacc.py
@@ -0,0 +1,106 @@
+r"""
+.. module:: fgmarge.data_to_sacc
+
+:Synopsis: Compress the data into a sacc file.
+:Authors: Hidde Jense.
+"""
+import numpy as np
+import sacc
+from soliket.mflike.ForegroundMarginalizer import ForegroundMarginalizer
+
+"""
+You can modify the code below here!
+"""
+
+outdir = "fgmarge"
+
+fgmarge = ForegroundMarginalizer({
+    # Put your nondefault settings in here.
+    "input_file": "LAT_simu_sacc_00000.fits",
+    "cov_Bbl_file": "data_sacc_w_covar_and_Bbl.fits",
+    "data_folder": "MFLike/v0.8",
+    "lmax_extract": {
+        "tt": 7000,
+        "te": 7000,
+        "ee": 7000
+    },
+    "foregrounds": None,
+    "theoryforge": None,
+    "bandpass": None
+})
+
+# This doesn't need to be changed, these parameters only need to be passed on to
+# the foreground marginalizer for consistency's sake. Unless you add new calibration
+# parameters, you can leave this dict as-is.
+fixed_params = {
+    "T_d": 9.6,
+    "cal_LAT_93": 1,
+    "cal_LAT_145": 1,
+    "cal_LAT_225": 1,
+    "calT_LAT_93": 1,
+    "calT_LAT_145": 1,
+    "calT_LAT_225": 1,
+    "calE_LAT_93": 1,
+    "calE_LAT_145": 1,
+    "calE_LAT_225": 1,
+    "alpha_LAT_93": 0,
+    "alpha_LAT_145": 0,
+    "alpha_LAT_225": 0,
+    "bandint_shift_LAT_93": 0,
+    "bandint_shift_LAT_145": 0,
+    "bandint_shift_LAT_225": 0,
+    "calG_all": 1
+}
+
+fgmarge.make_mapping_matrix(**fixed_params)
+
+cmb_ls = np.loadtxt(f"{outdir}/leff.txt")
+cmb_samples = np.loadtxt(f"{outdir}/extracted.txt")
+
+n = int(0.3 * cmb_samples.shape[0])
+
+cmb_mean = np.nanmean(cmb_samples[n:, :], axis=0)
+cmb_cov = np.cov(cmb_samples[n:, :].T)
+
+s = sacc.Sacc()
+
+s.add_tracer("misc", "LAT_cmb_s0")
+s.add_tracer("misc", "LAT_cmb_s2")
+
+datatypes = {
+    "tt": "cl_00",
+    "te": "cl_0e",
+    "ee": "cl_ee",
+}
+
+tracers = {
+    "tt": ("LAT_cmb_s0", "LAT_cmb_s0"),
+    "te": ("LAT_cmb_s0", "LAT_cmb_s2"),
+    "ee": ("LAT_cmb_s2", "LAT_cmb_s2"),
+}
+
+indices = []
+
+for m in fgmarge.spec_meta:
+    if m["t1"] == "LAT_145" and m["t2"] == "LAT_145":
+        p = m["pol"]
+        bin0 = fgmarge.extract_zero[p]
+        bin1 = bin0 + fgmarge.extract_bins[p]
+        win = m["bpw"]
+
+        win = sacc.windows.BandpowerWindow(2 + np.arange(fgmarge.lmax_extract[p] + 1),
+                                           win.weight[:fgmarge.lmax_extract[p] + 1,
+                                                      :fgmarge.extract_bins[p]])
+
+        t1, t2 = tracers[p]
+        s.add_ell_cl(datatypes.get(p), t1, t2, cmb_ls[bin0:bin1],
+                     cmb_mean[bin0:bin1], window=win)
+
+        _, _, ind = s.get_ell_cl(datatypes.get(p), t1, t2, return_ind=True)
+        indices.append(ind)
+
+sorted_indices = np.concatenate(indices)
+
+s.add_covariance(cmb_cov)
+
+s.save_fits(f"{outdir}/so_cmb_sacc_00000.fits", overwrite=True)