@misc{olah_2015,
author = {Olah, Christopher},
booktitle = {Neural Networks, Types, and Functional Programming},
month = {sep},
title = {{Neural Networks, Types, and Functional Programming}},
url = {https://colah.github.io/posts/2015-09-NN-Types-FP/},
year = {2015}
}
@misc{lecun_2018,
author = {LeCun, Yann},
booktitle = {Official Facebook Post},
month = {jan},
title = {{Deep Learning est mort. Vive Differentiable Programming!}},
howpublished = {\url{https://www.facebook.com/yann.lecun/posts/10155003011462143}},
url = {https://www.facebook.com/yann.lecun/posts/10155003011462143},
note = {\url{https://techburst.io/deep-learning-est-mort-vive-differentiable-programming-5060d3c55074}},
year = {2018}
}
@misc{swift,
author = {Wei, Richard and Zheng, Dan and Rasi, Marc and Chrzaszcz, Bart},
booktitle = {Differentiable Programming Manifesto},
month = {nov},
title = {{Differentiable Programming Manifesto}},
url = {https://github.com/apple/swift/blob/master/docs/DifferentiableProgramming.md},
year = {2019}
}
@article{deCastro:2018mgh,
author = "De Castro, Pablo and Dorigo, Tommaso",
title = "{INFERNO: Inference-Aware Neural Optimisation}",
eprint = "1806.04743",
archivePrefix = "arXiv",
primaryClass = "stat.ML",
doi = "10.1016/j.cpc.2019.06.007",
journal = "Comput. Phys. Commun.",
volume = "244",
pages = "170--179",
year = "2019"
}
@article{asymptotics,
author = "Cowan, Glen and Cranmer, Kyle and Gross, Eilam and Vitells, Ofer",
title = "{Asymptotic formulae for likelihood-based tests of new physics}",
eprint = "1007.1727",
archivePrefix = "arXiv",
primaryClass = "physics.data-an",
doi = "10.1140/epjc/s10052-011-1554-0",
journal = "Eur. Phys. J. C",
volume = "71",
pages = "1554",
year = "2011",
note = "[Erratum: Eur.Phys.J.C 73, 2501 (2013)]"
}
@article{baydin2018automatic,
abstract = {Derivatives, mostly in the form of gradients and Hessians, are ubiquitous in machine learning. Automatic differentiation (AD), also called algorithmic differentiation or simply “autodiff”, is a family of techniques similar to but more general than backpropagation for efficiently and accurately evaluating derivatives of numeric functions expressed as computer programs. AD is a small but established field with applications in areas including computational fluid dynamics, atmospheric sciences, and engineering design optimization. Until very recently, the fields of machine learning and AD have largely been unaware of each other and, in some cases, have independently discovered each other's results. Despite its relevance, general-purpose AD has been missing from the machine learning toolbox, a situation slowly changing with its ongoing adoption under the names “dynamic computational graphs” and “differentiable programming”. We survey the intersection of AD and machine learning, cover applications where AD has direct relevance, and address the main implementation techniques. By precisely defining the main differentiation techniques and their interrelationships, we aim to bring clarity to the usage of the terms “autodiff”, “automatic differentiation”, and “symbolic differentiation” as these are encountered more and more in machine learning settings.},
archivePrefix = {arXiv},
arxivId = {1502.05767},
author = {Baydin, Atılım G{\"{u}}neş and Pearlmutter, Barak A. and Radul, Alexey Andreyevich and Siskind, Jeffrey Mark},
eprint = {1502.05767},
issn = {15337928},
journal = {Journal of Machine Learning Research},
keywords = {Backpropagation,Differentiable Programming},
number = {153},
pages = {1--43},
title = {{Automatic differentiation in machine learning: A survey}},
volume = {18},
year = {2018}
}
@article{Brehmer:2018hga,
abstract = {Simulators often provide the best description of real-world phenomena. However, the probability density that they implicitly define is often intractable, leading to challenging inverse problems for inference. Recently, a number of techniques have been introduced in which a surrogate for the intractable density is learned, including normalizing flows and density ratio estimators. We show that additional information that characterizes the latent process can often be extracted from simulators and used to augment the training data for these surrogate models. We introduce several loss functions that leverage these augmented data and demonstrate that these techniques can improve sample efficiency and quality of inference.},
archivePrefix = {arXiv},
arxivId = {1805.12244},
author = {Brehmer, Johann and Louppe, Gilles and Pavez, Juan and Cranmer, Kyle},
doi = {10.1073/pnas.1915980117},
eprint = {1805.12244},
issn = {10916490},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
keywords = {Implicit models,Neural density estimation,Simulation-based inference},
month = {mar},
number = {10},
pages = {5242--5249},
pmid = {32079725},
publisher = {National Academy of Sciences},
title = {{Mining gold from implicit models to improve likelihood-free inference}},
url = {http://arxiv.org/abs/1805.12244},
note = {\url{http://www.ncbi.nlm.nih.gov/pubmed/32079725}},
volume = {117},
year = {2020}
}
@article{Cranmer201912789,
abstract = {Many domains of science have developed complex simulations to describe phenomena of interest. While these simulations provide high-fidelity models, they are poorly suited for inference and lead to challenging inverse problems. We review the rapidly developing field of simulation-based inference and identify the forces giving additional momentum to the field. Finally, we describe how the frontier is expanding so that a broad audience can appreciate the profound influence these developments may have on science.},
author = {Cranmer, Kyle and Brehmer, Johann and Louppe, Gilles},
doi = {10.1073/pnas.1912789117},
elocation-id = {201912789},
eprint = {https://www.pnas.org/content/early/2020/05/28/1912789117.full.pdf},
issn = {0027-8424},
journal = {Proceedings of the National Academy of Sciences},
publisher = {National Academy of Sciences},
title = {{The frontier of simulation-based inference}},
url = {https://www.pnas.org/content/early/2020/05/28/1912789117},
year = {2020}
}
@article{Carleo:2019ptp,
abstract = {Machine learning (ML) encompasses a broad range of algorithms and modeling tools used for a vast array of data processing tasks, which has entered most scientific disciplines in recent years. This article reviews in a selective way the recent research on the interface between machine learning and the physical sciences. This includes conceptual developments in ML motivated by physical insights, applications of machine learning techniques to several domains in physics, and cross fertilization between the two fields. After giving a basic notion of machine learning methods and principles, examples are described of how statistical physics is used to understand methods in ML. This review then describes applications of ML methods in particle physics and cosmology, quantum many-body physics, quantum computing, and chemical and material physics. Research and development into novel computing architectures aimed at accelerating ML are also highlighted. Each of the sections describe recent successes as well as domain-specific methodology and challenges.},
archivePrefix = {arXiv},
arxivId = {1903.10563},
author = {Carleo, Giuseppe and Cirac, Ignacio and Cranmer, Kyle and Daudet, Laurent and Schuld, Maria and Tishby, Naftali and Vogt-Maranto, Leslie and Zdeborov{\'{a}}, Lenka},
doi = {10.1103/RevModPhys.91.045002},
eprint = {1903.10563},
issn = {15390756},
journal = {Reviews of Modern Physics},
number = {4},
title = {{Machine learning and the physical sciences}},
volume = {91},
year = {2019}
}
@article{Brehmer:2019jyt,
abstract = {The subtle and unique imprint of dark matter substructure on extended arcs in strong lensing systems contains a wealth of information about the properties and distribution of dark matter on small scales and, consequently, about the underlying particle physics. However, teasing out this effect poses a significant challenge since the likelihood function for realistic simulations of population-level parameters is intractable. We apply recently-developed simulation-based inference techniques to the problem of substructure inference in galaxy-galaxy strong lenses. By leveraging additional information extracted from the simulator, neural networks are efficiently trained to estimate likelihood ratios associated with population-level parameters characterizing substructure. Through proof-of-principle application to simulated data, we show that these methods can provide an efficient and principled way to simultaneously analyze an ensemble of strong lenses, and can be used to mine the large sample of lensing images deliverable by near-future surveys for signatures of dark matter substructure.},
archivePrefix = {arXiv},
arxivId = {1909.02005},
author = {Brehmer, Johann and Mishra-Sharma, Siddharth and Hermans, Joeri and Louppe, Gilles and Cranmer, Kyle},
doi = {10.3847/1538-4357/ab4c41},
eprint = {1909.02005},
issn = {1538-4357},
journal = {The Astrophysical Journal},
number = {1},
pages = {49},
title = {{Mining for Dark Matter Substructure: Inferring Subhalo Population Properties from Strong Lenses with Machine Learning}},
volume = {886},
year = {2019}
}
@article{Alsing:2018eau,
abstract = {Many statistical models in cosmology can be simulated forwards but have intractable likelihood functions. Likelihood-free inference methods allow us to perform Bayesian inference from these models using only forward simulations, free from any likelihood assumptions or approximations. Likelihood-free inference generically involves simulating mock data and comparing to the observed data; this comparison in data space suffers from the curse of dimensionality and requires compression of the data to a small number of summary statistics to be tractable. In this paper, we use massive asymptotically optimal data compression to reduce the dimensionality of the data space to just one number per parameter, providing a natural and optimal framework for summary statistic choice for likelihood-free inference. Secondly, we present the first cosmological application of Density Estimation Likelihood-Free Inference (DELFI), which learns a parametrized model for joint distribution of data and parameters, yielding both the parameter posterior and the model evidence. This approach is conceptually simple, requires less tuning than traditional Approximate Bayesian Computation approaches to likelihood-free inference and can give high-fidelity posteriors from orders of magnitude fewer forward simulations. As an additional bonus, it enables parameter inference and Bayesian model comparison simultaneously. We demonstrate DELFI with massive data compression on an analysis of the joint light-curve analysis supernova data, as a simple validation case study. We show that high-fidelity posterior inference is possible for full-scale cosmological data analyses with as few as {\~{}}104 simulations, with substantial scope for further improvement, demonstrating the scalability of likelihood-free inference to large and complex cosmological data sets.},
archivePrefix = {arXiv},
arxivId = {1801.01497},
author = {Alsing, Justin and Wandelt, Benjamin and Feeney, Stephen},
doi = {10.1093/mnras/sty819},
eprint = {1801.01497},
issn = {13652966},
journal = {Monthly Notices of the Royal Astronomical Society},
keywords = {Methods: Data analysis},
number = {3},
pages = {2874--2885},
title = {{Massive optimal data compression and density estimation for scalable, likelihood-free inference in cosmology}},
volume = {477},
year = {2018}
}
@article{2015Natur.521..436L,
abstract = {Deep learning allows computational models that are composed of multiple processing layers to learn representations of data with multiple levels of abstraction. These methods have dramatically improved the state-of-the-art in speech recognition, visual object recognition, object detection and many other domains such as drug discovery and genomics. Deep learning discovers intricate structure in large data sets by using the backpropagation algorithm to indicate how a machine should change its internal parameters that are used to compute the representation in each layer from the representation in the previous layer. Deep convolutional nets have brought about breakthroughs in processing images, video, speech and audio, whereas recurrent nets have shone light on sequential data such as text and speech.},
author = {LeCun, Yann and Bengio, Yoshua and Hinton, Geoffrey},
doi = {10.1038/nature14539},
issn = {14764687},
journal = {Nature},
month = {may},
number = {7553},
pages = {436--444},
pmid = {26017442},
title = {{Deep learning}},
volume = {521},
year = {2015}
}
@article{Brehmer:2018eca,
abstract = {We develop, discuss, and compare several inference techniques to constrain theory parameters in collider experiments. By harnessing the latent-space structure of particle physics processes, we extract extra information from the simulator. This augmented data can be used to train neural networks that precisely estimate the likelihood ratio. The new methods scale well to many observables and high-dimensional parameter spaces, do not require any approximations of the parton shower and detector response, and can be evaluated in microseconds. Using weak-boson-fusion Higgs production as an example process, we compare the performance of several techniques. The best results are found for likelihood ratio estimators trained with extra information about the score, the gradient of the log likelihood function with respect to the theory parameters. The score also provides sufficient statistics that contain all the information needed for inference in the neighborhood of the Standard Model. These methods enable us to put significantly stronger bounds on effective dimension-six operators than the traditional approach based on histograms. They also outperform generic machine learning methods that do not make use of the particle physics structure, demonstrating their potential to substantially improve the new physics reach of the Large Hadron Collider legacy results.},
archivePrefix = {arXiv},
arxivId = {1805.00020},
author = {Brehmer, Johann and Cranmer, Kyle and Louppe, Gilles and Pavez, Juan},
doi = {10.1103/PhysRevD.98.052004},
eprint = {1805.00020},
issn = {24700029},
journal = {Physical Review D},
number = {5},
pages = {052004},
title = {{A guide to constraining effective field theories with machine learning}},
volume = {98},
year = {2018}
}
@article{Brehmer:2018kdj,
abstract = {We present powerful new analysis techniques to constrain effective field theories at the LHC. By leveraging the structure of particle physics processes, we extract extra information from Monte Carlo simulations, which can be used to train neural network models that estimate the likelihood ratio. These methods scale well to processes with many observables and theory parameters, do not require any approximations of the parton shower or detector response, and can be evaluated in microseconds. We show that they allow us to put significantly stronger bounds on dimension-six operators than existing methods, demonstrating their potential to improve the precision of the LHC legacy constraints.},
archivePrefix = {arXiv},
arxivId = {1805.00013},
author = {Brehmer, Johann and Cranmer, Kyle and Louppe, Gilles and Pavez, Juan},
doi = {10.1103/PhysRevLett.121.111801},
eprint = {1805.00013},
issn = {10797114},
journal = {Physical Review Letters},
number = {11},
pages = {111801},
title = {{Constraining Effective Field Theories with Machine Learning}},
volume = {121},
year = {2018}
}
@software{neos,
author = {Lukas Heinrich and
Nathan Simpson},
title = {pyhf/neos: initial zenodo release},
month = mar,
year = 2020,
publisher = {Zenodo},
version = {0.0.2},
doi = {10.5281/zenodo.3697981},
url = {https://doi.org/10.5281/zenodo.3697981}
}
@software{pyhf,
author = {Lukas Heinrich and Matthew Feickert and Giordon Stark},
title = "{pyhf: v0.5.1}",
version = {0.5.1},
doi = {10.5281/zenodo.1169739},
url = {https://doi.org/10.5281/zenodo.1169739},
}
@software{awkward_array,
author = {Jim Pivarski},
title = "{Awkward Array: v0.13.0}",
version = {0.13.0},
doi = {10.5281/zenodo.1472436},
url = {https://doi.org/10.5281/zenodo.1472436},
}
@report{HistFactory,
author = "Cranmer, Kyle and Lewis, George and Moneta, Lorenzo and
Shibata, Akira and Verkerke, Wouter",
title = "{HistFactory: A tool for creating statistical models for
use with RooFit and RooStats}",
month = "Jan",
year = "(2012)",
reportNumber = "CERN-OPEN-2012-016",
number = "\href{https://cds.cern.ch/record/1456844}{CERN-OPEN-2012-016}",
}
@inproceedings{fsharp,
author = {Baydin, Atılım Güneş and Pearlmutter, Barak A. and Siskind, Jeffrey Mark},
booktitle = {7th International Conference on Algorithmic Differentiation, Christ Church Oxford, UK, September 12--15, 2016},
title = {DiffSharp: An {AD} Library for {.NET} Languages},
year = {2016},
eprint = {1611.03423},
arxivId = {1611.03423},
archivePrefix = {arXiv}
}
@article{julia,
title = {{Zygote: A differentiable programming system to bridge machine learning and scientific computing}},
author = {Innes, Mike and Edelman, Alan and Fischer, Keno and Rackauckas, Chris and Saba, Elliot and Shah, Viral B and Tebbutt, Will},
eprint = {1907.07587},
archivePrefix = {arXiv},
year = {2019}
}
@article{James:1975dr,
author = "James, F. and Roos, M.",
title = "{Minuit: A System for Function Minimization and Analysis of the Parameter Errors and Correlations}",
reportNumber = "CERN-DD-75-20",
doi = "10.1016/0010-4655(75)90039-9",
journal = "Comput. Phys. Commun.",
volume = "10",
pages = "343--367",
year = "1975"
}
@article{Bury:2020ewi,
author = "Bury, Florian and Delaere, Christophe",
title = "{Matrix Element Regression with Deep Neural Networks -- breaking the CPU barrier}",
eprint = "2008.10949",
archivePrefix = "arXiv",
primaryClass = "hep-ex",
reportNumber = "CP3-20-37",
month = "8",
year = "2020"
}
@software{Athena,
author = "{ATLAS Collaboration}",
title = {Athena},
month = apr,
year = 2019,
publisher = {Zenodo},
version = {21.2.79},
doi = {10.5281/zenodo.2641996},
url = {https://doi.org/10.5281/zenodo.2641996}
}
@software{CMSSW,
author = "{CMS Collaboration}",
title = "{CMSSW}",
year = 2020,
url = {https://github.com/cms-sw/cmssw}
}
@misc{diff_ME,
author = {Heinrich, Lukas},
title = {Differentiable Matrix Elements},
url = {https://github.com/lukasheinrich/differentiable_matrix_elements},
year = {2019}
}
@misc{cpp_P2072R0,
author = {Foco, Marco and Rietmann, Max and Vassilev, Vassil and Wong, Michael},
title = {{P2072R0: Differentiable programming for C++}},
howpublished = {C++ Standards Committee Document P2072R0, \url{http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2072r0.pdf}}
}
@article{clad_in_root,
title = {{Automatic Differentiation in ROOT}},
author = {Vassilev, Vassil and Efremov, Aleksandr and Shadura, Oksana},
eprint = {2004.04435},
archivePrefix = {arXiv},
year = {2020}
}