Skip to content

Commit

Permalink
Update bib with published paper and all refs
Browse files Browse the repository at this point in the history
  • Loading branch information
mdbenito committed Mar 30, 2024
1 parent 9f7e150 commit 9f90bfc
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 14 deletions.
38 changes: 28 additions & 10 deletions docs/assets/pydvl.bib
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ @article{agarwal_secondorder_2017
pages = {1--40},
url = {https://www.jmlr.org/papers/v18/16-491.html},
abstract = {First-order stochastic methods are the state-of-the-art in large-scale machine learning optimization owing to efficient per-iteration complexity. Second-order methods, while able to provide faster convergence, have been much less explored due to the high cost of computing the second-order information. In this paper we develop second-order stochastic methods for optimization problems in machine learning that match the per-iteration cost of gradient based methods, and in certain settings improve upon the overall running time over popular first-order methods. Furthermore, our algorithm has the desirable property of being implementable in time linear in the sparsity of the input data.},
langid = {english}
langid = {english},
keywords = {notion}
}

@inproceedings{bae_if_2022,
Expand Down Expand Up @@ -340,6 +341,19 @@ @inproceedings{li_achieving_2022
langid = {english}
}

@online{maleki_bounding_2014,
  title = {Bounding the Estimation Error of Sampling-Based {Shapley} Value Approximation},
  author = {Maleki, Sasan and Tran-Thanh, Long and Hines, Greg and Rahwan, Talal and Rogers, Alex},
  date = {2014-02-12},
  eprint = {1306.4265},
  eprinttype = {arXiv},
  eprintclass = {cs},
  url = {https://arxiv.org/abs/1306.4265},
  urldate = {2020-11-16},
  abstract = {The Shapley value is arguably the most central normative solution concept in cooperative game theory. It specifies a unique way in which the reward from cooperation can be "fairly" divided among players. While it has a wide range of real world applications, its use is in many cases hampered by the hardness of its computation. A number of researchers have tackled this problem by (i) focusing on classes of games where the Shapley value can be computed efficiently, or (ii) proposing representation formalisms that facilitate such efficient computation, or (iii) approximating the Shapley value in certain classes of games. For the classical \textit{characteristic function} representation, the only attempt to approximate the Shapley value for the general class of games is due to Castro \textit{et al.} While this algorithm provides a bound on the approximation error, this bound is \textit{asymptotic}, meaning that it only holds when the number of samples increases to infinity. On the other hand, when a finite number of samples is drawn, an unquantifiable error is introduced, meaning that the bound no longer holds. With this in mind, we provide non-asymptotic bounds on the estimation error for two cases: where (i) the \textit{variance}, and (ii) the \textit{range}, of the players' marginal contributions is known. Furthermore, for the second case, we show that when the range is significantly large relative to the Shapley value, the bound can be improved (from $O(\frac{r}{m})$ to $O(\sqrt{\frac{r}{m}})$). Finally, we propose, and demonstrate the effectiveness of using stratified sampling for improving the bounds further.}
}

@inproceedings{martens_optimizing_2015,
title = {Optimizing {{Neural Networks}} with {{Kronecker-factored Approximate Curvature}}},
booktitle = {Proceedings of the 32nd {{International Conference}} on {{Machine Learning}}},
Expand Down Expand Up @@ -388,19 +402,23 @@ @inproceedings{okhrati_multilinear_2021
keywords = {notion}
}

@inproceedings{schioppa_scaling_2021,
@article{schioppa_scaling_2022,
title = {Scaling {{Up Influence Functions}}},
author = {Schioppa, Andrea and Zablotskaia, Polina and Vilar, David and Sokolov, Artem},
date = {2021-12-06},
date = {2022-06-28},
journaltitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
shortjournal = {Proc. AAAI Conf. Artif. Intell.},
volume = {36},
number = {8},
eprint = {2112.03052},
eprinttype = {arxiv},
eprintclass = {cs},
publisher = {arXiv},
doi = {10.48550/arXiv.2112.03052},
url = {https://arxiv.org/abs/2112.03052},
urldate = {2023-03-10},
abstract = {We address efficient calculation of influence functions for tracking predictions back to the training data. We propose and analyze a new approach to speeding up the inverse Hessian calculation based on Arnoldi iteration. With this improvement, we achieve, to the best of our knowledge, the first successful implementation of influence functions that scales to full-size (language and vision) Transformer models with several hundreds of millions of parameters. We evaluate our approach on image classification and sequence-to-sequence tasks with tens to a hundred of millions of training examples. Our code will be available at https://github.com/google-research/jax-influence.},
eventtitle = {{{AAAI-22}}},
pages = {8179--8186},
issn = {2374-3468},
doi = {10.1609/aaai.v36i8.20791},
url = {https://ojs.aaai.org/index.php/AAAI/article/view/20791},
urldate = {2024-03-30},
abstract = {We address efficient calculation of influence functions for tracking predictions back to the training data. We propose and analyze a new approach to speeding up the inverse Hessian calculation based on Arnoldi iteration. With this improvement, we achieve, to the best of our knowledge, the first successful implementation of influence functions that scales to full-size (language and vision) Transformer models with several hundreds of millions of parameters. We evaluate our approach in image classification and sequence-to-sequence tasks with tens to a hundred of millions of training examples. Our code is available at https://github.com/google-research/jax-influence.},
langid = {english},
keywords = {notion}
}

Expand Down
2 changes: 1 addition & 1 deletion docs/getting-started/glossary.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Terms in data valuation and influence functions:
The Arnoldi method approximately computes eigenvalue, eigenvector pairs of
a symmetric matrix. For influence functions, it is used to approximate
the [iHVP][inverse-hessian-vector-product].
Introduced by [@schioppa_scaling_2021] in the context of influence functions.
Introduced by [@schioppa_scaling_2022] in the context of influence functions.

* [Implementation (torch)
][pydvl.influence.torch.influence_function_model.ArnoldiInfluence]
Expand Down
2 changes: 1 addition & 1 deletion docs/getting-started/methods.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ We currently implement the following methods:
[@agarwal_secondorder_2017].

- [**Arnoldi Influence**][pydvl.influence.torch.ArnoldiInfluence]
[@schioppa_scaling_2021].
[@schioppa_scaling_2022].

- [**EKFAC Influence**][pydvl.influence.torch.EkfacInfluence]
[@george_fast_2018;@martens_optimizing_2015].
Expand Down
2 changes: 1 addition & 1 deletion docs/influence/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ intractable for large models like deep neural networks. Much of the recent
research tackles this issue using approximations, like a Neumann series
[@agarwal_secondorder_2017], with the most successful solution using a low-rank
approximation that iteratively finds increasing eigenspaces of the Hessian
[@schioppa_scaling_2021].
[@schioppa_scaling_2022].

pyDVL implements several methods for the efficient computation of the IF for
machine learning. In the examples we document some of the difficulties that can
Expand Down
2 changes: 1 addition & 1 deletion docs/influence/influence_function_model.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ approximated by

where \(D\) is a diagonal matrix with the top (in absolute value) eigenvalues of
the Hessian and \(V\) contains the corresponding eigenvectors. See also
[@schioppa_scaling_2021].
[@schioppa_scaling_2022].

```python
from pydvl.influence.torch import ArnoldiInfluence
Expand Down

0 comments on commit 9f90bfc

Please sign in to comment.