diff --git a/CHANGES.rst b/CHANGES.rst index fc8978117..578f4db31 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -40,7 +40,7 @@ to allow lazy deserialization of ASDF tagged tree nodes to custom objects. [#1733] -- deprecate ``copy_arrays`` in favor of ``memmap``, and set ``memmap=False`` by default [#1797] +- Deprecate ``copy_arrays`` in favor of ``memmap`` [#1797] 3.2.0 (2024-04-05) ------------------ diff --git a/asdf/_asdf.py b/asdf/_asdf.py index 931a8abbf..61ad2e735 100644 --- a/asdf/_asdf.py +++ b/asdf/_asdf.py @@ -75,7 +75,8 @@ def __init__( ignore_version_mismatch=True, ignore_unrecognized_tag=False, ignore_implicit_conversion=NotSet, - memmap=False, + memmap=NotSet, + copy_arrays=NotSet, lazy_load=True, custom_schema=None, ): @@ -117,6 +118,13 @@ def __init__( memmap : bool, optional When `True`, when reading files, attempt to memmap underlying data + arrays when possible. When set, this argument will override + ``copy_arrays``. The default will change to ``False`` in an upcoming + ASDF version. + + copy_arrays : bool, optional + Deprecated; use ``memmap`` instead. + When `False`, when reading files, attempt to memmap underlying data arrays when possible. 
lazy_load : bool, optional @@ -170,6 +178,13 @@ def __init__( self._fd = None self._closed = False self._external_asdf_by_uri = {} + # if memmap is set, it overrides copy_arrays + if copy_arrays is not NotSet: + warnings.warn("copy_arrays is deprecated; use memmap instead", AsdfDeprecationWarning) + if memmap is NotSet: + memmap = not copy_arrays + elif memmap is NotSet: + memmap = True self._blocks = BlockManager(uri=uri, lazy_load=lazy_load, memmap=memmap) # this message is passed into find_references to only warn if # a reference was found diff --git a/asdf/_tests/test_array_blocks.py b/asdf/_tests/test_array_blocks.py index 407bc18bb..2efc4d85b 100644 --- a/asdf/_tests/test_array_blocks.py +++ b/asdf/_tests/test_array_blocks.py @@ -811,9 +811,15 @@ def filename_with_array(tmp_path_factory): @pytest.mark.parametrize( "open_kwargs,should_memmap", [ - ({}, False), + ({}, True), ({"memmap": True}, True), ({"memmap": False}, False), + ({"copy_arrays": True}, False), + ({"copy_arrays": False}, True), + ({"memmap": True, "copy_arrays": True}, True), + ({"memmap": True, "copy_arrays": False}, True), + ({"memmap": False, "copy_arrays": True}, False), + ({"memmap": False, "copy_arrays": False}, False), ], ) def test_open_no_memmap(filename_with_array, open_kwargs, should_memmap): @@ -821,6 +827,8 @@ def test_open_no_memmap(filename_with_array, open_kwargs, should_memmap): Test that asdf.open does not (or does) return memmaps for arrays depending on a number of arguments including: default (no kwargs) + copy_arrays + memmap (overrides copy_arrays) memmap """ with asdf.open(filename_with_array, lazy_load=False, **open_kwargs) as af: diff --git a/docs/asdf/arrays.rst b/docs/asdf/arrays.rst index a2900a981..4a1a6843e 100644 --- a/docs/asdf/arrays.rst +++ b/docs/asdf/arrays.rst @@ -273,10 +273,10 @@ different compression algorithm when writing the file out again.
Memory mapping -------------- -Internal array data can be memory mapped using `numpy.memmap`, by setting -``memmap=True`` in either the `AsdfFile` constructor or `asdf.open`. This allows -for the efficient use of memory even when reading files with very large arrays. -The use of memory mapping means that the following usage pattern is not permitted: +By default, all internal array data is memory mapped using `numpy.memmap`. This +allows for the efficient use of memory even when reading files with very large +arrays. The use of memory mapping means that the following usage pattern is not +permitted: .. code:: @@ -290,3 +290,7 @@ The use of memory mapping means that the following usage pattern is not permitte Specifically, if an ASDF file has been opened using a ``with`` context, it is not possible to access the file contents outside of the scope of that context, because any memory mapped arrays will no longer be available. + +It may sometimes be useful to copy array data into memory instead of using +memory maps. This can be controlled by passing ``memmap=False`` to either +the `AsdfFile` constructor or `asdf.open`. By default, ``memmap=True``.