Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

deprecate copy_arrays with warning #1797

Merged
merged 16 commits into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
to allow lazy deserialization of ASDF tagged tree nodes to
custom objects. [#1733]

- Deprecate ``copy_arrays`` in favor of ``memmap`` [#1797]

3.2.0 (2024-04-05)
------------------
Expand Down
11 changes: 9 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,13 @@ The `open` function also works as a context handler:
with asdf.open("example.asdf") as af:
...

.. warning::
The ``copy_arrays`` argument of `asdf.open()` and `AsdfFile` is deprecated,
and will be removed in ASDF 4.0. It is replaced by ``memmap``, which
is the opposite of ``copy_arrays`` (``memmap == not copy_arrays``).
In ASDF 4.0, ``memmap`` will default to ``False``, which means arrays
will no longer be memory-mapped by default.

To get a quick overview of the data stored in the file, use the top-level
`AsdfFile.info()` method:

Expand Down Expand Up @@ -245,12 +252,12 @@ Array data remains unloaded until it is explicitly accessed:
True

By default, uncompressed data blocks are memory mapped for efficient
access. Memory mapping can be disabled by using the ``copy_arrays``
access. Memory mapping can be disabled by using the ``memmap``
option of `open` when reading:

.. code:: python

af = asdf.open("example.asdf", copy_arrays=True)
af = asdf.open("example.asdf", memmap=False)

.. _end-read-file-text:

Expand Down
33 changes: 18 additions & 15 deletions asdf/_asdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def __init__(
ignore_version_mismatch=True,
ignore_unrecognized_tag=False,
ignore_implicit_conversion=NotSet,
copy_arrays=False,
zacharyburnett marked this conversation as resolved.
Show resolved Hide resolved
copy_arrays=NotSet,
zacharyburnett marked this conversation as resolved.
Show resolved Hide resolved
memmap=NotSet,
lazy_load=True,
custom_schema=None,
Expand Down Expand Up @@ -117,16 +117,15 @@ def __init__(
as-is.

copy_arrays : bool, optional
Deprecated; use ``memmap`` instead.
When `False`, when reading files, attempt to memmap underlying data
arrays when possible.

memmap : bool, optional
When `True`, when reading files, attempt to memmap underlying data
arrays when possible. When set, this argument will override
``copy_arrays``. When not set, the ``copy_arrays`` will determine
if arrays are memory mapped or copied. ``copy_arrays`` will be
deprecated and the default will change in an upcoming asdf version
which by default will not memory map arrays.
``copy_arrays``. The default will change to ``False`` in an upcoming
ASDF version. At the moment the default is ``True``.

lazy_load : bool, optional
When `True` and the underlying file handle is seekable, data
Expand All @@ -135,8 +134,6 @@ def __init__(
open during the lifetime of the tree. Setting to False causes
all data arrays to be loaded up front, which means that they
can be accessed even after the underlying file is closed.
Note: even if ``lazy_load`` is `False`, ``copy_arrays`` is still taken
into account.

custom_schema : str, optional
Path to a custom schema file that will be used for a secondary
Expand Down Expand Up @@ -182,9 +179,16 @@ def __init__(
self._closed = False
self._external_asdf_by_uri = {}
# if memmap is set, it overrides copy_arrays
if memmap is not NotSet:
copy_arrays = not memmap
self._blocks = BlockManager(uri=uri, lazy_load=lazy_load, memmap=not copy_arrays)
if copy_arrays is not NotSet:
warnings.warn(
"copy_arrays is deprecated; use memmap instead. Note that memmap will default to False in asdf 4.0.",
AsdfWarning,
)
if memmap is NotSet:
memmap = not copy_arrays
elif memmap is NotSet:
memmap = True
self._blocks = BlockManager(uri=uri, lazy_load=lazy_load, memmap=memmap)
# this message is passed into find_references to only warn if
# a reference was found
find_ref_warning_msg = (
Expand Down Expand Up @@ -1618,7 +1622,7 @@ def open_asdf(
ignore_version_mismatch=True,
ignore_unrecognized_tag=False,
_force_raw_types=False,
copy_arrays=False,
copy_arrays=NotSet,
memmap=NotSet,
lazy_tree=NotSet,
lazy_load=True,
Expand Down Expand Up @@ -1661,16 +1665,15 @@ def open_asdf(
`False` by default.

copy_arrays : bool, optional
Deprecated; use ``memmap`` instead.
When `False`, when reading files, attempt to memmap underlying data
arrays when possible.

memmap : bool, optional
When `True`, when reading files, attempt to memmap underlying data
arrays when possible. When set, this argument will override
``copy_arrays``. When not set, the ``copy_arrays`` will determine
if arrays are memory mapped or copied. ``copy_arrays`` will be
deprecated and the default will change in an upcoming asdf version
which by default will not memory map arrays.
``copy_arrays``. The default will change to ``False`` in an upcoming
ASDF version. At the moment the default is ``True``.

lazy_load : bool, optional
When `True` and the underlying file handle is seekable, data
Expand Down
2 changes: 1 addition & 1 deletion asdf/_tests/_regtests/test_1334.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def test_memmap_view_access_after_close(tmp_path):
fn = tmp_path / "test.asdf"
asdf.AsdfFile({"a": a}).write_to(fn)

with asdf.open(fn, copy_arrays=False) as af:
with asdf.open(fn, memmap=True) as af:
v = af["a"][:5]

assert np.all(v == 1)
8 changes: 4 additions & 4 deletions asdf/_tests/_regtests/test_1525.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import asdf


@pytest.mark.parametrize("copy_arrays", [True, False])
def test_external_blocks_always_lazy_loaded_and_memmapped(tmp_path, copy_arrays):
@pytest.mark.parametrize("memmap", [True, False])
def test_external_blocks_always_lazy_loaded_and_memmapped(tmp_path, memmap):
"""
External blocks are always lazy loaded and memmapped

Expand All @@ -18,13 +18,13 @@ def test_external_blocks_always_lazy_loaded_and_memmapped(tmp_path, copy_arrays)
af.set_array_storage(arr, "external")
af.write_to(fn)

with asdf.open(fn, copy_arrays=copy_arrays) as af:
with asdf.open(fn, memmap=memmap) as af:
# check that block is external
source = af["arr"]._source
assert isinstance(source, str)

# check if block is memmapped
if copy_arrays:
if not memmap:
assert not isinstance(af["arr"].base, np.memmap)
else:
assert isinstance(af["arr"].base, np.memmap)
2 changes: 1 addition & 1 deletion asdf/_tests/_regtests/test_1530.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_update_with_memmapped_data_can_make_view_data_invalid(tmp_path):
af = asdf.AsdfFile({"a": a, "b": b})
af.write_to(fn)

with asdf.open(fn, mode="rw", copy_arrays=False) as af:
with asdf.open(fn, mode="rw", memmap=True) as af:
va = af["a"][:3]
np.testing.assert_array_equal(a, af["a"])
np.testing.assert_array_equal(b, af["b"])
Expand Down
2 changes: 1 addition & 1 deletion asdf/_tests/_regtests/test_1558.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def test_asdffile_tree_cleared_on_close(tmp_path):
fn = tmp_path / "test.asdf"
asdf.AsdfFile({"a": np.arange(1000), "b": np.arange(42)}).write_to(fn)

with asdf.open(fn, copy_arrays=True, lazy_load=False) as af:
with asdf.open(fn, memmap=False, lazy_load=False) as af:
array_weakref = weakref.ref(af["a"])
array_ref = af["b"]

Expand Down
10 changes: 5 additions & 5 deletions asdf/_tests/tags/core/tests/test_ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -980,16 +980,16 @@ def test_memmap_write(tmp_path):
# Make sure we're actually writing to an internal array for this test
af.write_to(tmpfile, all_array_storage="internal")

with asdf.open(tmpfile, mode="rw", copy_arrays=False) as af:
with asdf.open(tmpfile, mode="rw", memmap=True) as af:
data = af["data"]
assert data.flags.writeable is True
data[0] = 42
assert data[0] == 42

with asdf.open(tmpfile, mode="rw", copy_arrays=False) as af:
with asdf.open(tmpfile, mode="rw", memmap=True) as af:
assert af["data"][0] == 42

with asdf.open(tmpfile, mode="r", copy_arrays=False) as af:
with asdf.open(tmpfile, mode="r", memmap=True) as af:
assert af["data"][0] == 42


Expand All @@ -1008,7 +1008,7 @@ def test_readonly(tmp_path):
af["data"][0] = 41

# Forcing memmap, the array should still be readonly
with asdf.open(tmpfile, copy_arrays=False) as af:
with asdf.open(tmpfile, memmap=True) as af:
assert af["data"].flags.writeable is False
with pytest.raises(ValueError, match=r"assignment destination is read-only"):
af["data"][0] = 41
Expand All @@ -1019,7 +1019,7 @@ def test_readonly(tmp_path):
af["data"][0] = 40

# Copying the arrays makes it safe to write to the underlying array
with asdf.open(tmpfile, mode="r", copy_arrays=True) as af:
with asdf.open(tmpfile, mode="r", memmap=False) as af:
assert af["data"].flags.writeable is True
af["data"][0] = 42

Expand Down
4 changes: 2 additions & 2 deletions asdf/_tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def test_update_exceptions(tmp_path):
ff.write_to(path)

with (
asdf.open(path, mode="r", copy_arrays=True) as ff,
asdf.open(path, mode="r", memmap=False) as ff,
pytest.raises(
IOError,
match=r"Can not update, since associated file is read-only.*",
Expand Down Expand Up @@ -500,7 +500,7 @@ def test_array_access_after_file_close(tmp_path):

# With memory mapping disabled and copying arrays enabled,
# the array data should still persist in memory after close:
with asdf.open(path, lazy_load=False, copy_arrays=True) as af:
with asdf.open(path, lazy_load=False, memmap=False) as af:
tree = af.tree
assert_array_equal(tree["data"], data)

Expand Down
Loading
Loading