From cb6b56040dfc0fcbdacc41d886037868924f80ec Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Fri, 20 Dec 2024 10:36:26 +0800 Subject: [PATCH 01/14] remove custom encoding warning after python 3.10+ --- src/monty/io.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/monty/io.py b/src/monty/io.py index ccd24189e..17021fcaf 100644 --- a/src/monty/io.py +++ b/src/monty/io.py @@ -22,9 +22,6 @@ from typing import IO, Any, Iterator, Union -class EncodingWarning(Warning): ... # Added in Python 3.10 - - def zopen( filename: Union[str, Path], /, From 412c18bbaa5616926c86df9b5d16b45cd94394a1 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Fri, 20 Dec 2024 10:37:23 +0800 Subject: [PATCH 02/14] check PYTHONWARNDEFAULTENCODING --- src/monty/io.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/monty/io.py b/src/monty/io.py index 17021fcaf..f7f6fbcd5 100644 --- a/src/monty/io.py +++ b/src/monty/io.py @@ -76,8 +76,12 @@ def zopen( stacklevel=2, ) - # Warn against default `encoding` in text mode - if "t" in mode and kwargs.get("encoding", None) is None: + # Warn against default `encoding` in text mode if `PYTHONWARNDEFAULTENCODING` is set + if ( + os.getenv("PYTHONWARNDEFAULTENCODING", False) + and "t" in mode + and kwargs.get("encoding", None) is None + ): warnings.warn( "We strongly encourage explicit `encoding`, " "and we would use UTF-8 by default as per PEP 686", From 3e1ffc597f90bf278cd662826d19f0a219630ebe Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Fri, 20 Dec 2024 13:05:45 +0800 Subject: [PATCH 03/14] fix test workflow otherwise cannot run --- .github/workflows/test.yml | 4 ++-- tests/test_io.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 31062030c..1df293f3a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -16,10 +16,10 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python }} + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: - python-version: ${{ matrix.python }} + python-version: ${{ matrix.python-version }} - name: Install dependencies run: | diff --git a/tests/test_io.py b/tests/test_io.py index f4422bf2a..5f3f78c13 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -9,7 +9,6 @@ import pytest from monty.io import ( - EncodingWarning, FileLock, FileLockException, _get_line_ending, From 2cd39ce35cead3a6046e9523170fb76f5f168e6a Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Fri, 20 Dec 2024 17:34:46 +0800 Subject: [PATCH 04/14] test encoding warning with env var --- src/monty/io.py | 7 ++++--- tests/test_io.py | 18 +++++++++++++++++- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/monty/io.py b/src/monty/io.py index f7f6fbcd5..94740c552 100644 --- a/src/monty/io.py +++ b/src/monty/io.py @@ -76,11 +76,12 @@ def zopen( stacklevel=2, ) - # Warn against default `encoding` in text mode if `PYTHONWARNDEFAULTENCODING` is set + # Warn against default `encoding` in text mode if + # `PYTHONWARNDEFAULTENCODING` environment variable is set (PEP 597) if ( - os.getenv("PYTHONWARNDEFAULTENCODING", False) - and "t" in mode + "t" in mode and kwargs.get("encoding", None) is None + and os.getenv("PYTHONWARNDEFAULTENCODING", False) ): warnings.warn( "We strongly encourage explicit `encoding`, " diff --git a/tests/test_io.py b/tests/test_io.py index 5f3f78c13..5617e847a 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -426,16 +426,19 @@ def test_lzw_files(self): # Cannot decompress a real LZW file with ( pytest.raises(gzip.BadGzipFile, match="Not a gzipped file"), + pytest.warns(FutureWarning, match="compress LZW-compressed files"), zopen(f"{TEST_DIR}/real_lzw_file.txt.Z", "rt", encoding="utf-8") as f, ): f.read() @pytest.mark.parametrize("extension", [".txt", ".bz2", ".gz", ".xz", ".lzma"]) - def test_warnings(self, extension): + def test_warnings(self, extension, monkeypatch): filename = f"test_warning{extension}" content = "Test warning" with ScratchDir("."): + monkeypatch.setenv("PYTHONWARNDEFAULTENCODING", "1") + # Default `encoding` warning with ( pytest.warns(EncodingWarning, match="use UTF-8 by default"), @@ -443,6 +446,19 @@ def test_warnings(self, extension): ): f.write(content) + # No encoding warning if `PYTHONWARNDEFAULTENCODING` not set + monkeypatch.delenv("PYTHONWARNDEFAULTENCODING", raising=False) + + with warnings.catch_warnings(): + warnings.filterwarnings( + "error", + "We strongly encourage explicit `encoding`", + EncodingWarning, + ) + + with zopen(filename, "wt") as f: + f.write(content) + # Implicit text/binary `mode` warning warnings.filterwarnings( "ignore", category=EncodingWarning, message="argument not specified" From 7f7c1f12a7ab25c040feba19c00115398036dc70 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Fri, 20 Dec 2024 17:40:41 +0800 Subject: [PATCH 05/14] PEP 604, | over Union type --- src/monty/functools.py | 4 ++-- src/monty/io.py | 6 +++--- src/monty/os/__init__.py | 6 +++--- src/monty/os/path.py | 4 ++-- src/monty/serialization.py | 6 +++--- src/monty/string.py | 4 ++-- src/monty/tempfile.py | 5 +---- 7 files changed, 16 insertions(+), 19 deletions(-) diff --git a/src/monty/functools.py b/src/monty/functools.py index 969f0d506..13c188dac 100644 --- a/src/monty/functools.py +++ b/src/monty/functools.py @@ -13,7 +13,7 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from typing import Any, Callable, Union + from typing import Any, Callable class _HashedSeq(list): # pylint: disable=C0205 @@ -130,7 +130,7 @@ def invalidate(cls, inst: object, name: str) -> None: def return_if_raise( - exception_tuple: Union[list, tuple], retval_if_exc: Any, disabled: bool = False + exception_tuple: list | tuple, retval_if_exc: Any, disabled: bool = False ) -> Any: """ Decorator for functions, methods or properties. Execute the callable in a diff --git a/src/monty/io.py b/src/monty/io.py index 94740c552..24b796b1a 100644 --- a/src/monty/io.py +++ b/src/monty/io.py @@ -19,11 +19,11 @@ from typing import TYPE_CHECKING, Literal, cast if TYPE_CHECKING: - from typing import IO, Any, Iterator, Union + from typing import IO, Any, Iterator def zopen( - filename: Union[str, Path], + filename: str | Path, /, mode: str | None = None, **kwargs: Any, @@ -171,7 +171,7 @@ def _get_line_ending( def reverse_readfile( - filename: Union[str, Path], + filename: str | Path, ) -> Iterator[str]: """ A much faster reverse read of file by using Python's mmap to generate a diff --git a/src/monty/os/__init__.py b/src/monty/os/__init__.py index 0158cec7a..24539971d 100644 --- a/src/monty/os/__init__.py +++ b/src/monty/os/__init__.py @@ -11,7 +11,7 @@ if TYPE_CHECKING: from pathlib import Path - from typing import Generator, Union + from typing import Generator __author__ = "Shyue Ping Ong" __copyright__ = "Copyright 2013, The Materials Project" @@ -22,7 +22,7 @@ @contextmanager -def cd(path: Union[str, Path]) -> Generator: +def cd(path: str | Path) -> Generator: """ A Fabric-inspired cd context that temporarily changes directory for performing some tasks, and returns to the original working directory @@ -42,7 +42,7 @@ def cd(path: Union[str, Path]) -> Generator: os.chdir(cwd) -def makedirs_p(path: Union[str, Path], **kwargs) -> None: +def makedirs_p(path: str | Path, **kwargs) -> None: """ Wrapper for os.makedirs that does not raise an exception if the directory already exists, in the fashion of "mkdir -p" command. The check is diff --git a/src/monty/os/path.py b/src/monty/os/path.py index fa293a90f..1fea9fcf9 100644 --- a/src/monty/os/path.py +++ b/src/monty/os/path.py @@ -12,7 +12,7 @@ from monty.string import list_strings if TYPE_CHECKING: - from typing import Callable, Literal, Optional, Union + from typing import Callable, Literal, Optional def zpath(filename: str | Path) -> str: @@ -41,7 +41,7 @@ def zpath(filename: str | Path) -> str: def find_exts( top: str, - exts: Union[str, list[str]], + exts: str | list[str], exclude_dirs: Optional[str] = None, include_dirs: Optional[str] = None, match_mode: Literal["basename", "abspath"] = "basename", diff --git a/src/monty/serialization.py b/src/monty/serialization.py index 13f3fb004..6dc1fa18e 100644 --- a/src/monty/serialization.py +++ b/src/monty/serialization.py @@ -22,10 +22,10 @@ if TYPE_CHECKING: from pathlib import Path - from typing import Any, Optional, TextIO, Union + from typing import Any, Optional, TextIO -def loadfn(fn: Union[str, Path], *args, fmt: Optional[str] = None, **kwargs) -> Any: +def loadfn(fn: str | Path, *args, fmt: Optional[str] = None, **kwargs) -> Any: """ Loads json/yaml/msgpack directly from a filename instead of a File-like object. File may also be a BZ2 (".BZ2") or GZIP (".GZ", ".Z") @@ -81,7 +81,7 @@ def loadfn(fn: Union[str, Path], *args, fmt: Optional[str] = None, **kwargs) -> raise TypeError(f"Invalid format: {fmt}") -def dumpfn(obj: object, fn: Union[str, Path], *args, fmt=None, **kwargs) -> None: +def dumpfn(obj: object, fn: str | Path, *args, fmt=None, **kwargs) -> None: """ Dump to a json/yaml directly by filename instead of a File-like object. File may also be a BZ2 (".BZ2") or GZIP (".GZ", ".Z") diff --git a/src/monty/string.py b/src/monty/string.py index 1a56debca..72a6b42e0 100644 --- a/src/monty/string.py +++ b/src/monty/string.py @@ -7,7 +7,7 @@ from typing import TYPE_CHECKING, Iterable, cast if TYPE_CHECKING: - from typing import Any, Union + from typing import Any def remove_non_ascii(s: str) -> str: @@ -34,7 +34,7 @@ def is_string(s: Any) -> bool: return False -def list_strings(arg: Union[str, Iterable[str]]) -> list[str]: +def list_strings(arg: str | Iterable[str]) -> list[str]: """ Always return a list of strings, given a string or list of strings as input. diff --git a/src/monty/tempfile.py b/src/monty/tempfile.py index 979dc5126..f5ac9556e 100644 --- a/src/monty/tempfile.py +++ b/src/monty/tempfile.py @@ -11,9 +11,6 @@ from monty.shutil import copy_r, gzip_dir, remove -if TYPE_CHECKING: - from typing import Union - class ScratchDir: """ @@ -42,7 +39,7 @@ class ScratchDir: def __init__( self, - rootpath: Union[str, Path, None], + rootpath: str | Path | None, create_symbolic_link: bool = False, copy_from_current_on_enter: bool = False, copy_to_current_on_exit: bool = False, From f684521e686faa2db04b4ef2a3975f534c5213ba Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Fri, 20 Dec 2024 17:46:36 +0800 Subject: [PATCH 06/14] Literal type for fmt --- src/monty/serialization.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/monty/serialization.py b/src/monty/serialization.py index 6dc1fa18e..82b43b1c4 100644 --- a/src/monty/serialization.py +++ b/src/monty/serialization.py @@ -22,10 +22,15 @@ if TYPE_CHECKING: from pathlib import Path - from typing import Any, Optional, TextIO + from typing import Any, Literal, TextIO -def loadfn(fn: str | Path, *args, fmt: Optional[str] = None, **kwargs) -> Any: +def loadfn( + fn: str | Path, + *args, + fmt: Literal["json", "yaml", "mpk"] | None = None, + **kwargs, +) -> Any: """ Loads json/yaml/msgpack directly from a filename instead of a File-like object. File may also be a BZ2 (".BZ2") or GZIP (".GZ", ".Z") @@ -39,9 +44,8 @@ def loadfn(fn: str | Path, *args, fmt: Optional[str] = None, **kwargs) -> Any: Args: fn (str/Path): filename or pathlib.Path. *args: Any of the args supported by json/yaml.load. - fmt (string): If specified, the fmt specified would be used instead - of autodetection from filename. Supported formats right now are - "json", "yaml" or "mpk". + fmt ("json" | "yaml" | "mpk"): If specified, the fmt specified would + be used instead of autodetection from filename. **kwargs: Any of the kwargs supported by json/yaml.load. Returns: @@ -81,7 +85,13 @@ def loadfn(fn: str | Path, *args, fmt: Optional[str] = None, **kwargs) -> Any: raise TypeError(f"Invalid format: {fmt}") -def dumpfn(obj: object, fn: str | Path, *args, fmt=None, **kwargs) -> None: +def dumpfn( + obj: object, + fn: str | Path, + *args, + fmt: Literal["json", "yaml", "mpk"] | None = None, + **kwargs, +) -> None: """ Dump to a json/yaml directly by filename instead of a File-like object. File may also be a BZ2 (".BZ2") or GZIP (".GZ", ".Z") @@ -95,6 +105,8 @@ def dumpfn(obj: object, fn: str | Path, *args, fmt=None, **kwargs) -> None: Args: obj (object): Object to dump. fn (str/Path): filename or pathlib.Path. + fmt ("json" | "yaml" | "mpk"): If specified, the fmt specified would + be used instead of autodetection from filename. *args: Any of the args supported by json/yaml.dump. **kwargs: Any of the kwargs supported by json/yaml.dump. From c4bedb89dbe9e977e80e0fd3e23f187a61c215c2 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Fri, 20 Dec 2024 17:48:30 +0800 Subject: [PATCH 07/14] implicit optional | None --- src/monty/bisect.py | 6 +----- src/monty/dev.py | 8 ++++---- src/monty/os/path.py | 6 +++--- src/monty/shutil.py | 6 +++--- src/monty/subprocess.py | 4 +--- 5 files changed, 12 insertions(+), 18 deletions(-) diff --git a/src/monty/bisect.py b/src/monty/bisect.py index 6261d8c2b..5047a8870 100644 --- a/src/monty/bisect.py +++ b/src/monty/bisect.py @@ -10,10 +10,6 @@ from __future__ import annotations import bisect as bs -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from typing import Optional __author__ = "Matteo Giantomassi" __copyright__ = "Copyright 2013, The Materials Virtual Lab" @@ -23,7 +19,7 @@ __date__ = "11/09/14" -def index(a: list[float], x: float, atol: Optional[float] = None) -> int: +def index(a: list[float], x: float, atol: float | None = None) -> int: """Locate the leftmost value exactly equal to x.""" i = bs.bisect_left(a, x) if i != len(a): diff --git a/src/monty/dev.py b/src/monty/dev.py index cf468bce4..c7525864b 100644 --- a/src/monty/dev.py +++ b/src/monty/dev.py @@ -17,15 +17,15 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from typing import Callable, Optional, Type + from typing import Callable, Type logger = logging.getLogger(__name__) def deprecated( - replacement: Optional[Callable | str] = None, + replacement: Callable | str | None = None, message: str = "", - deadline: Optional[tuple[int, int, int]] = None, + deadline: tuple[int, int, int] | None = None, category: Type[Warning] = FutureWarning, ) -> Callable: """ @@ -34,7 +34,7 @@ def deprecated( Args: replacement (Callable | str): A replacement class or function. message (str): A warning message to be displayed. - deadline (Optional[tuple[int, int, int]]): Optional deadline for removal + deadline (tuple[int, int, int] | None): Optional deadline for removal of the old function/class, in format (yyyy, MM, dd). A CI warning would be raised after this date if is running in code owner' repo. category (Warning): Choose the category of the warning to issue. Defaults diff --git a/src/monty/os/path.py b/src/monty/os/path.py index 1fea9fcf9..5b3c0cb4e 100644 --- a/src/monty/os/path.py +++ b/src/monty/os/path.py @@ -12,7 +12,7 @@ from monty.string import list_strings if TYPE_CHECKING: - from typing import Callable, Literal, Optional + from typing import Callable, Literal def zpath(filename: str | Path) -> str: @@ -42,8 +42,8 @@ def zpath(filename: str | Path) -> str: def find_exts( top: str, exts: str | list[str], - exclude_dirs: Optional[str] = None, - include_dirs: Optional[str] = None, + exclude_dirs: str | None = None, + include_dirs: str | None = None, match_mode: Literal["basename", "abspath"] = "basename", ) -> list[str]: """ diff --git a/src/monty/shutil.py b/src/monty/shutil.py index 98047f576..7353b7b06 100644 --- a/src/monty/shutil.py +++ b/src/monty/shutil.py @@ -12,7 +12,7 @@ from monty.io import zopen if TYPE_CHECKING: - from typing import Literal, Optional + from typing import Literal def copy_r(src: str | Path, dst: str | Path) -> None: @@ -76,7 +76,7 @@ def gzip_dir(path: str | Path, compresslevel: int = 6) -> None: def compress_file( filepath: str | Path, compression: Literal["gz", "bz2"] = "gz", - target_dir: Optional[str | Path] = None, + target_dir: str | Path | None = None, ) -> None: """ Compresses a file with the correct extension. Functions like standard @@ -130,7 +130,7 @@ def compress_dir(path: str | Path, compression: Literal["gz", "bz2"] = "gz") -> def decompress_file( - filepath: str | Path, target_dir: Optional[str | Path] = None + filepath: str | Path, target_dir: str | Path | None = None ) -> str | None: """ Decompresses a file with the correct extension. Automatically detects diff --git a/src/monty/subprocess.py b/src/monty/subprocess.py index 104261c2e..e17f95c55 100644 --- a/src/monty/subprocess.py +++ b/src/monty/subprocess.py @@ -13,8 +13,6 @@ from monty.string import is_string if TYPE_CHECKING: - from typing import Optional - from typing_extensions import Self __author__ = "Matteo Giantomass" @@ -63,7 +61,7 @@ def __init__(self, command: str): def __str__(self): return f"command: {self.command}, retcode: {self.retcode}" - def run(self, timeout: Optional[float] = None, **kwargs) -> Self: + def run(self, timeout: float | None = None, **kwargs) -> Self: """ Run a command in a separated thread and wait timeout seconds. kwargs are keyword arguments passed to Popen. From a4d5719d8f7a296e12577511ad385f79bdc66f4c Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Fri, 20 Dec 2024 18:06:20 +0800 Subject: [PATCH 08/14] prefer mode as kwarg --- src/monty/io.py | 4 ++-- src/monty/re.py | 2 +- src/monty/serialization.py | 8 ++++---- src/monty/shutil.py | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/monty/io.py b/src/monty/io.py index 24b796b1a..800044ea3 100644 --- a/src/monty/io.py +++ b/src/monty/io.py @@ -143,7 +143,7 @@ def _get_line_ending( If file is empty, "\n" would be used as default. """ if isinstance(file, (str, Path)): - with zopen(file, "rb") as f: + with zopen(file, mode="rb") as f: first_line = f.readline() elif isinstance(file, io.TextIOWrapper): first_line = file.buffer.readline() # type: ignore[attr-defined] @@ -189,7 +189,7 @@ def reverse_readfile( l_end = _get_line_ending(filename) len_l_end = len(l_end) - with zopen(filename, "rb") as file: + with zopen(filename, mode="rb") as file: if isinstance(file, (gzip.GzipFile, bz2.BZ2File)): for line in reversed(file.readlines()): # "readlines" would keep the line end character diff --git a/src/monty/re.py b/src/monty/re.py index 76152ff13..feeaed248 100644 --- a/src/monty/re.py +++ b/src/monty/re.py @@ -49,7 +49,7 @@ def regrep( gen = ( reverse_readfile(filename) if reverse - else zopen(filename, "rt", encoding="utf-8") + else zopen(filename, mode="rt", encoding="utf-8") ) for i, line in enumerate(gen): for k, p in compiled.items(): diff --git a/src/monty/serialization.py b/src/monty/serialization.py index 82b43b1c4..ff2a53de2 100644 --- a/src/monty/serialization.py +++ b/src/monty/serialization.py @@ -68,10 +68,10 @@ def loadfn( ) if "object_hook" not in kwargs: kwargs["object_hook"] = object_hook - with zopen(fn, "rb") as fp: + with zopen(fn, mode="rb") as fp: return msgpack.load(fp, *args, **kwargs) # pylint: disable=E1101 else: - with zopen(fn, "rt", encoding="utf-8") as fp: + with zopen(fn, mode="rt", encoding="utf-8") as fp: if fmt == "yaml": if YAML is None: raise RuntimeError("Loading of YAML files requires ruamel.yaml.") @@ -129,10 +129,10 @@ def dumpfn( ) if "default" not in kwargs: kwargs["default"] = default - with zopen(fn, "wb") as fp: + with zopen(fn, mode="wb") as fp: msgpack.dump(obj, fp, *args, **kwargs) # pylint: disable=E1101 else: - with zopen(fn, "wt", encoding="utf-8") as fp: + with zopen(fn, mode="wt", encoding="utf-8") as fp: fp = cast(TextIO, fp) if fmt == "yaml": diff --git a/src/monty/shutil.py b/src/monty/shutil.py index 7353b7b06..5dc1c8c07 100644 --- a/src/monty/shutil.py +++ b/src/monty/shutil.py @@ -104,7 +104,7 @@ def compress_file( else: compressed_file = f"{str(filepath)}.{compression}" - with open(filepath, "rb") as f_in, zopen(compressed_file, "wb") as f_out: + with open(filepath, "rb") as f_in, zopen(compressed_file, mode="wb") as f_out: f_out.writelines(f_in) os.remove(filepath) @@ -157,7 +157,7 @@ def decompress_file( else: decompressed_file = str(filepath).removesuffix(file_ext) - with zopen(filepath, "rb") as f_in, open(decompressed_file, "wb") as f_out: + with zopen(filepath, mode="rb") as f_in, open(decompressed_file, "wb") as f_out: f_out.writelines(f_in) os.remove(filepath) From 73294c93353c6b5cde1bd2583329bae3fa56fb57 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Sat, 21 Dec 2024 13:08:28 +0800 Subject: [PATCH 09/14] fix env var check --- src/monty/io.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/monty/io.py b/src/monty/io.py index 800044ea3..d845423f2 100644 --- a/src/monty/io.py +++ b/src/monty/io.py @@ -78,17 +78,14 @@ def zopen( # Warn against default `encoding` in text mode if # `PYTHONWARNDEFAULTENCODING` environment variable is set (PEP 597) - if ( - "t" in mode - and kwargs.get("encoding", None) is None - and os.getenv("PYTHONWARNDEFAULTENCODING", False) - ): - warnings.warn( - "We strongly encourage explicit `encoding`, " - "and we would use UTF-8 by default as per PEP 686", - category=EncodingWarning, - stacklevel=2, - ) + if "t" in mode and kwargs.get("encoding", None) is None: + if os.getenv("PYTHONWARNDEFAULTENCODING", False): + warnings.warn( + "We strongly encourage explicit `encoding`, " + "and we would use UTF-8 by default as per PEP 686", + category=EncodingWarning, + stacklevel=2, + ) kwargs["encoding"] = "utf-8" _name, ext = os.path.splitext(filename) From 8bc076348aa00d5ec6b4a9e4b328f58f02a2b73c Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Sat, 21 Dec 2024 17:44:24 +0800 Subject: [PATCH 10/14] document default encoding --- src/monty/io.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/monty/io.py b/src/monty/io.py index d845423f2..b22ea6231 100644 --- a/src/monty/io.py +++ b/src/monty/io.py @@ -32,7 +32,7 @@ def zopen( This function wraps around `[bz2/gzip/lzma].open` and `open` to deal intelligently with compressed or uncompressed files. Supports context manager: - `with zopen(filename, mode="rt", ...)`. + `with zopen(filename, mode="rt", ...)` Important Notes: - Default `mode` should not be used, and would not be allow @@ -40,11 +40,12 @@ def zopen( - Always explicitly specify binary/text in `mode`, i.e. always pass `t` or `b` in `mode`, implicit binary/text mode would not be allow in future versions. - - Always provide an explicit `encoding` in text mode. + - Always provide an explicit `encoding` in text mode, it would + be set to UTF-8 by default otherwise. Args: filename (str | Path): The file to open. - mode (str): The mode in which the file is opened, you MUST + mode (str): The mode in which the file is opened, you should explicitly specify "b" for binary or "t" for text. **kwargs: Additional keyword arguments to pass to `open`. From 2c936202e4bba63333210f13daec76e634d22b14 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Sat, 4 Jan 2025 14:33:26 +0800 Subject: [PATCH 11/14] fix merge error --- tests/test_io.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_io.py b/tests/test_io.py index 98dfa9c0a..5b3e6d3a8 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -427,7 +427,6 @@ def test_lzw_files(self): with ( pytest.warns(FutureWarning, match="compress LZW-compressed files"), pytest.raises(gzip.BadGzipFile, match="Not a gzipped file"), - pytest.warns(FutureWarning, match="compress LZW-compressed files"), zopen(f"{TEST_DIR}/real_lzw_file.txt.Z", "rt", encoding="utf-8") as f, ): f.read() From a2ac074de62895dd5d9bf1d32a3466ec4719536a Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Fri, 10 Jan 2025 11:49:44 +0800 Subject: [PATCH 12/14] Revert "PEP 604, | over Union type" This reverts commit 7f7c1f12a7ab25c040feba19c00115398036dc70. --- src/monty/functools.py | 4 ++-- src/monty/io.py | 6 +++--- src/monty/os/__init__.py | 6 +++--- src/monty/os/path.py | 8 ++++---- src/monty/serialization.py | 6 +++--- src/monty/string.py | 4 ++-- src/monty/tempfile.py | 5 ++++- 7 files changed, 21 insertions(+), 18 deletions(-) diff --git a/src/monty/functools.py b/src/monty/functools.py index 13c188dac..969f0d506 100644 --- a/src/monty/functools.py +++ b/src/monty/functools.py @@ -13,7 +13,7 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from typing import Any, Callable + from typing import Any, Callable, Union class _HashedSeq(list): # pylint: disable=C0205 @@ -130,7 +130,7 @@ def invalidate(cls, inst: object, name: str) -> None: def return_if_raise( - exception_tuple: list | tuple, retval_if_exc: Any, disabled: bool = False + exception_tuple: Union[list, tuple], retval_if_exc: Any, disabled: bool = False ) -> Any: """ Decorator for functions, methods or properties. Execute the callable in a diff --git a/src/monty/io.py b/src/monty/io.py index 86600abb1..1f3981d9d 100644 --- a/src/monty/io.py +++ b/src/monty/io.py @@ -19,11 +19,11 @@ from typing import TYPE_CHECKING, Literal, cast if TYPE_CHECKING: - from typing import IO, Any, Iterator + from typing import IO, Any, Iterator, Union def zopen( - filename: str | Path, + filename: Union[str, Path], /, mode: str | None = None, **kwargs: Any, @@ -169,7 +169,7 @@ def _get_line_ending( def reverse_readfile( - filename: str | Path, + filename: Union[str, Path], ) -> Iterator[str]: """ A much faster reverse read of file by using Python's mmap to generate a diff --git a/src/monty/os/__init__.py b/src/monty/os/__init__.py index 24539971d..0158cec7a 100644 --- a/src/monty/os/__init__.py +++ b/src/monty/os/__init__.py @@ -11,7 +11,7 @@ if TYPE_CHECKING: from pathlib import Path - from typing import Generator + from typing import Generator, Union __author__ = "Shyue Ping Ong" __copyright__ = "Copyright 2013, The Materials Project" @@ -22,7 +22,7 @@ @contextmanager -def cd(path: str | Path) -> Generator: +def cd(path: Union[str, Path]) -> Generator: """ A Fabric-inspired cd context that temporarily changes directory for performing some tasks, and returns to the original working directory @@ -42,7 +42,7 @@ def cd(path: str | Path) -> Generator: os.chdir(cwd) -def makedirs_p(path: str | Path, **kwargs) -> None: +def makedirs_p(path: Union[str, Path], **kwargs) -> None: """ Wrapper for os.makedirs that does not raise an exception if the directory already exists, in the fashion of "mkdir -p" command. The check is diff --git a/src/monty/os/path.py b/src/monty/os/path.py index 5b3c0cb4e..fa293a90f 100644 --- a/src/monty/os/path.py +++ b/src/monty/os/path.py @@ -12,7 +12,7 @@ from monty.string import list_strings if TYPE_CHECKING: - from typing import Callable, Literal + from typing import Callable, Literal, Optional, Union def zpath(filename: str | Path) -> str: @@ -41,9 +41,9 @@ def zpath(filename: str | Path) -> str: def find_exts( top: str, - exts: str | list[str], - exclude_dirs: str | None = None, - include_dirs: str | None = None, + exts: Union[str, list[str]], + exclude_dirs: Optional[str] = None, + include_dirs: Optional[str] = None, match_mode: Literal["basename", "abspath"] = "basename", ) -> list[str]: """ diff --git a/src/monty/serialization.py b/src/monty/serialization.py index ff2a53de2..72c2d104f 100644 --- a/src/monty/serialization.py +++ b/src/monty/serialization.py @@ -22,11 +22,11 @@ if TYPE_CHECKING: from pathlib import Path - from typing import Any, Literal, TextIO + from typing import Any, Literal, TextIO, Union def loadfn( - fn: str | Path, + fn: Union[str, Path], *args, fmt: Literal["json", "yaml", "mpk"] | None = None, **kwargs, @@ -87,7 +87,7 @@ def loadfn( def dumpfn( obj: object, - fn: str | Path, + fn: Union[str, Path], *args, fmt: Literal["json", "yaml", "mpk"] | None = None, **kwargs, diff --git a/src/monty/string.py b/src/monty/string.py index 72a6b42e0..1a56debca 100644 --- a/src/monty/string.py +++ b/src/monty/string.py @@ -7,7 +7,7 @@ from typing import TYPE_CHECKING, Iterable, cast if TYPE_CHECKING: - from typing import Any + from typing import Any, Union def remove_non_ascii(s: str) -> str: @@ -34,7 +34,7 @@ def is_string(s: Any) -> bool: return False -def list_strings(arg: str | Iterable[str]) -> list[str]: +def list_strings(arg: Union[str, Iterable[str]]) -> list[str]: """ Always return a list of strings, given a string or list of strings as input. diff --git a/src/monty/tempfile.py b/src/monty/tempfile.py index f5ac9556e..979dc5126 100644 --- a/src/monty/tempfile.py +++ b/src/monty/tempfile.py @@ -11,6 +11,9 @@ from monty.shutil import copy_r, gzip_dir, remove +if TYPE_CHECKING: + from typing import Union + class ScratchDir: """ @@ -39,7 +42,7 @@ class ScratchDir: def __init__( self, - rootpath: str | Path | None, + rootpath: Union[str, Path, None], create_symbolic_link: bool = False, copy_from_current_on_enter: bool = False, copy_to_current_on_exit: bool = False, From f805352730351f05e6578f6dc3cfaad4c26a1e82 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Fri, 10 Jan 2025 11:50:15 +0800 Subject: [PATCH 13/14] Revert "implicit optional | None" This reverts commit c4bedb89dbe9e977e80e0fd3e23f187a61c215c2. --- src/monty/bisect.py | 6 +++++- src/monty/dev.py | 6 +++--- src/monty/shutil.py | 6 +++--- src/monty/subprocess.py | 4 +++- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/monty/bisect.py b/src/monty/bisect.py index 5047a8870..6261d8c2b 100644 --- a/src/monty/bisect.py +++ b/src/monty/bisect.py @@ -10,6 +10,10 @@ from __future__ import annotations import bisect as bs +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Optional __author__ = "Matteo Giantomassi" __copyright__ = "Copyright 2013, The Materials Virtual Lab" @@ -19,7 +23,7 @@ __date__ = "11/09/14" -def index(a: list[float], x: float, atol: float | None = None) -> int: +def index(a: list[float], x: float, atol: Optional[float] = None) -> int: """Locate the leftmost value exactly equal to x.""" i = bs.bisect_left(a, x) if i != len(a): diff --git a/src/monty/dev.py b/src/monty/dev.py index 52714e7ce..882d0ec83 100644 --- a/src/monty/dev.py +++ b/src/monty/dev.py @@ -18,9 +18,9 @@ def deprecated( - replacement: Callable | str | None = None, + replacement: Optional[Callable | str] = None, message: str = "", - deadline: tuple[int, int, int] | None = None, + deadline: Optional[tuple[int, int, int]] = None, category: Type[Warning] = FutureWarning, ) -> Callable: """ @@ -29,7 +29,7 @@ def deprecated( Args: replacement (Callable | str): A replacement class or function. message (str): A warning message to be displayed. - deadline (tuple[int, int, int] | None): Optional deadline for removal + deadline (Optional[tuple[int, int, int]]): Optional deadline for removal of the old function/class, in format (yyyy, MM, dd). A CI warning would be raised after this date if is running in code owner' repo. category (Warning): Choose the category of the warning to issue. Defaults diff --git a/src/monty/shutil.py b/src/monty/shutil.py index 5dc1c8c07..418e53117 100644 --- a/src/monty/shutil.py +++ b/src/monty/shutil.py @@ -12,7 +12,7 @@ from monty.io import zopen if TYPE_CHECKING: - from typing import Literal + from typing import Literal, Optional def copy_r(src: str | Path, dst: str | Path) -> None: @@ -76,7 +76,7 @@ def gzip_dir(path: str | Path, compresslevel: int = 6) -> None: def compress_file( filepath: str | Path, compression: Literal["gz", "bz2"] = "gz", - target_dir: str | Path | None = None, + target_dir: Optional[str | Path] = None, ) -> None: """ Compresses a file with the correct extension. Functions like standard @@ -130,7 +130,7 @@ def compress_dir(path: str | Path, compression: Literal["gz", "bz2"] = "gz") -> def decompress_file( - filepath: str | Path, target_dir: str | Path | None = None + filepath: str | Path, target_dir: Optional[str | Path] = None ) -> str | None: """ Decompresses a file with the correct extension. Automatically detects diff --git a/src/monty/subprocess.py b/src/monty/subprocess.py index 452b4f106..ff77117b2 100644 --- a/src/monty/subprocess.py +++ b/src/monty/subprocess.py @@ -13,6 +13,8 @@ from monty.string import is_string if TYPE_CHECKING: + from typing import Optional + from typing_extensions import Self __author__ = "Matteo Giantomass" @@ -61,7 +63,7 @@ def __init__(self, command: str): def __str__(self): return f"command: {self.command}, retcode: {self.retcode}" - def run(self, timeout: float | None = None, **kwargs) -> Self: + def run(self, timeout: Optional[float] = None, **kwargs) -> Self: """ Run a command in a separated thread and wait timeout seconds. kwargs are keyword arguments passed to Popen. From 275ca4c013289a54d4f38e7c6a5eec1347002045 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Fri, 10 Jan 2025 11:51:19 +0800 Subject: [PATCH 14/14] Revert "remove custom encoding warning after python 3.10+" This reverts commit cb6b56040dfc0fcbdacc41d886037868924f80ec. --- src/monty/io.py | 3 +++ tests/test_io.py | 1 + 2 files changed, 4 insertions(+) diff --git a/src/monty/io.py b/src/monty/io.py index 1f3981d9d..1b4548996 100644 --- a/src/monty/io.py +++ b/src/monty/io.py @@ -22,6 +22,9 @@ from typing import IO, Any, Iterator, Union +class EncodingWarning(Warning): ... # Added in Python 3.10 + + def zopen( filename: Union[str, Path], /, diff --git a/tests/test_io.py b/tests/test_io.py index 5b3e6d3a8..b45ac6cd5 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -9,6 +9,7 @@ import pytest from monty.io import ( + EncodingWarning, FileLock, FileLockException, _get_line_ending,