From 5c02ea8bae2287a828840f5734966da23dc573dc Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Wed, 5 Jun 2024 12:56:01 +0100 Subject: [PATCH 1/6] gh-119287: clarify doc on BaseExceptionGroup.derive and link to it from contextlib.suppress (#119657) --- Doc/library/contextlib.rst | 4 +++- Doc/library/exceptions.rst | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Doc/library/contextlib.rst b/Doc/library/contextlib.rst index bad9da52d6a6ca..27cf99446e5980 100644 --- a/Doc/library/contextlib.rst +++ b/Doc/library/contextlib.rst @@ -314,7 +314,9 @@ Functions and classes provided: If the code within the :keyword:`!with` block raises a :exc:`BaseExceptionGroup`, suppressed exceptions are removed from the - group. If any exceptions in the group are not suppressed, a group containing them is re-raised. + group. Any exceptions of the group which are not suppressed are re-raised in + a new group which is created using the original group's :meth:`~BaseExceptionGroup.derive` + method. .. versionadded:: 3.4 diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst index 7879fb015bddfa..7910b306f143d7 100644 --- a/Doc/library/exceptions.rst +++ b/Doc/library/exceptions.rst @@ -989,7 +989,8 @@ their subgroups based on the types of the contained exceptions. Returns an exception group with the same :attr:`message`, but which wraps the exceptions in ``excs``. - This method is used by :meth:`subgroup` and :meth:`split`. A + This method is used by :meth:`subgroup` and :meth:`split`, which + are used in various contexts to break up an exception group. A subclass needs to override it in order to make :meth:`subgroup` and :meth:`split` return instances of the subclass rather than :exc:`ExceptionGroup`. From 4bba1c9e6cfeaf69302b501a4306668613db4b28 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Wed, 5 Jun 2024 09:23:29 -0400 Subject: [PATCH 2/6] gh-120065: Increase `collect_in_thread` period to 5 ms. 
(#120068) This matches the default GIL switch interval. It greatly speeds up the free-threaded build: previously, it spent nearly all its time in `gc.collect()`. --- Lib/test/test_weakref.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_weakref.py b/Lib/test/test_weakref.py index 16da24d7805b56..ef2fe92cc219b6 100644 --- a/Lib/test/test_weakref.py +++ b/Lib/test/test_weakref.py @@ -82,7 +82,7 @@ def callback(self, ref): @contextlib.contextmanager -def collect_in_thread(period=0.0001): +def collect_in_thread(period=0.005): """ Ensure GC collections happen in a different thread, at a high frequency. """ From 10eac0269bce4e2ba575e5b549d3dd9a6da9349a Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Wed, 5 Jun 2024 16:28:47 +0100 Subject: [PATCH 3/6] gh-119786: add links to code in exception handling doc (#120077) --- InternalDocs/exception_handling.md | 49 ++++++++++-------------------- 1 file changed, 16 insertions(+), 33 deletions(-) diff --git a/InternalDocs/exception_handling.md b/InternalDocs/exception_handling.md index 22d9c3bf7933f1..ec09e0769929fa 100644 --- a/InternalDocs/exception_handling.md +++ b/InternalDocs/exception_handling.md @@ -67,14 +67,18 @@ handler located at label `L1`. Handling Exceptions ------------------- -At runtime, when an exception occurs, the interpreter looks up -the offset of the current instruction in the exception table. If -it finds a handler, control flow transfers to it. Otherwise, the +At runtime, when an exception occurs, the interpreter calls +``get_exception_handler()`` in +[Python/ceval.c](https://github.com/python/cpython/blob/main/Python/ceval.c) +to look up the offset of the current instruction in the exception +table. If it finds a handler, control flow transfers to it. Otherwise, the exception bubbles up to the caller, and the caller's frame is checked for a handler covering the `CALL` instruction. 
This repeats until a handler is found or the topmost frame is reached. If no handler is found, the program terminates. During unwinding, -the traceback is constructed as each frame is added to it. +the traceback is constructed as each frame is added to it by +``PyTraceBack_Here()``, which is in +[Python/traceback.c](https://github.com/python/cpython/blob/main/Python/traceback.c). Along with the location of an exception handler, each entry of the exception table also contains the stack depth of the `try` instruction @@ -169,33 +173,12 @@ which is then encoded as: for a total of five bytes. +The code to construct the exception table is in ``assemble_exception_table()`` +in [Python/assemble.c](https://github.com/python/cpython/blob/main/Python/assemble.c). -Script to parse the exception table ------------------------------------ - -``` -def parse_varint(iterator): - b = next(iterator) - val = b & 63 - while b&64: - val <<= 6 - b = next(iterator) - val |= b&63 - return val -``` -``` -def parse_exception_table(code): - iterator = iter(code.co_exceptiontable) - try: - while True: - start = parse_varint(iterator)*2 - length = parse_varint(iterator)*2 - end = start + length - 2 # Present as inclusive, not exclusive - target = parse_varint(iterator)*2 - dl = parse_varint(iterator) - depth = dl >> 1 - lasti = bool(dl&1) - yield start, end, target, depth, lasti - except StopIteration: - return -``` +The interpreter's function to look up the table by instruction offset is +``get_exception_handler()`` in +[Python/ceval.c](https://github.com/python/cpython/blob/main/Python/ceval.c). +The Python function ``_parse_exception_table()`` in +[Lib/dis.py](https://github.com/python/cpython/blob/main/Lib/dis.py) +returns the exception table content as a list of namedtuple instances. 
From 14e3c7071bd1add30d4b69b62e011c7d38aebd9b Mon Sep 17 00:00:00 2001 From: benchatt Date: Wed, 5 Jun 2024 10:35:40 -0700 Subject: [PATCH 4/6] gh-115225: Raise error on unsupported ISO 8601 time strings (#119339) Some time strings that contain fractional hours or minutes are permitted by ISO 8601, but such strings are very unlikely to be intentional. The current parser does not parse such strings correctly or raise an error. This change raises a ValueError when hours or minutes contain a decimal mark. Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> --- Lib/test/datetimetester.py | 2 ++ Misc/ACKS | 1 + .../Library/2024-05-21-19-10-30.gh-issue-115225.eRmfJH.rst | 1 + Modules/_datetimemodule.c | 3 +++ 4 files changed, 7 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-05-21-19-10-30.gh-issue-115225.eRmfJH.rst diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 535b17d0727611..3759504b02e550 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -4412,6 +4412,8 @@ def test_fromisoformat_fails(self): '12:30:45.123456-', # Extra at end of microsecond time '12:30:45.123456+', # Extra at end of microsecond time '12:30:45.123456+12:00:30a', # Extra at end of full time + '12.5', # Decimal mark at end of hour + '12:30,5', # Decimal mark at end of minute ] for bad_str in bad_strs: diff --git a/Misc/ACKS b/Misc/ACKS index 2e7e12481bacd7..af92d81ff3141a 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -315,6 +315,7 @@ Greg Chapman Mitch Chapman Matt Chaput William Chargin +Ben Chatterton Yogesh Chaudhari Gautam Chaudhuri David Chaum diff --git a/Misc/NEWS.d/next/Library/2024-05-21-19-10-30.gh-issue-115225.eRmfJH.rst b/Misc/NEWS.d/next/Library/2024-05-21-19-10-30.gh-issue-115225.eRmfJH.rst new file mode 100644 index 00000000000000..2b65eaa6dd70ad --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-21-19-10-30.gh-issue-115225.eRmfJH.rst @@ -0,0 +1 @@ +Raise error on certain technically valid but 
pathological ISO 8601 strings passed to :meth:`datetime.time.fromisoformat` that were previously parsed incorrectly. diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index d6fa273c75e15e..bea6e9411a75ed 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -1020,6 +1020,9 @@ parse_hh_mm_ss_ff(const char *tstr, const char *tstr_end, int *hour, continue; } else if (c == '.' || c == ',') { + if (i < 2) { + return -3; // Decimal mark on hour or minute + } break; } else if (!has_separator) { --p; From e83ce850f433fd8bbf8ff4e8d7649b942639db31 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Wed, 5 Jun 2024 18:54:50 +0100 Subject: [PATCH 5/6] pathlib ABCs: remove duplicate `realpath()` implementation. (#119178) Add private `posixpath._realpath()` function, which is a generic version of `realpath()` that can be parameterised with string tokens (`sep`, `curdir`, `pardir`) and query functions (`getcwd`, `lstat`, `readlink`). Also add support for limiting the number of symlink traversals. In the private `pathlib._abc.PathBase` class, call `posixpath._realpath()` and remove our re-implementation of the same algorithm. No change to any public APIs, either in `posixpath` or `pathlib`. 
Co-authored-by: Nice Zombies --- Lib/pathlib/_abc.py | 87 +++++++++++++++------------------------------ Lib/posixpath.py | 40 +++++++++++++++------ 2 files changed, 57 insertions(+), 70 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index d7471b6927331d..1a74f457c3f5a7 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -12,8 +12,8 @@ """ import functools +import posixpath from glob import _Globber, _no_recurse_symlinks -from errno import ENOTDIR, ELOOP from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO @@ -696,65 +696,34 @@ def resolve(self, strict=False): """ if self._resolving: return self - path_root, parts = self._stack - path = self.with_segments(path_root) - try: - path = path.absolute() - except UnsupportedOperation: - path_tail = [] - else: - path_root, path_tail = path._stack - path_tail.reverse() - - # If the user has *not* overridden the `readlink()` method, then symlinks are unsupported - # and (in non-strict mode) we can improve performance by not calling `stat()`. - querying = strict or getattr(self.readlink, '_supported', True) - link_count = 0 - while parts: - part = parts.pop() - if not part or part == '.': - continue - if part == '..': - if not path_tail: - if path_root: - # Delete '..' segment immediately following root - continue - elif path_tail[-1] != '..': - # Delete '..' segment and its predecessor - path_tail.pop() - continue - path_tail.append(part) - if querying and part != '..': - path = self.with_segments(path_root + self.parser.sep.join(path_tail)) + + def getcwd(): + return str(self.with_segments().absolute()) + + if strict or getattr(self.readlink, '_supported', True): + def lstat(path_str): + path = self.with_segments(path_str) path._resolving = True - try: - st = path.stat(follow_symlinks=False) - if S_ISLNK(st.st_mode): - # Like Linux and macOS, raise OSError(errno.ELOOP) if too many symlinks are - # encountered during resolution. 
- link_count += 1 - if link_count >= self._max_symlinks: - raise OSError(ELOOP, "Too many symbolic links in path", self._raw_path) - target_root, target_parts = path.readlink()._stack - # If the symlink target is absolute (like '/etc/hosts'), set the current - # path to its uppermost parent (like '/'). - if target_root: - path_root = target_root - path_tail.clear() - else: - path_tail.pop() - # Add the symlink target's reversed tail parts (like ['hosts', 'etc']) to - # the stack of unresolved path parts. - parts.extend(target_parts) - continue - elif parts and not S_ISDIR(st.st_mode): - raise NotADirectoryError(ENOTDIR, "Not a directory", self._raw_path) - except OSError: - if strict: - raise - else: - querying = False - return self.with_segments(path_root + self.parser.sep.join(path_tail)) + return path.lstat() + + def readlink(path_str): + path = self.with_segments(path_str) + path._resolving = True + return str(path.readlink()) + else: + # If the user has *not* overridden the `readlink()` method, then + # symlinks are unsupported and (in non-strict mode) we can improve + # performance by not calling `path.lstat()`. + def skip(path_str): + # This exception will be internally consumed by `_realpath()`. + raise OSError("Operation skipped.") + + lstat = readlink = skip + + return self.with_segments(posixpath._realpath( + str(self), strict, self.parser.sep, + getcwd=getcwd, lstat=lstat, readlink=readlink, + maxlinks=self._max_symlinks)) def symlink_to(self, target, target_is_directory=False): """ diff --git a/Lib/posixpath.py b/Lib/posixpath.py index 47b2aa572e5c65..fccca4e066b76f 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -22,6 +22,7 @@ altsep = None devnull = '/dev/null' +import errno import os import sys import stat @@ -401,7 +402,10 @@ def realpath(filename, *, strict=False): curdir = '.' pardir = '..' 
getcwd = os.getcwd + return _realpath(filename, strict, sep, curdir, pardir, getcwd) +def _realpath(filename, strict=False, sep=sep, curdir=curdir, pardir=pardir, + getcwd=os.getcwd, lstat=os.lstat, readlink=os.readlink, maxlinks=None): # The stack of unresolved path parts. When popped, a special value of None # indicates that a symlink target has been resolved, and that the original # symlink path can be retrieved by popping again. The [::-1] slice is a @@ -418,6 +422,10 @@ def realpath(filename, *, strict=False): # the same links. seen = {} + # Number of symlinks traversed. When the number of traversals is limited + # by *maxlinks*, this is used instead of *seen* to detect symlink loops. + link_count = 0 + while rest: name = rest.pop() if name is None: @@ -436,11 +444,19 @@ def realpath(filename, *, strict=False): else: newpath = path + sep + name try: - st = os.lstat(newpath) + st = lstat(newpath) if not stat.S_ISLNK(st.st_mode): path = newpath continue - if newpath in seen: + elif maxlinks is not None: + link_count += 1 + if link_count > maxlinks: + if strict: + raise OSError(errno.ELOOP, os.strerror(errno.ELOOP), + newpath) + path = newpath + continue + elif newpath in seen: # Already seen this path path = seen[newpath] if path is not None: @@ -448,26 +464,28 @@ def realpath(filename, *, strict=False): continue # The symlink is not resolved, so we must have a symlink loop. if strict: - # Raise OSError(errno.ELOOP) - os.stat(newpath) + raise OSError(errno.ELOOP, os.strerror(errno.ELOOP), + newpath) path = newpath continue - target = os.readlink(newpath) + target = readlink(newpath) except OSError: if strict: raise path = newpath continue # Resolve the symbolic link - seen[newpath] = None # not resolved symlink if target.startswith(sep): # Symlink target is absolute; reset resolved path. path = sep - # Push the symlink path onto the stack, and signal its specialness by - # also pushing None. 
When these entries are popped, we'll record the - # fully-resolved symlink target in the 'seen' mapping. - rest.append(newpath) - rest.append(None) + if maxlinks is None: + # Mark this symlink as seen but not fully resolved. + seen[newpath] = None + # Push the symlink path onto the stack, and signal its specialness + # by also pushing None. When these entries are popped, we'll + # record the fully-resolved symlink target in the 'seen' mapping. + rest.append(newpath) + rest.append(None) # Push the unresolved symlink target parts onto the stack. rest.extend(target.split(sep)[::-1]) From f878d46e5614f08a9302fcb6fc611ef49e9acf2f Mon Sep 17 00:00:00 2001 From: Jan Kaliszewski Date: Wed, 5 Jun 2024 23:52:40 +0200 Subject: [PATCH 6/6] gh-120128: fix description of argument to ipaddress.collapse_addresses() (#120131) The argument to collapse_addresses() is now described as an *iterable* (rather than *iterator*). --- Doc/library/ipaddress.rst | 2 +- Lib/ipaddress.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/ipaddress.rst b/Doc/library/ipaddress.rst index ead841b0581e21..f58c0ea75a4753 100644 --- a/Doc/library/ipaddress.rst +++ b/Doc/library/ipaddress.rst @@ -990,7 +990,7 @@ The module also provides the following module level functions: .. function:: collapse_addresses(addresses) Return an iterator of the collapsed :class:`IPv4Network` or - :class:`IPv6Network` objects. *addresses* is an iterator of + :class:`IPv6Network` objects. *addresses* is an :term:`iterable` of :class:`IPv4Network` or :class:`IPv6Network` objects. A :exc:`TypeError` is raised if *addresses* contains mixed version objects. diff --git a/Lib/ipaddress.py b/Lib/ipaddress.py index 8e4d49c859534d..9cef275f7ae2fc 100644 --- a/Lib/ipaddress.py +++ b/Lib/ipaddress.py @@ -310,7 +310,7 @@ def collapse_addresses(addresses): [IPv4Network('192.0.2.0/24')] Args: - addresses: An iterator of IPv4Network or IPv6Network objects. 
+ addresses: An iterable of IPv4Network or IPv6Network objects. Returns: An iterator of the collapsed IPv(4|6)Network objects.