From 9345dc165c3389208da45d391bf5b2d146302e75 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Tue, 19 Nov 2024 09:33:18 +0100 Subject: [PATCH 01/43] [3.12] gh-126594: Fix typeobject.c wrap_buffer() cast (GH-126754) (#127005) gh-126594: Fix typeobject.c wrap_buffer() cast (GH-126754) Reject flags smaller than INT_MIN. (cherry picked from commit 84f07c3a4cbcfe488ccfb4030571be0bc4de7e45) Co-authored-by: Victor Stinner Co-authored-by: Jelle Zijlstra --- Lib/test/test_buffer.py | 15 +++++++++++++++ Objects/typeobject.c | 6 +++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_buffer.py b/Lib/test/test_buffer.py index 84a34bccbc9af1..5f3e8b0ff59f8d 100644 --- a/Lib/test/test_buffer.py +++ b/Lib/test/test_buffer.py @@ -4442,6 +4442,21 @@ def test_pybuffer_size_from_format(self): self.assertEqual(_testcapi.PyBuffer_SizeFromFormat(format), struct.calcsize(format)) + @support.cpython_only + def test_flags_overflow(self): + # gh-126594: Check for integer overlow on large flags + try: + from _testcapi import INT_MIN, INT_MAX + except ImportError: + INT_MIN = -(2 ** 31) + INT_MAX = 2 ** 31 - 1 + + obj = b'abc' + for flags in (INT_MIN - 1, INT_MAX + 1): + with self.subTest(flags=flags): + with self.assertRaises(OverflowError): + obj.__buffer__(flags) + class TestPythonBufferProtocol(unittest.TestCase): def test_basic(self): diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 7c678907ed5602..5bca4b4e788608 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -8207,13 +8207,13 @@ wrap_buffer(PyObject *self, PyObject *args, void *wrapped) if (flags == -1 && PyErr_Occurred()) { return NULL; } - if (flags > INT_MAX) { + if (flags > INT_MAX || flags < INT_MIN) { PyErr_SetString(PyExc_OverflowError, - "buffer flags too large"); + "buffer flags out of range"); return NULL; } - return _PyMemoryView_FromBufferProc(self, Py_SAFE_DOWNCAST(flags, Py_ssize_t, int), + return _PyMemoryView_FromBufferProc(self, (int)flags, (getbufferproc)wrapped); } From 1f911ba7061feb4ee072cdd0da0e3be8c41fd690 Mon Sep 17 00:00:00 2001 From: Russell Keith-Magee Date: Tue, 19 Nov 2024 18:01:54 +0800 Subject: [PATCH 02/43] [3.12] gh-126789: Correct sysconfig test exclusions for iOS and Android. (GH-126941) (GH-126959) (cherry picked from commit 3938fd60c0c88891b213097380aeea91a45bcd77) --- Lib/test/test_sysconfig.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_sysconfig.py b/Lib/test/test_sysconfig.py index b91977d0cc7624..3468d0ce022ae0 100644 --- a/Lib/test/test_sysconfig.py +++ b/Lib/test/test_sysconfig.py @@ -520,7 +520,7 @@ def test_osx_ext_suffix(self): suffix = sysconfig.get_config_var('EXT_SUFFIX') self.assertTrue(suffix.endswith('-darwin.so'), suffix) - @unittest.skipIf(sys.platform == 'wasi', 'venv is unsupported on WASI') + @requires_subprocess() def test_config_vars_depend_on_site_initialization(self): script = textwrap.dedent(""" import sysconfig @@ -544,7 +544,7 @@ def test_config_vars_depend_on_site_initialization(self): self.assertEqual(no_site_config_vars['base'], site_config_vars['installed_base']) self.assertEqual(no_site_config_vars['platbase'], site_config_vars['installed_platbase']) - @unittest.skipIf(sys.platform == 'wasi', 'venv is unsupported on WASI') + @requires_subprocess() def test_config_vars_recalculation_after_site_initialization(self): script = textwrap.dedent(""" import sysconfig @@ -568,7 +568,7 @@ def test_config_vars_recalculation_after_site_initialization(self): #self.assertEqual(config_vars['after']['prefix'], venv.prefix) # FIXME: prefix gets overwriten by _init_posix #self.assertEqual(config_vars['after']['exec_prefix'], venv.prefix) # FIXME: exec_prefix gets overwriten by _init_posix - @unittest.skipIf(sys.platform == 'wasi', 'venv is unsupported on WASI') + @requires_subprocess() def test_paths_depend_on_site_initialization(self): script = textwrap.dedent(""" import sysconfig From cb98122df25d618bfdcaadedbfb23615b8dd362b Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Tue, 19 Nov 2024 16:41:41 +0100 Subject: [PATCH 03/43] [3.12] Update docs 'make serve' to suggest 'make htmllive' (GH-126969) (#127016) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/Makefile b/Doc/Makefile index a090ee5ba92705..22e43ee3e542ee 100644 --- a/Doc/Makefile +++ b/Doc/Makefile @@ -294,7 +294,7 @@ check: _ensure-pre-commit .PHONY: serve serve: - @echo "The serve target was removed, use htmlview instead (see bpo-36329)" + @echo "The serve target was removed, use htmllive instead (see gh-80510)" # Targets for daily automated doc build # By default, Sphinx only rebuilds pages where the page content has changed. From 153221a1d050a74f1cfbb7af69232cb92bf505ba Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Tue, 19 Nov 2024 19:19:18 +0100 Subject: [PATCH 04/43] [3.12] gh-126991: Fix reference leak in loading pickle's opcode BUILD (GH-126990) (GH-127019) If PyObject_SetItem() fails in the `load_build()` function of _pickle.c, no DECREF for the `dict` variable. (cherry picked from commit 29cbcbd73bbfd8c953c0b213fb33682c289934ff) Co-authored-by: Justin Applegate <70449145+Legoclones@users.noreply.github.com> --- Modules/_pickle.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 179500d6956a78..b8f701c2af2e67 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -6726,6 +6726,7 @@ load_build(PickleState *st, UnpicklerObject *self) } if (PyObject_SetItem(dict, d_key, d_value) < 0) { Py_DECREF(d_key); + Py_DECREF(dict); goto error; } Py_DECREF(d_key); From 7735f58ea5274fd72caee42b090761df2b27c087 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Tue, 19 Nov 2024 22:55:10 +0100 Subject: [PATCH 05/43] [3.12] GH-85168: Use filesystem encoding when converting to/from `file` URIs (GH-126852) (#127040) GH-85168: Use filesystem encoding when converting to/from `file` URIs (GH-126852) Adjust `urllib.request.url2pathname()` and `pathname2url()` to use the filesystem encoding when quoting and unquoting file URIs, rather than forcing use of UTF-8. No changes are needed in the `nturl2path` module because Windows always uses UTF-8, per PEP 529. (cherry picked from commit c9b399fbdb01584dcfff0d7f6ad484644ff269c3) Co-authored-by: Barney Gale --- Lib/test/test_urllib.py | 20 +++++++++++++++---- Lib/test/test_urllib2.py | 4 ---- Lib/urllib/request.py | 8 ++++++-- ...4-11-15-01-50-36.gh-issue-85168.bP8VIN.rst | 4 ++++ 4 files changed, 26 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 15a698cc10f217..b06855e7b3a930 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -718,10 +718,6 @@ def tearDown(self): def constructLocalFileUrl(self, filePath): filePath = os.path.abspath(filePath) - try: - filePath.encode("utf-8") - except UnicodeEncodeError: - raise unittest.SkipTest("filePath is not encodable to utf8") return "file://%s" % urllib.request.pathname2url(filePath) def createNewTempFile(self, data=b""): @@ -1571,6 +1567,13 @@ def test_pathname2url_posix(self): self.assertEqual(fn('/a/b.c'), '/a/b.c') self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c') + @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII') + def test_pathname2url_nonascii(self): + encoding = sys.getfilesystemencoding() + errors = sys.getfilesystemencodeerrors() + url = urllib.parse.quote(os_helper.FS_NONASCII, encoding=encoding, errors=errors) + self.assertEqual(urllib.request.pathname2url(os_helper.FS_NONASCII), url) + @unittest.skipUnless(sys.platform == 'win32', 'test specific to Windows pathnames.') def test_url2pathname_win(self): @@ -1621,6 +1624,15 @@ def test_url2pathname_posix(self): self.assertEqual(fn('////foo/bar'), '//foo/bar') self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar') + @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII') + def test_url2pathname_nonascii(self): + encoding = sys.getfilesystemencoding() + errors = sys.getfilesystemencodeerrors() + url = os_helper.FS_NONASCII + self.assertEqual(urllib.request.url2pathname(url), os_helper.FS_NONASCII) + url = urllib.parse.quote(url, encoding=encoding, errors=errors) + self.assertEqual(urllib.request.url2pathname(url), os_helper.FS_NONASCII) + class Utility_Tests(unittest.TestCase): """Testcase to test the various utility functions in the urllib.""" diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 69cf1dc7aef19f..12b1053aa23bab 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -716,10 +716,6 @@ def test_processors(self): def sanepathname2url(path): - try: - path.encode("utf-8") - except UnicodeEncodeError: - raise unittest.SkipTest("path is not encodable to utf8") urlpath = urllib.request.pathname2url(path) if os.name == "nt" and urlpath.startswith("///"): urlpath = urlpath[2:] diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 178c9795e19c6e..c89e217b9de1b7 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1685,12 +1685,16 @@ def url2pathname(pathname): # URL has an empty authority section, so the path begins on the # third character. pathname = pathname[2:] - return unquote(pathname) + encoding = sys.getfilesystemencoding() + errors = sys.getfilesystemencodeerrors() + return unquote(pathname, encoding=encoding, errors=errors) def pathname2url(pathname): """OS-specific conversion from a file system path to a relative URL of the 'file' scheme; not recommended for general use.""" - return quote(pathname) + encoding = sys.getfilesystemencoding() + errors = sys.getfilesystemencodeerrors() + return quote(pathname, encoding=encoding, errors=errors) ftpcache = {} diff --git a/Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst b/Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst new file mode 100644 index 00000000000000..abceda8f6fd707 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst @@ -0,0 +1,4 @@ +Fix issue where :func:`urllib.request.url2pathname` and +:func:`~urllib.request.pathname2url` always used UTF-8 when quoting and +unquoting file URIs. They now use the :term:`filesystem encoding and error +handler`. From b33c37aa8c8010d250a68cc1d41d1806ef33a725 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Wed, 20 Nov 2024 16:49:58 +0100 Subject: [PATCH 06/43] [3.12] gh-126991: Add tests for unpickling bad object state (GH-127031) (GH-127064) This catches a memory leak in loading the BUILD opcode. (cherry picked from commit addb225f3823b03774cddacce35214dd471bec46) Co-authored-by: Serhiy Storchaka --- Lib/test/pickletester.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index dd7466e2c150d6..85710a976191e9 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -1344,6 +1344,41 @@ def test_bad_newobj_ex(self): self.check_unpickling_error(error, b'cbuiltins\nint\nN}\x92.') self.check_unpickling_error(error, b'cbuiltins\nint\n)N\x92.') + def test_bad_state(self): + c = C() + c.x = None + base = b'c__main__\nC\n)\x81' + self.assertEqual(self.loads(base + b'}X\x01\x00\x00\x00xNsb.'), c) + self.assertEqual(self.loads(base + b'N}X\x01\x00\x00\x00xNs\x86b.'), c) + # non-hashable dict key + self.check_unpickling_error(TypeError, base + b'}]Nsb.') + # state = list + error = (pickle.UnpicklingError, AttributeError) + self.check_unpickling_error(error, base + b'](}}eb.') + # state = 1-tuple + self.check_unpickling_error(error, base + b'}\x85b.') + # state = 3-tuple + self.check_unpickling_error(error, base + b'}}}\x87b.') + # non-hashable slot name + self.check_unpickling_error(TypeError, base + b'}}]Ns\x86b.') + # non-string slot name + self.check_unpickling_error(TypeError, base + b'}}NNs\x86b.') + # dict = True + self.check_unpickling_error(error, base + b'\x88}\x86b.') + # slots dict = True + self.check_unpickling_error(error, base + b'}\x88\x86b.') + + class BadKey1: + count = 1 + def __hash__(self): + if not self.count: + raise CustomError + self.count -= 1 + return 42 + __main__.BadKey1 = BadKey1 + # bad hashable dict key + self.check_unpickling_error(CustomError, base + b'}c__main__\nBadKey1\n)\x81Nsb.') + def test_bad_stack(self): badpickles = [ b'.', # STOP From 512a5bdb2f409cc924214806d09cd266c36249cc Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Wed, 20 Nov 2024 20:37:24 +0100 Subject: [PATCH 07/43] [3.12] Run `apt update` before `apt install git` in autoconf CI job (GH-127071) (cherry picked from commit 0af4ec30bd2e3a52350344d1011c0c125d6dcd71) Co-authored-by: Zachary Ware --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 329ebc42a301b4..f7f96ceb574cc0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -104,7 +104,7 @@ jobs: steps: - name: Install Git run: | - apt install git -yq + apt update && apt install git -yq git config --global --add safe.directory "$GITHUB_WORKSPACE" - uses: actions/checkout@v4 with: From d997be051017ac84d3b5ebf69aa0a11b2a61c230 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Thu, 21 Nov 2024 12:32:16 +0100 Subject: [PATCH 08/43] [3.12] gh-126997: Fix support of non-ASCII strings in pickletools (GH-127062) (GH-127095) * Fix support of STRING and GLOBAL opcodes with non-ASCII arguments. * dis() now outputs non-ASCII bytes in STRING, BINSTRING and SHORT_BINSTRING arguments as escaped (\xXX). (cherry picked from commit eaf217108226633c03cc5c4c90f0b6e4587c8803) Co-authored-by: Serhiy Storchaka --- Lib/pickletools.py | 11 ++- Lib/test/test_pickletools.py | 82 +++++++++++++++++++ ...-11-20-16-58-59.gh-issue-126997.0PI41Y.rst | 3 + 3 files changed, 92 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-20-16-58-59.gh-issue-126997.0PI41Y.rst diff --git a/Lib/pickletools.py b/Lib/pickletools.py index 51ee4a7a2632ac..33a51492ea9c3b 100644 --- a/Lib/pickletools.py +++ b/Lib/pickletools.py @@ -312,7 +312,7 @@ def read_uint8(f): doc="Eight-byte unsigned integer, little-endian.") -def read_stringnl(f, decode=True, stripquotes=True): +def read_stringnl(f, decode=True, stripquotes=True, *, encoding='latin-1'): r""" >>> import io >>> read_stringnl(io.BytesIO(b"'abcd'\nefg\n")) @@ -356,7 +356,7 @@ def read_stringnl(f, decode=True, stripquotes=True): raise ValueError("no string quotes around %r" % data) if decode: - data = codecs.escape_decode(data)[0].decode("ascii") + data = codecs.escape_decode(data)[0].decode(encoding) return data stringnl = ArgumentDescriptor( @@ -370,7 +370,7 @@ def read_stringnl(f, decode=True, stripquotes=True): """) def read_stringnl_noescape(f): - return read_stringnl(f, stripquotes=False) + return read_stringnl(f, stripquotes=False, encoding='utf-8') stringnl_noescape = ArgumentDescriptor( name='stringnl_noescape', @@ -2513,7 +2513,10 @@ def dis(pickle, out=None, memo=None, indentlevel=4, annotate=0): # make a mild effort to align arguments line += ' ' * (10 - len(opcode.name)) if arg is not None: - line += ' ' + repr(arg) + if opcode.name in ("STRING", "BINSTRING", "SHORT_BINSTRING"): + line += ' ' + ascii(arg) + else: + line += ' ' + repr(arg) if markmsg: line += ' ' + markmsg if annotate: diff --git a/Lib/test/test_pickletools.py b/Lib/test/test_pickletools.py index 8cb1f6dffcc6be..5cb9ce430b4657 100644 --- a/Lib/test/test_pickletools.py +++ b/Lib/test/test_pickletools.py @@ -371,6 +371,88 @@ def test_annotate(self): highest protocol among opcodes = 0 ''', annotate=20) + def test_string(self): + self.check_dis(b"S'abc'\n.", '''\ + 0: S STRING 'abc' + 7: . STOP +highest protocol among opcodes = 0 +''') + self.check_dis(b'S"abc"\n.', '''\ + 0: S STRING 'abc' + 7: . STOP +highest protocol among opcodes = 0 +''') + self.check_dis(b"S'\xc3\xb5'\n.", '''\ + 0: S STRING '\\xc3\\xb5' + 6: . STOP +highest protocol among opcodes = 0 +''') + + def test_string_without_quotes(self): + self.check_dis_error(b"Sabc'\n.", '', + 'no string quotes around b"abc\'"') + self.check_dis_error(b'Sabc"\n.', '', + "no string quotes around b'abc\"'") + self.check_dis_error(b"S'abc\n.", '', + '''strinq quote b"'" not found at both ends of b"'abc"''') + self.check_dis_error(b'S"abc\n.', '', + r"""strinq quote b'"' not found at both ends of b'"abc'""") + self.check_dis_error(b"S'abc\"\n.", '', + r"""strinq quote b"'" not found at both ends of b'\\'abc"'""") + self.check_dis_error(b"S\"abc'\n.", '', + r"""strinq quote b'"' not found at both ends of b'"abc\\''""") + + def test_binstring(self): + self.check_dis(b"T\x03\x00\x00\x00abc.", '''\ + 0: T BINSTRING 'abc' + 8: . STOP +highest protocol among opcodes = 1 +''') + self.check_dis(b"T\x02\x00\x00\x00\xc3\xb5.", '''\ + 0: T BINSTRING '\\xc3\\xb5' + 7: . STOP +highest protocol among opcodes = 1 +''') + + def test_short_binstring(self): + self.check_dis(b"U\x03abc.", '''\ + 0: U SHORT_BINSTRING 'abc' + 5: . STOP +highest protocol among opcodes = 1 +''') + self.check_dis(b"U\x02\xc3\xb5.", '''\ + 0: U SHORT_BINSTRING '\\xc3\\xb5' + 4: . STOP +highest protocol among opcodes = 1 +''') + + def test_global(self): + self.check_dis(b"cmodule\nname\n.", '''\ + 0: c GLOBAL 'module name' + 13: . STOP +highest protocol among opcodes = 0 +''') + self.check_dis(b"cm\xc3\xb6dule\nn\xc3\xa4me\n.", '''\ + 0: c GLOBAL 'm\xf6dule n\xe4me' + 15: . STOP +highest protocol among opcodes = 0 +''') + + def test_inst(self): + self.check_dis(b"(imodule\nname\n.", '''\ + 0: ( MARK + 1: i INST 'module name' (MARK at 0) + 14: . STOP +highest protocol among opcodes = 0 +''') + + def test_persid(self): + self.check_dis(b"Pabc\n.", '''\ + 0: P PERSID 'abc' + 5: . STOP +highest protocol among opcodes = 0 +''') + class MiscTestCase(unittest.TestCase): def test__all__(self): diff --git a/Misc/NEWS.d/next/Library/2024-11-20-16-58-59.gh-issue-126997.0PI41Y.rst b/Misc/NEWS.d/next/Library/2024-11-20-16-58-59.gh-issue-126997.0PI41Y.rst new file mode 100644 index 00000000000000..b85c51ef07dcbe --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-20-16-58-59.gh-issue-126997.0PI41Y.rst @@ -0,0 +1,3 @@ +Fix support of STRING and GLOBAL opcodes with non-ASCII arguments in +:mod:`pickletools`. :func:`pickletools.dis` now outputs non-ASCII bytes in +STRING, BINSTRING and SHORT_BINSTRING arguments as escaped (``\xXX``). From 4b4e0dbdf49adc91c35a357ad332ab3abd4c31b1 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 21 Nov 2024 13:44:37 +0200 Subject: [PATCH 09/43] [3.12] gh-126727: Fix locale.nl_langinfo(locale.ERA) (GH-126730) (GH-127098) It now returns multiple era description segments separated by semicolons. Previously it only returned the first segment on platforms with Glibc. (cherry picked from commit 4803cd0244847f286641c85591fda08b513cea52) --- Doc/library/locale.rst | 10 +-- Lib/test/test__locale.py | 46 +++++++++++++ ...-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst | 3 + Modules/_localemodule.c | 65 ++++++++++++------- 4 files changed, 96 insertions(+), 28 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst index a81879a2fe48dc..fee5aba7ee3e32 100644 --- a/Doc/library/locale.rst +++ b/Doc/library/locale.rst @@ -281,7 +281,8 @@ The :mod:`locale` module defines the following exception and functions: .. data:: ERA - Get a string that represents the era used in the current locale. + Get a string which describes how years are counted and displayed for + each era in a locale. Most locales do not define this value. An example of a locale which does define this value is the Japanese one. In Japan, the traditional @@ -290,9 +291,10 @@ The :mod:`locale` module defines the following exception and functions: Normally it should not be necessary to use this value directly. Specifying the ``E`` modifier in their format strings causes the :func:`time.strftime` - function to use this information. The format of the returned string is not - specified, and therefore you should not assume knowledge of it on different - systems. + function to use this information. + The format of the returned string is specified in *The Open Group Base + Specifications Issue 8*, paragraph `7.3.5.2 LC_TIME C-Language Access + `_. .. data:: ERA_D_T_FMT diff --git a/Lib/test/test__locale.py b/Lib/test/test__locale.py index a680e6edb63c0e..89c203250557f0 100644 --- a/Lib/test/test__locale.py +++ b/Lib/test/test__locale.py @@ -90,6 +90,14 @@ def accept(loc): 'bn_IN': (100, {0: '\u09e6', 10: '\u09e7\u09e6', 99: '\u09ef\u09ef'}), } +known_era = { + 'C': (0, ''), + 'en_US': (0, ''), + 'ja_JP': (11, '+:1:2019/05/01:2019/12/31:令和:%EC元年'), + 'zh_TW': (3, '+:1:1912/01/01:1912/12/31:民國:%EC元年'), + 'th_TW': (1, '+:1:-543/01/01:+*:พ.ศ.:%EC %Ey'), +} + if sys.platform == 'win32': # ps_AF doesn't work on Windows: see bpo-38324 (msg361830) del known_numerics['ps_AF'] @@ -228,6 +236,44 @@ def test_alt_digits_nl_langinfo(self): if not tested: self.skipTest('no suitable locales') + @unittest.skipUnless(nl_langinfo, "nl_langinfo is not available") + @unittest.skipUnless(hasattr(locale, 'ERA'), "requires locale.ERA") + @unittest.skipIf( + support.is_emscripten or support.is_wasi, + "musl libc issue on Emscripten, bpo-46390" + ) + def test_era_nl_langinfo(self): + # Test nl_langinfo(ERA) + tested = False + for loc in candidate_locales: + with self.subTest(locale=loc): + try: + setlocale(LC_TIME, loc) + setlocale(LC_CTYPE, loc) + except Error: + self.skipTest(f'no locale {loc!r}') + continue + + with self.subTest(locale=loc): + era = nl_langinfo(locale.ERA) + self.assertIsInstance(era, str) + if era: + self.assertEqual(era.count(':'), (era.count(';') + 1) * 5, era) + + loc1 = loc.split('.', 1)[0] + if loc1 in known_era: + count, sample = known_era[loc1] + if count: + if not era: + self.skipTest(f'ERA is not set for locale {loc!r} on this platform') + self.assertGreaterEqual(era.count(';') + 1, count) + self.assertIn(sample, era) + else: + self.assertEqual(era, '') + tested = True + if not tested: + self.skipTest('no suitable locales') + def test_float_parsing(self): # Bug #1391872: Test whether float parsing is okay on European # locales. diff --git a/Misc/NEWS.d/next/Library/2024-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst b/Misc/NEWS.d/next/Library/2024-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst new file mode 100644 index 00000000000000..7bec8a6b7a830a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst @@ -0,0 +1,3 @@ +``locale.nl_langinfo(locale.ERA)`` now returns multiple era description +segments separated by semicolons. Previously it only returned the first +segment on platforms with Glibc. diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index 53ebb57d23ae07..db8194372dae49 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -595,6 +595,37 @@ static struct langinfo_constant{ {0, 0} }; +#ifdef __GLIBC__ +#if defined(ALT_DIGITS) || defined(ERA) +static PyObject * +decode_strings(const char *result, size_t max_count) +{ + /* Convert a sequence of NUL-separated C strings to a Python string + * containing semicolon separated items. */ + size_t i = 0; + size_t count = 0; + for (; count < max_count && result[i]; count++) { + i += strlen(result + i) + 1; + } + char *buf = PyMem_Malloc(i); + if (buf == NULL) { + PyErr_NoMemory(); + return NULL; + } + memcpy(buf, result, i); + /* Replace all NULs with semicolons. */ + i = 0; + while (--count) { + i += strlen(buf + i); + buf[i++] = ';'; + } + PyObject *pyresult = PyUnicode_DecodeLocale(buf, NULL); + PyMem_Free(buf); + return pyresult; +} +#endif +#endif + /*[clinic input] _locale.nl_langinfo @@ -620,32 +651,18 @@ _locale_nl_langinfo_impl(PyObject *module, int item) result = result != NULL ? result : ""; PyObject *pyresult; #ifdef __GLIBC__ + /* According to the POSIX specification the result must be + * a sequence of semicolon-separated strings. + * But in Glibc they are NUL-separated. */ #ifdef ALT_DIGITS if (item == ALT_DIGITS && *result) { - /* According to the POSIX specification the result must be - * a sequence of up to 100 semicolon-separated strings. - * But in Glibc they are NUL-separated. */ - Py_ssize_t i = 0; - int count = 0; - for (; count < 100 && result[i]; count++) { - i += strlen(result + i) + 1; - } - char *buf = PyMem_Malloc(i); - if (buf == NULL) { - PyErr_NoMemory(); - pyresult = NULL; - } - else { - memcpy(buf, result, i); - /* Replace all NULs with semicolons. */ - i = 0; - while (--count) { - i += strlen(buf + i); - buf[i++] = ';'; - } - pyresult = PyUnicode_DecodeLocale(buf, NULL); - PyMem_Free(buf); - } + pyresult = decode_strings(result, 100); + } + else +#endif +#ifdef ERA + if (item == ERA && *result) { + pyresult = decode_strings(result, SIZE_MAX); } else #endif From 4b705f50d1c65536e19048b32397e8118d92558d Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Fri, 22 Nov 2024 04:42:46 +0100 Subject: [PATCH 10/43] [3.12] GH-126766: `url2pathname()`: handle 'localhost' authority (GH-127129) (#127131) GH-126766: `url2pathname()`: handle 'localhost' authority (GH-127129) Discard any 'localhost' authority from the beginning of a `file:` URI. As a result, file URIs like `//localhost/etc/hosts` are correctly decoded as `/etc/hosts`. (cherry picked from commit ebf564a1d3e2e81b9846535114e481d6096443d2) Co-authored-by: Barney Gale --- Lib/nturl2path.py | 11 +++++++---- Lib/test/test_urllib.py | 4 +++- Lib/urllib/request.py | 3 +++ .../2024-11-22-02-31-55.gh-issue-126766.jfkhBH.rst | 2 ++ 4 files changed, 15 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-22-02-31-55.gh-issue-126766.jfkhBH.rst diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py index 255eb2f547c2ce..6e0d26c129cb99 100644 --- a/Lib/nturl2path.py +++ b/Lib/nturl2path.py @@ -15,14 +15,17 @@ def url2pathname(url): # become # C:\foo\bar\spam.foo import string, urllib.parse + if url[:3] == '///': + # URL has an empty authority section, so the path begins on the third + # character. + url = url[2:] + elif url[:12] == '//localhost/': + # Skip past 'localhost' authority. + url = url[11:] # Windows itself uses ":" even in URLs. url = url.replace(':', '|') if not '|' in url: # No drive specifier, just convert slashes - if url[:3] == '///': - # URL has an empty authority section, so the path begins on the - # third character. - url = url[2:] # make sure not to convert quoted slashes :-) return urllib.parse.unquote(url.replace('/', '\\')) comp = url.split('|') diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index b06855e7b3a930..4520f8c85e6b03 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -1604,6 +1604,8 @@ def test_url2pathname_win(self): # Localhost paths self.assertEqual(fn('//localhost/C:/path/to/file'), 'C:\\path\\to\\file') self.assertEqual(fn('//localhost/C|/path/to/file'), 'C:\\path\\to\\file') + self.assertEqual(fn('//localhost/path/to/file'), '\\path\\to\\file') + self.assertEqual(fn('//localhost//server/path/to/file'), '\\\\server\\path\\to\\file') # Percent-encoded forward slashes are preserved for backwards compatibility self.assertEqual(fn('C:/foo%2fbar'), 'C:\\foo/bar') self.assertEqual(fn('//server/share/foo%2fbar'), '\\\\server\\share\\foo/bar') @@ -1622,7 +1624,7 @@ def test_url2pathname_posix(self): self.assertEqual(fn('//foo/bar'), '//foo/bar') self.assertEqual(fn('///foo/bar'), '/foo/bar') self.assertEqual(fn('////foo/bar'), '//foo/bar') - self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar') + self.assertEqual(fn('//localhost/foo/bar'), '/foo/bar') @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII') def test_url2pathname_nonascii(self): diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index c89e217b9de1b7..c521d96d953883 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1685,6 +1685,9 @@ def url2pathname(pathname): # URL has an empty authority section, so the path begins on the # third character. pathname = pathname[2:] + elif pathname[:12] == '//localhost/': + # Skip past 'localhost' authority. + pathname = pathname[11:] encoding = sys.getfilesystemencoding() errors = sys.getfilesystemencodeerrors() return unquote(pathname, encoding=encoding, errors=errors) diff --git a/Misc/NEWS.d/next/Library/2024-11-22-02-31-55.gh-issue-126766.jfkhBH.rst b/Misc/NEWS.d/next/Library/2024-11-22-02-31-55.gh-issue-126766.jfkhBH.rst new file mode 100644 index 00000000000000..998c99bf4358d5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-22-02-31-55.gh-issue-126766.jfkhBH.rst @@ -0,0 +1,2 @@ +Fix issue where :func:`urllib.request.url2pathname` failed to discard any +'localhost' authority present in the URL. From c470e822bf6b8cde65c61323d0d7892c5df37326 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Fri, 22 Nov 2024 05:37:51 +0100 Subject: [PATCH 11/43] [3.12] GH-127078: `url2pathname()`: handle extra slash before UNC drive in URL path (GH-127132) (#127136) GH-127078: `url2pathname()`: handle extra slash before UNC drive in URL path (GH-127132) Decode a file URI like `file://///server/share` as a UNC path like `\\server\share`. This form of file URI is created by software the simply prepends `file:///` to any absolute Windows path. (cherry picked from commit 8c98ed846a7d7e50c4cf06f823d94737144dcf6a) Co-authored-by: Barney Gale --- Lib/nturl2path.py | 3 +++ Lib/test/test_urllib.py | 2 +- .../Library/2024-11-22-03-40-02.gh-issue-127078.gI_PaP.rst | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-22-03-40-02.gh-issue-127078.gI_PaP.rst diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py index 6e0d26c129cb99..757fd01bec8223 100644 --- a/Lib/nturl2path.py +++ b/Lib/nturl2path.py @@ -22,6 +22,9 @@ def url2pathname(url): elif url[:12] == '//localhost/': # Skip past 'localhost' authority. url = url[11:] + if url[:3] == '///': + # Skip past extra slash before UNC drive in URL path. + url = url[1:] # Windows itself uses ":" even in URLs. url = url.replace(':', '|') if not '|' in url: diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 4520f8c85e6b03..4bdbc19c1ffb7a 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -1600,7 +1600,7 @@ def test_url2pathname_win(self): # UNC paths self.assertEqual(fn('//server/path/to/file'), '\\\\server\\path\\to\\file') self.assertEqual(fn('////server/path/to/file'), '\\\\server\\path\\to\\file') - self.assertEqual(fn('/////server/path/to/file'), '\\\\\\server\\path\\to\\file') + self.assertEqual(fn('/////server/path/to/file'), '\\\\server\\path\\to\\file') # Localhost paths self.assertEqual(fn('//localhost/C:/path/to/file'), 'C:\\path\\to\\file') self.assertEqual(fn('//localhost/C|/path/to/file'), 'C:\\path\\to\\file') diff --git a/Misc/NEWS.d/next/Library/2024-11-22-03-40-02.gh-issue-127078.gI_PaP.rst b/Misc/NEWS.d/next/Library/2024-11-22-03-40-02.gh-issue-127078.gI_PaP.rst new file mode 100644 index 00000000000000..a84c06f3c7a273 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-22-03-40-02.gh-issue-127078.gI_PaP.rst @@ -0,0 +1,2 @@ +Fix issue where :func:`urllib.request.url2pathname` failed to discard an +extra slash before a UNC drive in the URL path on Windows. From caad9047cb13a4e60eb708ce07578576ce9a84c9 Mon Sep 17 00:00:00 2001 From: Jun Komoda <45822440+junkmd@users.noreply.github.com> Date: Fri, 22 Nov 2024 22:08:37 +0900 Subject: [PATCH 12/43] [3.12] gh-127082: Replace "Windows only" with the `availability: Windows` in `ctypes` doc (GH-127099) (#127145) (cherry picked from commit 3c770e3f0978d825c5ebea98fcd654660e7e135f) --- Doc/library/ctypes.rst | 59 ++++++++++++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 14 deletions(-) diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst index 9ebf9d8f751a96..4b5c8f8b7c1257 100644 --- a/Doc/library/ctypes.rst +++ b/Doc/library/ctypes.rst @@ -1390,13 +1390,15 @@ way is to instantiate one of the following classes: .. class:: OleDLL(name, mode=DEFAULT_MODE, handle=None, use_errno=False, use_last_error=False, winmode=None) - Windows only: Instances of this class represent loaded shared libraries, + Instances of this class represent loaded shared libraries, functions in these libraries use the ``stdcall`` calling convention, and are assumed to return the windows specific :class:`HRESULT` code. :class:`HRESULT` values contain information specifying whether the function call failed or succeeded, together with additional error code. If the return value signals a failure, an :class:`OSError` is automatically raised. + .. availability:: Windows + .. versionchanged:: 3.3 :exc:`WindowsError` used to be raised, which is now an alias of :exc:`OSError`. @@ -1408,14 +1410,17 @@ way is to instantiate one of the following classes: .. class:: WinDLL(name, mode=DEFAULT_MODE, handle=None, use_errno=False, use_last_error=False, winmode=None) - Windows only: Instances of this class represent loaded shared libraries, + Instances of this class represent loaded shared libraries, functions in these libraries use the ``stdcall`` calling convention, and are assumed to return :c:expr:`int` by default. + .. availability:: Windows + .. versionchanged:: 3.12 The *name* parameter can now be a :term:`path-like object`. + The Python :term:`global interpreter lock` is released before calling any function exported by these libraries, and reacquired afterwards. @@ -1551,13 +1556,17 @@ These prefabricated library loaders are available: .. data:: windll :noindex: - Windows only: Creates :class:`WinDLL` instances. + Creates :class:`WinDLL` instances. + + .. availability:: Windows .. data:: oledll :noindex: - Windows only: Creates :class:`OleDLL` instances. + Creates :class:`OleDLL` instances. + + .. availability:: Windows .. data:: pydll @@ -1729,11 +1738,13 @@ See :ref:`ctypes-callback-functions` for examples. .. function:: WINFUNCTYPE(restype, *argtypes, use_errno=False, use_last_error=False) - Windows only: The returned function prototype creates functions that use the + The returned function prototype creates functions that use the ``stdcall`` calling convention. The function will release the GIL during the call. *use_errno* and *use_last_error* have the same meaning as above. + .. availability:: Windows + .. function:: PYFUNCTYPE(restype, *argtypes) @@ -1959,17 +1970,21 @@ Utility functions .. function:: DllCanUnloadNow() - Windows only: This function is a hook which allows implementing in-process + This function is a hook which allows implementing in-process COM servers with ctypes. It is called from the DllCanUnloadNow function that the _ctypes extension dll exports. + .. availability:: Windows + .. function:: DllGetClassObject() - Windows only: This function is a hook which allows implementing in-process + This function is a hook which allows implementing in-process COM servers with ctypes. It is called from the DllGetClassObject function that the ``_ctypes`` extension dll exports. + .. availability:: Windows + .. function:: find_library(name) :module: ctypes.util @@ -1985,7 +2000,7 @@ Utility functions .. function:: find_msvcrt() :module: ctypes.util - Windows only: return the filename of the VC runtime library used by Python, + Returns the filename of the VC runtime library used by Python, and by the extension modules. If the name of the library cannot be determined, ``None`` is returned. @@ -1993,20 +2008,27 @@ Utility functions with a call to the ``free(void *)``, it is important that you use the function in the same library that allocated the memory. + .. availability:: Windows + .. function:: FormatError([code]) - Windows only: Returns a textual description of the error code *code*. If no + Returns a textual description of the error code *code*. If no error code is specified, the last error code is used by calling the Windows api function GetLastError. + .. availability:: Windows + .. function:: GetLastError() - Windows only: Returns the last error code set by Windows in the calling thread. + Returns the last error code set by Windows in the calling thread. This function calls the Windows ``GetLastError()`` function directly, it does not return the ctypes-private copy of the error code. + .. availability:: Windows + + .. function:: get_errno() Returns the current value of the ctypes-private copy of the system @@ -2016,11 +2038,14 @@ Utility functions .. function:: get_last_error() - Windows only: returns the current value of the ctypes-private copy of the system + Returns the current value of the ctypes-private copy of the system :data:`!LastError` variable in the calling thread. + .. availability:: Windows + .. audit-event:: ctypes.get_last_error "" ctypes.get_last_error + .. function:: memmove(dst, src, count) Same as the standard C memmove library function: copies *count* bytes from @@ -2069,10 +2094,12 @@ Utility functions .. function:: set_last_error(value) - Windows only: set the current value of the ctypes-private copy of the system + Sets the current value of the ctypes-private copy of the system :data:`!LastError` variable in the calling thread to *value* and return the previous value. + .. availability:: Windows + .. audit-event:: ctypes.set_last_error error ctypes.set_last_error @@ -2093,12 +2120,14 @@ Utility functions .. function:: WinError(code=None, descr=None) - Windows only: this function is probably the worst-named thing in ctypes. It + This function is probably the worst-named thing in ctypes. It creates an instance of :exc:`OSError`. If *code* is not specified, ``GetLastError`` is called to determine the error code. If *descr* is not specified, :func:`FormatError` is called to get a textual description of the error. + .. availability:: Windows + .. versionchanged:: 3.3 An instance of :exc:`WindowsError` used to be created, which is now an alias of :exc:`OSError`. @@ -2438,9 +2467,11 @@ These are the fundamental ctypes data types: .. class:: HRESULT - Windows only: Represents a :c:type:`!HRESULT` value, which contains success or + Represents a :c:type:`!HRESULT` value, which contains success or error information for a function or method call. + .. availability:: Windows + .. class:: py_object From 32bc8e83db3bcca7138dbd23e7925c0a5217c0a0 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Fri, 22 Nov 2024 15:13:43 +0100 Subject: [PATCH 13/43] [3.12] GH-122679: Add `register()` to argparse docs (GH-126939) (GH-127148) (cherry picked from commit fcfdb55465636afc256bc29781b283404d88e6ca) Co-authored-by: Savannah Ostrowski --- Doc/library/argparse.rst | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index b142135f6e635b..00d7c384946cf1 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -750,7 +750,8 @@ Only actions that consume command-line arguments (e.g. ``'store'``, The recommended way to create a custom action is to extend :class:`Action`, overriding the :meth:`!__call__` method and optionally the :meth:`!__init__` and -:meth:`!format_usage` methods. +:meth:`!format_usage` methods. You can also register custom actions using the +:meth:`~ArgumentParser.register` method and reference them by their registered name. An example of a custom action:: @@ -971,10 +972,11 @@ necessary type-checking and type conversions to be performed. If the type_ keyword is used with the default_ keyword, the type converter is only applied if the default is a string. -The argument to ``type`` can be any callable that accepts a single string. +The argument to ``type`` can be a callable that accepts a single string or +the name of a registered type (see :meth:`~ArgumentParser.register`) If the function raises :exc:`ArgumentTypeError`, :exc:`TypeError`, or :exc:`ValueError`, the exception is caught and a nicely formatted error -message is displayed. No other exception types are handled. +message is displayed. Other exception types are not handled. Common built-in types and functions can be used as type converters: @@ -2058,6 +2060,34 @@ Intermixed parsing .. versionadded:: 3.7 +Registering custom types or actions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. method:: ArgumentParser.register(registry_name, value, object) + + Sometimes it's desirable to use a custom string in error messages to provide + more user-friendly output. In these cases, :meth:`!register` can be used to + register custom actions or types with a parser and allow you to reference the + type by their registered name instead of their callable name. + + The :meth:`!register` method accepts three arguments - a *registry_name*, + specifying the internal registry where the object will be stored (e.g., + ``action``, ``type``), *value*, which is the key under which the object will + be registered, and object, the callable to be registered. + + The following example shows how to register a custom type with a parser:: + + >>> import argparse + >>> parser = argparse.ArgumentParser() + >>> parser.register('type', 'hexadecimal integer', lambda s: int(s, 16)) + >>> parser.add_argument('--foo', type='hexadecimal integer') + _StoreAction(option_strings=['--foo'], dest='foo', nargs=None, const=None, default=None, type='hexadecimal integer', choices=None, required=False, help=None, metavar=None, deprecated=False) + >>> parser.parse_args(['--foo', '0xFA']) + Namespace(foo=250) + >>> parser.parse_args(['--foo', '1.2']) + usage: PROG [-h] [--foo FOO] + PROG: error: argument --foo: invalid 'hexadecimal integer' value: '1.2' + Exceptions ---------- From 29648980d440e70ade4e0975b8d6aa83214866ed Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 22 Nov 2024 18:33:50 +0200 Subject: [PATCH 14/43] [3.12] gh-127001: Fix PATHEXT issues in shutil.which() on Windows (GH-127035) (GH-127158) * Name without a PATHEXT extension is only searched if the mode does not include X_OK. * Support multi-component PATHEXT extensions (e.g. ".foo.bar"). * Support files without extensions in PATHEXT contains dot-only extension (".", "..", etc). * Support PATHEXT extensions that end with a dot (e.g. ".foo."). (cherry picked from commit 8899e85de100557899da05f0b37867a371a73800) --- Doc/library/shutil.rst | 6 - Lib/shutil.py | 22 +- Lib/test/test_shutil.py | 515 ++++++++++-------- ...-11-22-10-42-34.gh-issue-127035.UnbDlr.rst | 4 + 4 files changed, 301 insertions(+), 246 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-22-10-42-34.gh-issue-127035.UnbDlr.rst diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index 028778d56518f8..08128f8711b451 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -481,12 +481,6 @@ Directory and files operations or ends with an extension that is in ``PATHEXT``; and filenames that have no extension can now be found. - .. versionchanged:: 3.12.1 - On Windows, if *mode* includes ``os.X_OK``, executables with an - extension in ``PATHEXT`` will be preferred over executables without a - matching extension. - This brings behavior closer to that of Python 3.11. - .. exception:: Error This exception collects exceptions that are raised during a multi-file diff --git a/Lib/shutil.py b/Lib/shutil.py index 20ad1cb5684b25..2d2856912893bd 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -1534,21 +1534,21 @@ def which(cmd, mode=os.F_OK | os.X_OK, path=None): if sys.platform == "win32": # PATHEXT is necessary to check on Windows. pathext_source = os.getenv("PATHEXT") or _WIN_DEFAULT_PATHEXT - pathext = [ext for ext in pathext_source.split(os.pathsep) if ext] + pathext = pathext_source.split(os.pathsep) + pathext = [ext.rstrip('.') for ext in pathext if ext] if use_bytes: pathext = [os.fsencode(ext) for ext in pathext] - files = ([cmd] + [cmd + ext for ext in pathext]) + files = [cmd + ext for ext in pathext] - # gh-109590. If we are looking for an executable, we need to look - # for a PATHEXT match. The first cmd is the direct match - # (e.g. python.exe instead of python) - # Check that direct match first if and only if the extension is in PATHEXT - # Otherwise check it last - suffix = os.path.splitext(files[0])[1].upper() - if mode & os.X_OK and not any(suffix == ext.upper() for ext in pathext): - files.append(files.pop(0)) + # If X_OK in mode, simulate the cmd.exe behavior: look at direct + # match if and only if the extension is in PATHEXT. + # If X_OK not in mode, simulate the first result of where.exe: + # always look at direct match before a PATHEXT match. + normcmd = cmd.upper() + if not (mode & os.X_OK) or any(normcmd.endswith(ext.upper()) for ext in pathext): + files.insert(0, cmd) else: # On other platforms you don't have things like PATHEXT to tell you # what file suffixes are executable, so just pass on cmd as-is. @@ -1557,7 +1557,7 @@ def which(cmd, mode=os.F_OK | os.X_OK, path=None): seen = set() for dir in path: normdir = os.path.normcase(dir) - if not normdir in seen: + if normdir not in seen: seen.add(normdir) for thefile in files: name = os.path.join(dir, thefile) diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 7bc5d12e09c057..c553ea09e07d55 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -71,18 +71,17 @@ def wrap(*args, **kwargs): os.rename = builtin_rename return wrap -def write_file(path, content, binary=False): +def create_file(path, content=b''): """Write *content* to a file located at *path*. If *path* is a tuple instead of a string, os.path.join will be used to - make a path. If *binary* is true, the file will be opened in binary - mode. + make a path. """ if isinstance(path, tuple): path = os.path.join(*path) - mode = 'wb' if binary else 'w' - encoding = None if binary else "utf-8" - with open(path, mode, encoding=encoding) as fp: + if isinstance(content, str): + content = content.encode() + with open(path, 'xb') as fp: fp.write(content) def write_test_file(path, size): @@ -191,7 +190,7 @@ def test_rmtree_works_on_bytes(self): tmp = self.mkdtemp() victim = os.path.join(tmp, 'killme') os.mkdir(victim) - write_file(os.path.join(victim, 'somefile'), 'foo') + create_file(os.path.join(victim, 'somefile'), 'foo') victim = os.fsencode(victim) self.assertIsInstance(victim, bytes) shutil.rmtree(victim) @@ -243,7 +242,7 @@ def test_rmtree_works_on_symlinks(self): for d in dir1, dir2, dir3: os.mkdir(d) file1 = os.path.join(tmp, 'file1') - write_file(file1, 'foo') + create_file(file1, 'foo') link1 = os.path.join(dir1, 'link1') os.symlink(dir2, link1) link2 = os.path.join(dir1, 'link2') @@ -305,7 +304,7 @@ def test_rmtree_works_on_junctions(self): for d in dir1, dir2, dir3: os.mkdir(d) file1 = os.path.join(tmp, 'file1') - write_file(file1, 'foo') + create_file(file1, 'foo') link1 = os.path.join(dir1, 'link1') _winapi.CreateJunction(dir2, link1) link2 = os.path.join(dir1, 'link2') @@ -327,8 +326,8 @@ def test_rmtree_errors_onerror(self): # existing file tmpdir = self.mkdtemp() - write_file((tmpdir, "tstfile"), "") filename = os.path.join(tmpdir, "tstfile") + create_file(filename) with self.assertRaises(NotADirectoryError) as cm: shutil.rmtree(filename) self.assertEqual(cm.exception.filename, filename) @@ -359,8 +358,8 @@ def test_rmtree_errors_onexc(self): # existing file tmpdir = self.mkdtemp() - write_file((tmpdir, "tstfile"), "") filename = os.path.join(tmpdir, "tstfile") + create_file(filename) with self.assertRaises(NotADirectoryError) as cm: shutil.rmtree(filename) self.assertEqual(cm.exception.filename, filename) @@ -549,7 +548,7 @@ def raiser(fn, *args, **kwargs): os.lstat = raiser os.mkdir(TESTFN) - write_file((TESTFN, 'foo'), 'foo') + create_file((TESTFN, 'foo'), 'foo') shutil.rmtree(TESTFN) finally: os.lstat = orig_lstat @@ -622,7 +621,7 @@ def test_rmtree_with_dir_fd(self): self.addCleanup(os.close, dir_fd) os.mkdir(fullname) os.mkdir(os.path.join(fullname, 'subdir')) - write_file(os.path.join(fullname, 'subdir', 'somefile'), 'foo') + create_file(os.path.join(fullname, 'subdir', 'somefile'), 'foo') self.assertTrue(os.path.exists(fullname)) shutil.rmtree(victim, dir_fd=dir_fd) self.assertFalse(os.path.exists(fullname)) @@ -662,7 +661,7 @@ def test_rmtree_on_junction(self): src = os.path.join(TESTFN, 'cheese') dst = os.path.join(TESTFN, 'shop') os.mkdir(src) - open(os.path.join(src, 'spam'), 'wb').close() + create_file(os.path.join(src, 'spam')) _winapi.CreateJunction(src, dst) self.assertRaises(OSError, shutil.rmtree, dst) shutil.rmtree(dst, ignore_errors=True) @@ -704,9 +703,9 @@ def test_copytree_simple(self): dst_dir = os.path.join(self.mkdtemp(), 'destination') self.addCleanup(shutil.rmtree, src_dir) self.addCleanup(shutil.rmtree, os.path.dirname(dst_dir)) - write_file((src_dir, 'test.txt'), '123') + create_file((src_dir, 'test.txt'), '123') os.mkdir(os.path.join(src_dir, 'test_dir')) - write_file((src_dir, 'test_dir', 'test.txt'), '456') + create_file((src_dir, 'test_dir', 'test.txt'), '456') shutil.copytree(src_dir, dst_dir) self.assertTrue(os.path.isfile(os.path.join(dst_dir, 'test.txt'))) @@ -724,11 +723,11 @@ def test_copytree_dirs_exist_ok(self): self.addCleanup(shutil.rmtree, src_dir) self.addCleanup(shutil.rmtree, dst_dir) - write_file((src_dir, 'nonexisting.txt'), '123') + create_file((src_dir, 'nonexisting.txt'), '123') os.mkdir(os.path.join(src_dir, 'existing_dir')) os.mkdir(os.path.join(dst_dir, 'existing_dir')) - write_file((dst_dir, 'existing_dir', 'existing.txt'), 'will be replaced') - write_file((src_dir, 'existing_dir', 'existing.txt'), 'has been replaced') + create_file((dst_dir, 'existing_dir', 'existing.txt'), 'will be replaced') + create_file((src_dir, 'existing_dir', 'existing.txt'), 'has been replaced') shutil.copytree(src_dir, dst_dir, dirs_exist_ok=True) self.assertTrue(os.path.isfile(os.path.join(dst_dir, 'nonexisting.txt'))) @@ -751,7 +750,7 @@ def test_copytree_symlinks(self): sub_dir = os.path.join(src_dir, 'sub') os.mkdir(src_dir) os.mkdir(sub_dir) - write_file((src_dir, 'file.txt'), 'foo') + create_file((src_dir, 'file.txt'), 'foo') src_link = os.path.join(sub_dir, 'link') dst_link = os.path.join(dst_dir, 'sub/link') os.symlink(os.path.join(src_dir, 'file.txt'), @@ -782,16 +781,16 @@ def test_copytree_with_exclude(self): src_dir = self.mkdtemp() try: dst_dir = join(self.mkdtemp(), 'destination') - write_file((src_dir, 'test.txt'), '123') - write_file((src_dir, 'test.tmp'), '123') + create_file((src_dir, 'test.txt'), '123') + create_file((src_dir, 'test.tmp'), '123') os.mkdir(join(src_dir, 'test_dir')) - write_file((src_dir, 'test_dir', 'test.txt'), '456') + create_file((src_dir, 'test_dir', 'test.txt'), '456') os.mkdir(join(src_dir, 'test_dir2')) - write_file((src_dir, 'test_dir2', 'test.txt'), '456') + create_file((src_dir, 'test_dir2', 'test.txt'), '456') os.mkdir(join(src_dir, 'test_dir2', 'subdir')) os.mkdir(join(src_dir, 'test_dir2', 'subdir2')) - write_file((src_dir, 'test_dir2', 'subdir', 'test.txt'), '456') - write_file((src_dir, 'test_dir2', 'subdir2', 'test.py'), '456') + create_file((src_dir, 'test_dir2', 'subdir', 'test.txt'), '456') + create_file((src_dir, 'test_dir2', 'subdir2', 'test.py'), '456') # testing glob-like patterns try: @@ -850,7 +849,7 @@ def test_copytree_arg_types_of_ignore(self): os.mkdir(join(src_dir)) os.mkdir(join(src_dir, 'test_dir')) os.mkdir(os.path.join(src_dir, 'test_dir', 'subdir')) - write_file((src_dir, 'test_dir', 'subdir', 'test.txt'), '456') + create_file((src_dir, 'test_dir', 'subdir', 'test.txt'), '456') invokations = [] @@ -890,9 +889,9 @@ def test_copytree_retains_permissions(self): self.addCleanup(shutil.rmtree, tmp_dir) os.chmod(src_dir, 0o777) - write_file((src_dir, 'permissive.txt'), '123') + create_file((src_dir, 'permissive.txt'), '123') os.chmod(os.path.join(src_dir, 'permissive.txt'), 0o777) - write_file((src_dir, 'restrictive.txt'), '456') + create_file((src_dir, 'restrictive.txt'), '456') os.chmod(os.path.join(src_dir, 'restrictive.txt'), 0o600) restrictive_subdir = tempfile.mkdtemp(dir=src_dir) self.addCleanup(os_helper.rmtree, restrictive_subdir) @@ -935,8 +934,7 @@ def custom_cpfun(a, b): flag = [] src = self.mkdtemp() dst = tempfile.mktemp(dir=self.mkdtemp()) - with open(os.path.join(src, 'foo'), 'w', encoding='utf-8') as f: - f.close() + create_file(os.path.join(src, 'foo')) shutil.copytree(src, dst, copy_function=custom_cpfun) self.assertEqual(len(flag), 1) @@ -971,9 +969,9 @@ def test_copytree_named_pipe(self): def test_copytree_special_func(self): src_dir = self.mkdtemp() dst_dir = os.path.join(self.mkdtemp(), 'destination') - write_file((src_dir, 'test.txt'), '123') + create_file((src_dir, 'test.txt'), '123') os.mkdir(os.path.join(src_dir, 'test_dir')) - write_file((src_dir, 'test_dir', 'test.txt'), '456') + create_file((src_dir, 'test_dir', 'test.txt'), '456') copied = [] def _copy(src, dst): @@ -986,7 +984,7 @@ def _copy(src, dst): def test_copytree_dangling_symlinks(self): src_dir = self.mkdtemp() valid_file = os.path.join(src_dir, 'test.txt') - write_file(valid_file, 'abc') + create_file(valid_file, 'abc') dir_a = os.path.join(src_dir, 'dir_a') os.mkdir(dir_a) for d in src_dir, dir_a: @@ -1014,8 +1012,7 @@ def test_copytree_symlink_dir(self): src_dir = self.mkdtemp() dst_dir = os.path.join(self.mkdtemp(), 'destination') os.mkdir(os.path.join(src_dir, 'real_dir')) - with open(os.path.join(src_dir, 'real_dir', 'test.txt'), 'wb'): - pass + create_file(os.path.join(src_dir, 'real_dir', 'test.txt')) os.symlink(os.path.join(src_dir, 'real_dir'), os.path.join(src_dir, 'link_to_dir'), target_is_directory=True) @@ -1035,7 +1032,7 @@ def test_copytree_return_value(self): dst_dir = src_dir + "dest" self.addCleanup(shutil.rmtree, dst_dir, True) src = os.path.join(src_dir, 'foo') - write_file(src, 'foo') + create_file(src, 'foo') rv = shutil.copytree(src_dir, dst_dir) self.assertEqual(['foo'], os.listdir(rv)) @@ -1047,7 +1044,7 @@ def test_copytree_subdirectory(self): dst_dir = os.path.join(src_dir, "somevendor", "1.0") os.makedirs(src_dir) src = os.path.join(src_dir, 'pol') - write_file(src, 'pol') + create_file(src, 'pol') rv = shutil.copytree(src_dir, dst_dir) self.assertEqual(['pol'], os.listdir(rv)) @@ -1062,8 +1059,8 @@ def test_copymode_follow_symlinks(self): dst = os.path.join(tmp_dir, 'bar') src_link = os.path.join(tmp_dir, 'baz') dst_link = os.path.join(tmp_dir, 'quux') - write_file(src, 'foo') - write_file(dst, 'foo') + create_file(src, 'foo') + create_file(dst, 'foo') os.symlink(src, src_link) os.symlink(dst, dst_link) os.chmod(src, stat.S_IRWXU|stat.S_IRWXG) @@ -1095,8 +1092,8 @@ def test_copymode_symlink_to_symlink(self): dst = os.path.join(tmp_dir, 'bar') src_link = os.path.join(tmp_dir, 'baz') dst_link = os.path.join(tmp_dir, 'quux') - write_file(src, 'foo') - write_file(dst, 'foo') + create_file(src, 'foo') + create_file(dst, 'foo') os.symlink(src, src_link) os.symlink(dst, dst_link) os.chmod(src, stat.S_IRWXU|stat.S_IRWXG) @@ -1126,8 +1123,8 @@ def test_copymode_symlink_to_symlink_wo_lchmod(self): dst = os.path.join(tmp_dir, 'bar') src_link = os.path.join(tmp_dir, 'baz') dst_link = os.path.join(tmp_dir, 'quux') - write_file(src, 'foo') - write_file(dst, 'foo') + create_file(src, 'foo') + create_file(dst, 'foo') os.symlink(src, src_link) os.symlink(dst, dst_link) shutil.copymode(src_link, dst_link, follow_symlinks=False) # silent fail @@ -1141,11 +1138,11 @@ def test_copystat_symlinks(self): dst = os.path.join(tmp_dir, 'bar') src_link = os.path.join(tmp_dir, 'baz') dst_link = os.path.join(tmp_dir, 'qux') - write_file(src, 'foo') + create_file(src, 'foo') src_stat = os.stat(src) os.utime(src, (src_stat.st_atime, src_stat.st_mtime - 42.0)) # ensure different mtimes - write_file(dst, 'bar') + create_file(dst, 'bar') self.assertNotEqual(os.stat(src).st_mtime, os.stat(dst).st_mtime) os.symlink(src, src_link) os.symlink(dst, dst_link) @@ -1185,8 +1182,8 @@ def test_copystat_handles_harmless_chflags_errors(self): tmpdir = self.mkdtemp() file1 = os.path.join(tmpdir, 'file1') file2 = os.path.join(tmpdir, 'file2') - write_file(file1, 'xxx') - write_file(file2, 'xxx') + create_file(file1, 'xxx') + create_file(file2, 'xxx') def make_chflags_raiser(err): ex = OSError() @@ -1212,9 +1209,9 @@ def _chflags_raiser(path, flags, *, follow_symlinks=True): def test_copyxattr(self): tmp_dir = self.mkdtemp() src = os.path.join(tmp_dir, 'foo') - write_file(src, 'foo') + create_file(src, 'foo') dst = os.path.join(tmp_dir, 'bar') - write_file(dst, 'bar') + create_file(dst, 'bar') # no xattr == no problem shutil._copyxattr(src, dst) @@ -1228,7 +1225,7 @@ def test_copyxattr(self): os.getxattr(dst, 'user.foo')) # check errors don't affect other attrs os.remove(dst) - write_file(dst, 'bar') + create_file(dst, 'bar') os_error = OSError(errno.EPERM, 'EPERM') def _raise_on_user_foo(fname, attr, val, **kwargs): @@ -1258,15 +1255,15 @@ def _raise_on_src(fname, *, follow_symlinks=True): # test that shutil.copystat copies xattrs src = os.path.join(tmp_dir, 'the_original') srcro = os.path.join(tmp_dir, 'the_original_ro') - write_file(src, src) - write_file(srcro, srcro) + create_file(src, src) + create_file(srcro, srcro) os.setxattr(src, 'user.the_value', b'fiddly') os.setxattr(srcro, 'user.the_value', b'fiddly') os.chmod(srcro, 0o444) dst = os.path.join(tmp_dir, 'the_copy') dstro = os.path.join(tmp_dir, 'the_copy_ro') - write_file(dst, dst) - write_file(dstro, dstro) + create_file(dst, dst) + create_file(dstro, dstro) shutil.copystat(src, dst) shutil.copystat(srcro, dstro) self.assertEqual(os.getxattr(dst, 'user.the_value'), b'fiddly') @@ -1282,13 +1279,13 @@ def test_copyxattr_symlinks(self): tmp_dir = self.mkdtemp() src = os.path.join(tmp_dir, 'foo') src_link = os.path.join(tmp_dir, 'baz') - write_file(src, 'foo') + create_file(src, 'foo') os.symlink(src, src_link) os.setxattr(src, 'trusted.foo', b'42') os.setxattr(src_link, 'trusted.foo', b'43', follow_symlinks=False) dst = os.path.join(tmp_dir, 'bar') dst_link = os.path.join(tmp_dir, 'qux') - write_file(dst, 'bar') + create_file(dst, 'bar') os.symlink(dst, dst_link) shutil._copyxattr(src_link, dst_link, follow_symlinks=False) self.assertEqual(os.getxattr(dst_link, 'trusted.foo', follow_symlinks=False), b'43') @@ -1301,7 +1298,7 @@ def test_copyxattr_symlinks(self): def _copy_file(self, method): fname = 'test.txt' tmpdir = self.mkdtemp() - write_file((tmpdir, fname), 'xxx') + create_file((tmpdir, fname), 'xxx') file1 = os.path.join(tmpdir, fname) tmpdir2 = self.mkdtemp() method(file1, tmpdir2) @@ -1320,7 +1317,7 @@ def test_copy_symlinks(self): src = os.path.join(tmp_dir, 'foo') dst = os.path.join(tmp_dir, 'bar') src_link = os.path.join(tmp_dir, 'baz') - write_file(src, 'foo') + create_file(src, 'foo') os.symlink(src, src_link) if hasattr(os, 'lchmod'): os.lchmod(src_link, stat.S_IRWXU | stat.S_IRWXO) @@ -1362,7 +1359,7 @@ def test_copy2_symlinks(self): src = os.path.join(tmp_dir, 'foo') dst = os.path.join(tmp_dir, 'bar') src_link = os.path.join(tmp_dir, 'baz') - write_file(src, 'foo') + create_file(src, 'foo') os.symlink(src, src_link) if hasattr(os, 'lchmod'): os.lchmod(src_link, stat.S_IRWXU | stat.S_IRWXO) @@ -1396,7 +1393,7 @@ def test_copy2_xattr(self): tmp_dir = self.mkdtemp() src = os.path.join(tmp_dir, 'foo') dst = os.path.join(tmp_dir, 'bar') - write_file(src, 'foo') + create_file(src, 'foo') os.setxattr(src, 'user.foo', b'42') shutil.copy2(src, dst) self.assertEqual( @@ -1410,7 +1407,7 @@ def test_copy_return_value(self): src_dir = self.mkdtemp() dst_dir = self.mkdtemp() src = os.path.join(src_dir, 'foo') - write_file(src, 'foo') + create_file(src, 'foo') rv = fn(src, dst_dir) self.assertEqual(rv, os.path.join(dst_dir, 'foo')) rv = fn(src, os.path.join(dst_dir, 'bar')) @@ -1427,7 +1424,7 @@ def _test_copy_dir(self, copy_func): src_file = os.path.join(src_dir, 'foo') dir2 = self.mkdtemp() dst = os.path.join(src_dir, 'does_not_exist/') - write_file(src_file, 'foo') + create_file(src_file, 'foo') if sys.platform == "win32": err = PermissionError else: @@ -1447,7 +1444,7 @@ def test_copyfile_symlinks(self): dst = os.path.join(tmp_dir, 'dst') dst_link = os.path.join(tmp_dir, 'dst_link') link = os.path.join(tmp_dir, 'link') - write_file(src, 'foo') + create_file(src, 'foo') os.symlink(src, link) # don't follow shutil.copyfile(link, dst_link, follow_symlinks=False) @@ -1464,8 +1461,7 @@ def test_dont_copy_file_onto_link_to_itself(self): src = os.path.join(TESTFN, 'cheese') dst = os.path.join(TESTFN, 'shop') try: - with open(src, 'w', encoding='utf-8') as f: - f.write('cheddar') + create_file(src, 'cheddar') try: os.link(src, dst) except PermissionError as e: @@ -1484,8 +1480,7 @@ def test_dont_copy_file_onto_symlink_to_itself(self): src = os.path.join(TESTFN, 'cheese') dst = os.path.join(TESTFN, 'shop') try: - with open(src, 'w', encoding='utf-8') as f: - f.write('cheddar') + create_file(src, 'cheddar') # Using `src` here would mean we end up with a symlink pointing # to TESTFN/TESTFN/cheese, while it should point at # TESTFN/cheese. @@ -1520,7 +1515,7 @@ def test_copyfile_return_value(self): dst_dir = self.mkdtemp() dst_file = os.path.join(dst_dir, 'bar') src_file = os.path.join(src_dir, 'foo') - write_file(src_file, 'foo') + create_file(src_file, 'foo') rv = shutil.copyfile(src_file, dst_file) self.assertTrue(os.path.exists(rv)) self.assertEqual(read_file(src_file), read_file(dst_file)) @@ -1530,7 +1525,7 @@ def test_copyfile_same_file(self): # are the same. src_dir = self.mkdtemp() src_file = os.path.join(src_dir, 'foo') - write_file(src_file, 'foo') + create_file(src_file, 'foo') self.assertRaises(SameFileError, shutil.copyfile, src_file, src_file) # But Error should work too, to stay backward compatible. self.assertRaises(Error, shutil.copyfile, src_file, src_file) @@ -1547,7 +1542,7 @@ def test_copyfile_nonexistent_dir(self): src_dir = self.mkdtemp() src_file = os.path.join(src_dir, 'foo') dst = os.path.join(src_dir, 'does_not_exist/') - write_file(src_file, 'foo') + create_file(src_file, 'foo') self.assertRaises(FileNotFoundError, shutil.copyfile, src_file, dst) def test_copyfile_copy_dir(self): @@ -1558,7 +1553,7 @@ def test_copyfile_copy_dir(self): src_file = os.path.join(src_dir, 'foo') dir2 = self.mkdtemp() dst = os.path.join(src_dir, 'does_not_exist/') - write_file(src_file, 'foo') + create_file(src_file, 'foo') if sys.platform == "win32": err = PermissionError else: @@ -1584,13 +1579,13 @@ def _create_files(self, base_dir='dist'): root_dir = self.mkdtemp() dist = os.path.join(root_dir, base_dir) os.makedirs(dist, exist_ok=True) - write_file((dist, 'file1'), 'xxx') - write_file((dist, 'file2'), 'xxx') + create_file((dist, 'file1'), 'xxx') + create_file((dist, 'file2'), 'xxx') os.mkdir(os.path.join(dist, 'sub')) - write_file((dist, 'sub', 'file3'), 'xxx') + create_file((dist, 'sub', 'file3'), 'xxx') os.mkdir(os.path.join(dist, 'sub2')) if base_dir: - write_file((root_dir, 'outer'), 'xxx') + create_file((root_dir, 'outer'), 'xxx') return root_dir, base_dir @support.requires_zlib() @@ -2170,7 +2165,7 @@ def test_disk_usage(self): def test_chown(self): dirname = self.mkdtemp() filename = tempfile.mktemp(dir=dirname) - write_file(filename, 'testing chown function') + create_file(filename, 'testing chown function') with self.assertRaises(ValueError): shutil.chown(filename) @@ -2231,37 +2226,41 @@ def check_chown(path, uid=None, gid=None): class TestWhich(BaseTest, unittest.TestCase): def setUp(self): - self.temp_dir = self.mkdtemp(prefix="Tmp") + temp_dir = self.mkdtemp(prefix="Tmp") + base_dir = os.path.join(temp_dir, TESTFN + '-basedir') + os.mkdir(base_dir) + self.dir = os.path.join(base_dir, TESTFN + '-dir') + os.mkdir(self.dir) + self.other_dir = os.path.join(base_dir, TESTFN + '-dir2') + os.mkdir(self.other_dir) # Give the temp_file an ".exe" suffix for all. # It's needed on Windows and not harmful on other platforms. - self.temp_file = tempfile.NamedTemporaryFile(dir=self.temp_dir, - prefix="Tmp", - suffix=".Exe") - os.chmod(self.temp_file.name, stat.S_IXUSR) - self.addCleanup(self.temp_file.close) - self.dir, self.file = os.path.split(self.temp_file.name) + self.file = TESTFN + '.Exe' + self.filepath = os.path.join(self.dir, self.file) + self.create_file(self.filepath) self.env_path = self.dir self.curdir = os.curdir self.ext = ".EXE" - def to_text_type(self, s): - ''' - In this class we're testing with str, so convert s to a str - ''' - if isinstance(s, bytes): - return s.decode() - return s + to_text_type = staticmethod(os.fsdecode) + + def create_file(self, path): + create_file(path) + os.chmod(path, 0o755) + + def assertNormEqual(self, actual, expected): + self.assertEqual(os.path.normcase(actual), os.path.normcase(expected)) def test_basic(self): # Given an EXE in a directory, it should be returned. rv = shutil.which(self.file, path=self.dir) - self.assertEqual(rv, self.temp_file.name) + self.assertEqual(rv, self.filepath) def test_absolute_cmd(self): # When given the fully qualified path to an executable that exists, # it should be returned. - rv = shutil.which(self.temp_file.name, path=self.temp_dir) - self.assertEqual(rv, self.temp_file.name) + rv = shutil.which(self.filepath, path=self.other_dir) + self.assertEqual(rv, self.filepath) def test_relative_cmd(self): # When given the relative path with a directory part to an executable @@ -2269,7 +2268,7 @@ def test_relative_cmd(self): base_dir, tail_dir = os.path.split(self.dir) relpath = os.path.join(tail_dir, self.file) with os_helper.change_cwd(path=base_dir): - rv = shutil.which(relpath, path=self.temp_dir) + rv = shutil.which(relpath, path=self.other_dir) self.assertEqual(rv, relpath) # But it shouldn't be searched in PATH directories (issue #16957). with os_helper.change_cwd(path=self.dir): @@ -2280,9 +2279,8 @@ def test_relative_cmd(self): "test is for non win32") def test_cwd_non_win32(self): # Issue #16957 - base_dir = os.path.dirname(self.dir) with os_helper.change_cwd(path=self.dir): - rv = shutil.which(self.file, path=base_dir) + rv = shutil.which(self.file, path=self.other_dir) # non-win32: shouldn't match in the current directory. self.assertIsNone(rv) @@ -2292,57 +2290,32 @@ def test_cwd_win32(self): base_dir = os.path.dirname(self.dir) with os_helper.change_cwd(path=self.dir): with unittest.mock.patch('shutil._win_path_needs_curdir', return_value=True): - rv = shutil.which(self.file, path=base_dir) + rv = shutil.which(self.file, path=self.other_dir) # Current directory implicitly on PATH self.assertEqual(rv, os.path.join(self.curdir, self.file)) with unittest.mock.patch('shutil._win_path_needs_curdir', return_value=False): - rv = shutil.which(self.file, path=base_dir) + rv = shutil.which(self.file, path=self.other_dir) # Current directory not on PATH self.assertIsNone(rv) @unittest.skipUnless(sys.platform == "win32", "test is for win32") def test_cwd_win32_added_before_all_other_path(self): - base_dir = pathlib.Path(os.fsdecode(self.dir)) - - elsewhere_in_path_dir = base_dir / 'dir1' - elsewhere_in_path_dir.mkdir() - match_elsewhere_in_path = elsewhere_in_path_dir / 'hello.exe' - match_elsewhere_in_path.touch() - - exe_in_cwd = base_dir / 'hello.exe' - exe_in_cwd.touch() - - with os_helper.change_cwd(path=base_dir): - with unittest.mock.patch('shutil._win_path_needs_curdir', return_value=True): - rv = shutil.which('hello.exe', path=elsewhere_in_path_dir) - - self.assertEqual(os.path.abspath(rv), os.path.abspath(exe_in_cwd)) - - @unittest.skipUnless(sys.platform == "win32", - "test is for win32") - def test_pathext_match_before_path_full_match(self): - base_dir = pathlib.Path(os.fsdecode(self.dir)) - dir1 = base_dir / 'dir1' - dir2 = base_dir / 'dir2' - dir1.mkdir() - dir2.mkdir() - - pathext_match = dir1 / 'hello.com.exe' - path_match = dir2 / 'hello.com' - pathext_match.touch() - path_match.touch() - - test_path = os.pathsep.join([str(dir1), str(dir2)]) - assert os.path.basename(shutil.which( - 'hello.com', path=test_path, mode = os.F_OK - )).lower() == 'hello.com.exe' + other_file_path = os.path.join(self.other_dir, self.file) + self.create_file(other_file_path) + with unittest.mock.patch('shutil._win_path_needs_curdir', return_value=True): + with os_helper.change_cwd(path=self.dir): + rv = shutil.which(self.file, path=self.other_dir) + self.assertEqual(rv, os.path.join(self.curdir, self.file)) + with os_helper.change_cwd(path=self.other_dir): + rv = shutil.which(self.file, path=self.dir) + self.assertEqual(rv, os.path.join(self.curdir, self.file)) @os_helper.skip_if_dac_override def test_non_matching_mode(self): # Set the file read-only and ask for writeable files. - os.chmod(self.temp_file.name, stat.S_IREAD) - if os.access(self.temp_file.name, os.W_OK): + os.chmod(self.filepath, stat.S_IREAD) + if os.access(self.filepath, os.W_OK): self.skipTest("can't set the file read-only") rv = shutil.which(self.file, path=self.dir, mode=os.W_OK) self.assertIsNone(rv) @@ -2364,13 +2337,13 @@ def test_pathext_checking(self): # Ask for the file without the ".exe" extension, then ensure that # it gets found properly with the extension. rv = shutil.which(self.file[:-4], path=self.dir) - self.assertEqual(rv, self.temp_file.name[:-4] + self.ext) + self.assertEqual(rv, self.filepath[:-4] + self.ext) def test_environ_path(self): with os_helper.EnvironmentVarGuard() as env: env['PATH'] = self.env_path rv = shutil.which(self.file) - self.assertEqual(rv, self.temp_file.name) + self.assertEqual(rv, self.filepath) def test_environ_path_empty(self): # PATH='': no match @@ -2384,12 +2357,9 @@ def test_environ_path_empty(self): self.assertIsNone(rv) def test_environ_path_cwd(self): - expected_cwd = os.path.basename(self.temp_file.name) + expected_cwd = self.file if sys.platform == "win32": - curdir = os.curdir - if isinstance(expected_cwd, bytes): - curdir = os.fsencode(curdir) - expected_cwd = os.path.join(curdir, expected_cwd) + expected_cwd = os.path.join(self.curdir, expected_cwd) # PATH=':': explicitly looks in the current directory with os_helper.EnvironmentVarGuard() as env: @@ -2414,14 +2384,14 @@ def test_environ_path_missing(self): create=True), \ support.swap_attr(os, 'defpath', self.dir): rv = shutil.which(self.file) - self.assertEqual(rv, self.temp_file.name) + self.assertEqual(rv, self.filepath) # with confstr with unittest.mock.patch('os.confstr', return_value=self.dir, \ create=True), \ support.swap_attr(os, 'defpath', ''): rv = shutil.which(self.file) - self.assertEqual(rv, self.temp_file.name) + self.assertEqual(rv, self.filepath) def test_empty_path(self): base_dir = os.path.dirname(self.dir) @@ -2439,50 +2409,88 @@ def test_empty_path_no_PATH(self): @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') def test_pathext(self): - ext = self.to_text_type(".xyz") - temp_filexyz = tempfile.NamedTemporaryFile(dir=self.temp_dir, - prefix=self.to_text_type("Tmp2"), suffix=ext) - os.chmod(temp_filexyz.name, stat.S_IXUSR) - self.addCleanup(temp_filexyz.close) - - # strip path and extension - program = os.path.basename(temp_filexyz.name) - program = os.path.splitext(program)[0] - + ext = '.xyz' + cmd = self.to_text_type(TESTFN2) + cmdext = cmd + self.to_text_type(ext) + filepath = os.path.join(self.dir, cmdext) + self.create_file(filepath) with os_helper.EnvironmentVarGuard() as env: - env['PATHEXT'] = ext if isinstance(ext, str) else ext.decode() - rv = shutil.which(program, path=self.temp_dir) - self.assertEqual(rv, temp_filexyz.name) + env['PATHEXT'] = ext + self.assertEqual(shutil.which(cmd, path=self.dir), filepath) + self.assertEqual(shutil.which(cmdext, path=self.dir), filepath) # Issue 40592: See https://bugs.python.org/issue40592 @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') def test_pathext_with_empty_str(self): - ext = self.to_text_type(".xyz") - temp_filexyz = tempfile.NamedTemporaryFile(dir=self.temp_dir, - prefix=self.to_text_type("Tmp2"), suffix=ext) - self.addCleanup(temp_filexyz.close) + ext = '.xyz' + cmd = self.to_text_type(TESTFN2) + cmdext = cmd + self.to_text_type(ext) + filepath = os.path.join(self.dir, cmdext) + self.create_file(filepath) + with os_helper.EnvironmentVarGuard() as env: + env['PATHEXT'] = ext + ';' # note the ; + self.assertEqual(shutil.which(cmd, path=self.dir), filepath) + self.assertEqual(shutil.which(cmdext, path=self.dir), filepath) - # strip path and extension - program = os.path.basename(temp_filexyz.name) - program = os.path.splitext(program)[0] + @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') + def test_pathext_with_multidot_extension(self): + ext = '.foo.bar' + cmd = self.to_text_type(TESTFN2) + cmdext = cmd + self.to_text_type(ext) + filepath = os.path.join(self.dir, cmdext) + self.create_file(filepath) + with os_helper.EnvironmentVarGuard() as env: + env['PATHEXT'] = ext + self.assertEqual(shutil.which(cmd, path=self.dir), filepath) + self.assertEqual(shutil.which(cmdext, path=self.dir), filepath) + @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') + def test_pathext_with_null_extension(self): + cmd = self.to_text_type(TESTFN2) + cmddot = cmd + self.to_text_type('.') + filepath = os.path.join(self.dir, cmd) + self.create_file(filepath) with os_helper.EnvironmentVarGuard() as env: - env['PATHEXT'] = f"{ext if isinstance(ext, str) else ext.decode()};" # note the ; - rv = shutil.which(program, path=self.temp_dir) - self.assertEqual(rv, temp_filexyz.name) + env['PATHEXT'] = '.xyz' + self.assertIsNone(shutil.which(cmd, path=self.dir)) + self.assertIsNone(shutil.which(cmddot, path=self.dir)) + env['PATHEXT'] = '.xyz;.' # note the . + self.assertEqual(shutil.which(cmd, path=self.dir), filepath) + self.assertEqual(shutil.which(cmddot, path=self.dir), + filepath + self.to_text_type('.')) + env['PATHEXT'] = '.xyz;..' # multiple dots + self.assertEqual(shutil.which(cmd, path=self.dir), filepath) + self.assertEqual(shutil.which(cmddot, path=self.dir), + filepath + self.to_text_type('.')) + + @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') + def test_pathext_extension_ends_with_dot(self): + ext = '.xyz' + cmd = self.to_text_type(TESTFN2) + cmdext = cmd + self.to_text_type(ext) + dot = self.to_text_type('.') + filepath = os.path.join(self.dir, cmdext) + self.create_file(filepath) + with os_helper.EnvironmentVarGuard() as env: + env['PATHEXT'] = ext + '.' + self.assertEqual(shutil.which(cmd, path=self.dir), filepath) # cmd.exe hangs here + self.assertEqual(shutil.which(cmdext, path=self.dir), filepath) + self.assertIsNone(shutil.which(cmd + dot, path=self.dir)) + self.assertIsNone(shutil.which(cmdext + dot, path=self.dir)) # See GH-75586 @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') def test_pathext_applied_on_files_in_path(self): + ext = '.xyz' + cmd = self.to_text_type(TESTFN2) + cmdext = cmd + self.to_text_type(ext) + filepath = os.path.join(self.dir, cmdext) + self.create_file(filepath) with os_helper.EnvironmentVarGuard() as env: - env["PATH"] = self.temp_dir if isinstance(self.temp_dir, str) else self.temp_dir.decode() - env["PATHEXT"] = ".test" - - test_path = os.path.join(self.temp_dir, self.to_text_type("test_program.test")) - open(test_path, 'w').close() - os.chmod(test_path, 0o755) - - self.assertEqual(shutil.which(self.to_text_type("test_program")), test_path) + env["PATH"] = os.fsdecode(self.dir) + env["PATHEXT"] = ext + self.assertEqual(shutil.which(cmd), filepath) + self.assertEqual(shutil.which(cmdext), filepath) # See GH-75586 @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') @@ -2498,49 +2506,107 @@ def test_win_path_needs_curdir(self): self.assertFalse(shutil._win_path_needs_curdir('dontcare', os.X_OK)) need_curdir_mock.assert_called_once_with('dontcare') - # See GH-109590 @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') - def test_pathext_preferred_for_execute(self): - with os_helper.EnvironmentVarGuard() as env: - env["PATH"] = self.temp_dir if isinstance(self.temp_dir, str) else self.temp_dir.decode() - env["PATHEXT"] = ".test" - - exe = os.path.join(self.temp_dir, self.to_text_type("test.exe")) - open(exe, 'w').close() - os.chmod(exe, 0o755) + def test_same_dir_with_pathext_extension(self): + cmd = self.file # with .exe extension + # full match + self.assertNormEqual(shutil.which(cmd, path=self.dir), self.filepath) + self.assertNormEqual(shutil.which(cmd, path=self.dir, mode=os.F_OK), + self.filepath) + + cmd2 = cmd + self.to_text_type('.com') # with .exe.com extension + other_file_path = os.path.join(self.dir, cmd2) + self.create_file(other_file_path) + + # full match + self.assertNormEqual(shutil.which(cmd, path=self.dir), self.filepath) + self.assertNormEqual(shutil.which(cmd, path=self.dir, mode=os.F_OK), + self.filepath) + self.assertNormEqual(shutil.which(cmd2, path=self.dir), other_file_path) + self.assertNormEqual(shutil.which(cmd2, path=self.dir, mode=os.F_OK), + other_file_path) - # default behavior allows a direct match if nothing in PATHEXT matches - self.assertEqual(shutil.which(self.to_text_type("test.exe")), exe) - - dot_test = os.path.join(self.temp_dir, self.to_text_type("test.exe.test")) - open(dot_test, 'w').close() - os.chmod(dot_test, 0o755) - - # now we have a PATHEXT match, so it take precedence - self.assertEqual(shutil.which(self.to_text_type("test.exe")), dot_test) - - # but if we don't use os.X_OK we don't change the order based off PATHEXT - # and therefore get the direct match. - self.assertEqual(shutil.which(self.to_text_type("test.exe"), mode=os.F_OK), exe) - - # See GH-109590 @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') - def test_pathext_given_extension_preferred(self): - with os_helper.EnvironmentVarGuard() as env: - env["PATH"] = self.temp_dir if isinstance(self.temp_dir, str) else self.temp_dir.decode() - env["PATHEXT"] = ".exe2;.exe" + def test_same_dir_without_pathext_extension(self): + cmd = self.file[:-4] # without .exe extension + # pathext match + self.assertNormEqual(shutil.which(cmd, path=self.dir), self.filepath) + self.assertNormEqual(shutil.which(cmd, path=self.dir, mode=os.F_OK), + self.filepath) + + # without extension + other_file_path = os.path.join(self.dir, cmd) + self.create_file(other_file_path) + + # pathext match if mode contains X_OK + self.assertNormEqual(shutil.which(cmd, path=self.dir), self.filepath) + # full match + self.assertNormEqual(shutil.which(cmd, path=self.dir, mode=os.F_OK), + other_file_path) + self.assertNormEqual(shutil.which(self.file, path=self.dir), self.filepath) + self.assertNormEqual(shutil.which(self.file, path=self.dir, mode=os.F_OK), + self.filepath) - exe = os.path.join(self.temp_dir, self.to_text_type("test.exe")) - open(exe, 'w').close() - os.chmod(exe, 0o755) - - exe2 = os.path.join(self.temp_dir, self.to_text_type("test.exe2")) - open(exe2, 'w').close() - os.chmod(exe2, 0o755) + @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') + def test_dir_order_with_pathext_extension(self): + cmd = self.file # with .exe extension + search_path = os.pathsep.join([os.fsdecode(self.other_dir), + os.fsdecode(self.dir)]) + # full match in the second directory + self.assertNormEqual(shutil.which(cmd, path=search_path), self.filepath) + self.assertNormEqual(shutil.which(cmd, path=search_path, mode=os.F_OK), + self.filepath) + + cmd2 = cmd + self.to_text_type('.com') # with .exe.com extension + other_file_path = os.path.join(self.other_dir, cmd2) + self.create_file(other_file_path) + + # pathext match in the first directory + self.assertNormEqual(shutil.which(cmd, path=search_path), other_file_path) + self.assertNormEqual(shutil.which(cmd, path=search_path, mode=os.F_OK), + other_file_path) + # full match in the first directory + self.assertNormEqual(shutil.which(cmd2, path=search_path), other_file_path) + self.assertNormEqual(shutil.which(cmd2, path=search_path, mode=os.F_OK), + other_file_path) + + # full match in the first directory + search_path = os.pathsep.join([os.fsdecode(self.dir), + os.fsdecode(self.other_dir)]) + self.assertEqual(shutil.which(cmd, path=search_path), self.filepath) + self.assertEqual(shutil.which(cmd, path=search_path, mode=os.F_OK), + self.filepath) - # even though .exe2 is preferred in PATHEXT, we matched directly to test.exe - self.assertEqual(shutil.which(self.to_text_type("test.exe")), exe) - self.assertEqual(shutil.which(self.to_text_type("test")), exe2) + @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') + def test_dir_order_without_pathext_extension(self): + cmd = self.file[:-4] # without .exe extension + search_path = os.pathsep.join([os.fsdecode(self.other_dir), + os.fsdecode(self.dir)]) + # pathext match in the second directory + self.assertNormEqual(shutil.which(cmd, path=search_path), self.filepath) + self.assertNormEqual(shutil.which(cmd, path=search_path, mode=os.F_OK), + self.filepath) + + # without extension + other_file_path = os.path.join(self.other_dir, cmd) + self.create_file(other_file_path) + + # pathext match in the second directory + self.assertNormEqual(shutil.which(cmd, path=search_path), self.filepath) + # full match in the first directory + self.assertNormEqual(shutil.which(cmd, path=search_path, mode=os.F_OK), + other_file_path) + # full match in the second directory + self.assertNormEqual(shutil.which(self.file, path=search_path), self.filepath) + self.assertNormEqual(shutil.which(self.file, path=search_path, mode=os.F_OK), + self.filepath) + + # pathext match in the first directory + search_path = os.pathsep.join([os.fsdecode(self.dir), + os.fsdecode(self.other_dir)]) + self.assertNormEqual(shutil.which(cmd, path=search_path), self.filepath) + self.assertNormEqual(shutil.which(cmd, path=search_path, mode=os.F_OK), + self.filepath) class TestWhichBytes(TestWhich): @@ -2548,18 +2614,12 @@ def setUp(self): TestWhich.setUp(self) self.dir = os.fsencode(self.dir) self.file = os.fsencode(self.file) - self.temp_file.name = os.fsencode(self.temp_file.name) - self.temp_dir = os.fsencode(self.temp_dir) + self.filepath = os.fsencode(self.filepath) + self.other_dir = os.fsencode(self.other_dir) self.curdir = os.fsencode(self.curdir) self.ext = os.fsencode(self.ext) - def to_text_type(self, s): - ''' - In this class we're testing with bytes, so convert s to a bytes - ''' - if isinstance(s, str): - return s.encode() - return s + to_text_type = staticmethod(os.fsencode) class TestMove(BaseTest, unittest.TestCase): @@ -2570,8 +2630,7 @@ def setUp(self): self.dst_dir = self.mkdtemp() self.src_file = os.path.join(self.src_dir, filename) self.dst_file = os.path.join(self.dst_dir, filename) - with open(self.src_file, "wb") as f: - f.write(b"spam") + create_file(self.src_file, b"spam") def _check_move_file(self, src, dst, real_dst): with open(src, "rb") as f: @@ -2649,8 +2708,7 @@ def test_move_dir_altsep_to_dir(self): def test_existing_file_inside_dest_dir(self): # A file with the same name inside the destination dir already exists. - with open(self.dst_file, "wb"): - pass + create_file(self.dst_file) self.assertRaises(shutil.Error, shutil.move, self.src_file, self.dst_dir) def test_dont_move_dir_in_itself(self): @@ -3065,8 +3123,7 @@ def test_empty_file(self): dstname = TESTFN + 'dst' self.addCleanup(lambda: os_helper.unlink(srcname)) self.addCleanup(lambda: os_helper.unlink(dstname)) - with open(srcname, "wb"): - pass + create_file(srcname) with open(srcname, "rb") as src: with open(dstname, "wb") as dst: @@ -3189,7 +3246,7 @@ def test_blocksize_arg(self): self.assertEqual(blocksize, os.path.getsize(TESTFN)) # ...unless we're dealing with a small file. os_helper.unlink(TESTFN2) - write_file(TESTFN2, b"hello", binary=True) + create_file(TESTFN2, b"hello") self.addCleanup(os_helper.unlink, TESTFN2 + '3') self.assertRaises(ZeroDivisionError, shutil.copyfile, TESTFN2, TESTFN2 + '3') diff --git a/Misc/NEWS.d/next/Library/2024-11-22-10-42-34.gh-issue-127035.UnbDlr.rst b/Misc/NEWS.d/next/Library/2024-11-22-10-42-34.gh-issue-127035.UnbDlr.rst new file mode 100644 index 00000000000000..6bb7abfdd50040 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-22-10-42-34.gh-issue-127035.UnbDlr.rst @@ -0,0 +1,4 @@ +Fix :mod:`shutil.which` on Windows. Now it looks at direct match if and only +if the command ends with a PATHEXT extension or X_OK is not in mode. Support +extensionless files if "." is in PATHEXT. Support PATHEXT extensions that end +with a dot. From f1e74248025b36a0c5d12f72c4ab713f4682f523 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 22 Nov 2024 21:56:39 +0200 Subject: [PATCH 15/43] [3.12] gh-109746: Make _thread.start_new_thread delete state of new thread on its startup failure (GH-109761) (GH-127173) If Python fails to start newly created thread due to failure of underlying PyThread_start_new_thread() call, its state should be removed from interpreter' thread states list to avoid its double cleanup. (cherry picked from commit ca3ea9ad05c3d876a58463595e5b4228fda06936) Co-authored-by: Radislav Chugunov <52372310+chgnrdv@users.noreply.github.com> --- Lib/test/test_threading.py | 35 +++++++++++++++++++ ...-09-22-21-01-56.gh-issue-109746.32MHt9.rst | 1 + Modules/_threadmodule.c | 1 + Python/pystate.c | 4 ++- 4 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2023-09-22-21-01-56.gh-issue-109746.32MHt9.rst diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index 2e4b860b975ff8..183af7c1f0836f 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -1150,6 +1150,41 @@ def __del__(self): self.assertEqual(out.strip(), b"OK") self.assertIn(b"can't create new thread at interpreter shutdown", err) + def test_start_new_thread_failed(self): + # gh-109746: if Python fails to start newly created thread + # due to failure of underlying PyThread_start_new_thread() call, + # its state should be removed from interpreter' thread states list + # to avoid its double cleanup + try: + from resource import setrlimit, RLIMIT_NPROC + except ImportError as err: + self.skipTest(err) # RLIMIT_NPROC is specific to Linux and BSD + code = """if 1: + import resource + import _thread + + def f(): + print("shouldn't be printed") + + limits = resource.getrlimit(resource.RLIMIT_NPROC) + [_, hard] = limits + resource.setrlimit(resource.RLIMIT_NPROC, (0, hard)) + + try: + _thread.start_new_thread(f, ()) + except RuntimeError: + print('ok') + else: + print('skip') + """ + _, out, err = assert_python_ok("-u", "-c", code) + out = out.strip() + if out == b'skip': + self.skipTest('RLIMIT_NPROC had no effect; probably superuser') + self.assertEqual(out, b'ok') + self.assertEqual(err, b'') + + class ThreadJoinOnShutdown(BaseTestCase): def _run_and_join(self, script): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2023-09-22-21-01-56.gh-issue-109746.32MHt9.rst b/Misc/NEWS.d/next/Core_and_Builtins/2023-09-22-21-01-56.gh-issue-109746.32MHt9.rst new file mode 100644 index 00000000000000..2d350c33aa6975 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2023-09-22-21-01-56.gh-issue-109746.32MHt9.rst @@ -0,0 +1 @@ +If :func:`!_thread.start_new_thread` fails to start a new thread, it deletes its state from interpreter and thus avoids its repeated cleanup on finalization. diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 365f4460088aab..518c246e98caf6 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -1219,6 +1219,7 @@ thread_PyThread_start_new_thread(PyObject *self, PyObject *fargs) if (ident == PYTHREAD_INVALID_THREAD_ID) { PyErr_SetString(ThreadError, "can't start new thread"); PyThreadState_Clear(boot->tstate); + PyThreadState_Delete(boot->tstate); thread_bootstate_free(boot, 1); return NULL; } diff --git a/Python/pystate.c b/Python/pystate.c index d0651fbd592f43..f0e0d4117e4259 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1593,7 +1593,9 @@ tstate_delete_common(PyThreadState *tstate) if (tstate->_status.bound_gilstate) { unbind_gilstate_tstate(tstate); } - unbind_tstate(tstate); + if (tstate->_status.bound) { + unbind_tstate(tstate); + } // XXX Move to PyThreadState_Clear()? clear_datastack(tstate); From 6d2821c8331315a42cd79d677c409418f6599294 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Sat, 23 Nov 2024 20:55:55 +0100 Subject: [PATCH 16/43] [3.12] Doc: C API: Fix `Py_NewInterpreterFromConfig` example code (GH-126667) (#127202) Doc: C API: Fix `Py_NewInterpreterFromConfig` example code (GH-126667) (cherry picked from commit e3038e976b25a58f512d8c7083a752c89436eb0d) Co-authored-by: Richard Hansen --- Doc/c-api/init.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index bec075038f06e0..aacbab0b3532e7 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -1644,7 +1644,11 @@ function. You can create and destroy them using the following functions: .check_multi_interp_extensions = 1, .gil = PyInterpreterConfig_OWN_GIL, }; - PyThreadState *tstate = Py_NewInterpreterFromConfig(&config); + PyThreadState *tstate = NULL; + PyStatus status = Py_NewInterpreterFromConfig(&tstate, &config); + if (PyStatus_Exception(status)) { + Py_ExitStatusException(status); + } Note that the config is used only briefly and does not get modified. During initialization the config's values are converted into various From e26ba9670f31df3db471365ddef7b0c293ef6871 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Sat, 23 Nov 2024 22:45:47 +0100 Subject: [PATCH 17/43] [3.12] Fix "useable" typo in docs (GH-127200) (#127206) Fix "useable" typo in docs (GH-127200) Fix typo in docs (cherry picked from commit dbd23790dbd662169905be6300259992639d4e69) Co-authored-by: Stan U <89152624+StanFromIreland@users.noreply.github.com> --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index f7fbe2083d8774..0e8da6088322bd 100644 --- a/README.rst +++ b/README.rst @@ -64,7 +64,7 @@ the executable is called ``python.exe``; elsewhere it's just ``python``. Building a complete Python installation requires the use of various additional third-party libraries, depending on your build platform and configure options. Not all standard library modules are buildable or -useable on all platforms. Refer to the +usable on all platforms. Refer to the `Install dependencies `_ section of the `Developer Guide`_ for current detailed information on dependencies for various Linux distributions and macOS. From b52ab48a10276ea332cd80e5fcc0c0dd2a4b7e02 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Sun, 24 Nov 2024 18:48:12 +0100 Subject: [PATCH 18/43] [3.12] Improve `pathname2url()` and `url2pathname()` docs (GH-127125) (#127233) Improve `pathname2url()` and `url2pathname()` docs (GH-127125) These functions have long sown confusion among Python developers. The existing documentation says they deal with URL path components, but that doesn't fit the evidence on Windows: >>> pathname2url(r'C:\foo') '///C:/foo' >>> pathname2url(r'\\server\share') '////server/share' # or '//server/share' as of quite recently If these were URL path components, they would imply complete URLs like `file://///C:/foo` and `file://////server/share`. Clearly this isn't right. Yet the implementation in `nturl2path` is deliberate, and the `url2pathname()` function correctly inverts it. On non-Windows platforms, the behaviour until quite recently is to simply quote/unquote the path without adding or removing any leading slashes. This behaviour is compatible with *both* interpretations -- 1) the value is a URL path component (existing docs), and 2) the value is everything following `file:` (this commit) The conclusion I draw is that these functions operate on everything after the `file:` prefix, which may include an authority section. This is the only explanation that fits both the Windows and non-Windows behaviour. It's also a better match for the function names. (cherry picked from commit 307c63358681d669ae39e5ecd814bded4a93443a) Co-authored-by: Barney Gale --- Doc/library/urllib.request.rst | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst index d7de8a16438110..0c5aa0458c6ebe 100644 --- a/Doc/library/urllib.request.rst +++ b/Doc/library/urllib.request.rst @@ -160,16 +160,28 @@ The :mod:`urllib.request` module defines the following functions: .. function:: pathname2url(path) - Convert the pathname *path* from the local syntax for a path to the form used in - the path component of a URL. This does not produce a complete URL. The return - value will already be quoted using the :func:`~urllib.parse.quote` function. + Convert the given local path to a ``file:`` URL. This function uses + :func:`~urllib.parse.quote` function to encode the path. For historical + reasons, the return value omits the ``file:`` scheme prefix. This example + shows the function being used on Windows:: + >>> from urllib.request import pathname2url + >>> path = 'C:\\Program Files' + >>> 'file:' + pathname2url(path) + 'file:///C:/Program%20Files' -.. function:: url2pathname(path) - Convert the path component *path* from a percent-encoded URL to the local syntax for a - path. This does not accept a complete URL. This function uses - :func:`~urllib.parse.unquote` to decode *path*. +.. function:: url2pathname(url) + + Convert the given ``file:`` URL to a local path. This function uses + :func:`~urllib.parse.unquote` to decode the URL. For historical reasons, + the given value *must* omit the ``file:`` scheme prefix. This example shows + the function being used on Windows:: + + >>> from urllib.request import url2pathname + >>> url = 'file:///C:/Program%20Files' + >>> url2pathname(url.removeprefix('file:')) + 'C:\\Program Files' .. function:: getproxies() From 5bfacd26e84c2f2b09e06634954970781dfc3c35 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Sun, 24 Nov 2024 19:13:23 +0100 Subject: [PATCH 19/43] [3.12] gh-127217: Fix pathname2url() for paths starting with multiple slashes on Posix (GH-127218) (GH-127231) (cherry picked from commit 97b2ceaaaf88a73a45254912a0e972412879ccbf) Co-authored-by: Serhiy Storchaka --- Lib/test/test_urllib.py | 3 +++ Lib/urllib/request.py | 4 ++++ .../Library/2024-11-24-12-41-31.gh-issue-127217.UAXGFr.rst | 2 ++ 3 files changed, 9 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-11-24-12-41-31.gh-issue-127217.UAXGFr.rst diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 4bdbc19c1ffb7a..0710950e25a7cd 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -1565,6 +1565,9 @@ def test_pathname2url_posix(self): fn = urllib.request.pathname2url self.assertEqual(fn('/'), '/') self.assertEqual(fn('/a/b.c'), '/a/b.c') + self.assertEqual(fn('//a/b.c'), '////a/b.c') + self.assertEqual(fn('///a/b.c'), '/////a/b.c') + self.assertEqual(fn('////a/b.c'), '//////a/b.c') self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c') @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII') diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index c521d96d953883..9a559f44152be5 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1695,6 +1695,10 @@ def url2pathname(pathname): def pathname2url(pathname): """OS-specific conversion from a file system path to a relative URL of the 'file' scheme; not recommended for general use.""" + if pathname[:2] == '//': + # Add explicitly empty authority to avoid interpreting the path + # as authority. + pathname = '//' + pathname encoding = sys.getfilesystemencoding() errors = sys.getfilesystemencodeerrors() return quote(pathname, encoding=encoding, errors=errors) diff --git a/Misc/NEWS.d/next/Library/2024-11-24-12-41-31.gh-issue-127217.UAXGFr.rst b/Misc/NEWS.d/next/Library/2024-11-24-12-41-31.gh-issue-127217.UAXGFr.rst new file mode 100644 index 00000000000000..3139e33302f378 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-24-12-41-31.gh-issue-127217.UAXGFr.rst @@ -0,0 +1,2 @@ +Fix :func:`urllib.request.pathname2url` for paths starting with multiple +slashes on Posix. From 7e901cbdd851b82a32942eda87937e3eda666ee2 Mon Sep 17 00:00:00 2001 From: Yuki Kobayashi Date: Mon, 25 Nov 2024 17:51:48 +0900 Subject: [PATCH 20/43] [3.12] gh-101100: Fix sphinx warnings of removed opcodes (GH-127222) (#127240) --- Doc/whatsnew/3.4.rst | 2 +- Doc/whatsnew/3.6.rst | 8 ++++---- Doc/whatsnew/3.7.rst | 4 ++-- Doc/whatsnew/3.8.rst | 10 +++++----- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Doc/whatsnew/3.4.rst b/Doc/whatsnew/3.4.rst index 33534ff2c93f9e..6213915cc7413f 100644 --- a/Doc/whatsnew/3.4.rst +++ b/Doc/whatsnew/3.4.rst @@ -1979,7 +1979,7 @@ Other Improvements now works correctly (previously it silently returned the first python module in the file). (Contributed by Václav Šmilauer in :issue:`16421`.) -* A new opcode, :opcode:`LOAD_CLASSDEREF`, has been added to fix a bug in the +* A new opcode, :opcode:`!LOAD_CLASSDEREF`, has been added to fix a bug in the loading of free variables in class bodies that could be triggered by certain uses of :ref:`__prepare__ `. (Contributed by Benjamin Peterson in :issue:`17853`.) diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst index 22bcaef33a277e..aa72cf4fdbf031 100644 --- a/Doc/whatsnew/3.6.rst +++ b/Doc/whatsnew/3.6.rst @@ -2377,16 +2377,16 @@ There have been several major changes to the :term:`bytecode` in Python 3.6. * The function call opcodes have been heavily reworked for better performance and simpler implementation. - The :opcode:`MAKE_FUNCTION`, :opcode:`CALL_FUNCTION`, - :opcode:`CALL_FUNCTION_KW` and :opcode:`BUILD_MAP_UNPACK_WITH_CALL` opcodes + The :opcode:`MAKE_FUNCTION`, :opcode:`!CALL_FUNCTION`, + :opcode:`!CALL_FUNCTION_KW` and :opcode:`!BUILD_MAP_UNPACK_WITH_CALL` opcodes have been modified, the new :opcode:`CALL_FUNCTION_EX` and - :opcode:`BUILD_TUPLE_UNPACK_WITH_CALL` have been added, and + :opcode:`!BUILD_TUPLE_UNPACK_WITH_CALL` have been added, and ``CALL_FUNCTION_VAR``, ``CALL_FUNCTION_VAR_KW`` and ``MAKE_CLOSURE`` opcodes have been removed. (Contributed by Demur Rumed in :issue:`27095`, and Serhiy Storchaka in :issue:`27213`, :issue:`28257`.) -* The new :opcode:`SETUP_ANNOTATIONS` and :opcode:`STORE_ANNOTATION` opcodes +* The new :opcode:`SETUP_ANNOTATIONS` and :opcode:`!STORE_ANNOTATION` opcodes have been added to support the new :term:`variable annotation` syntax. (Contributed by Ivan Levkivskyi in :issue:`27985`.) diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst index 996d0a93cae5fc..7e4b189dfe44a8 100644 --- a/Doc/whatsnew/3.7.rst +++ b/Doc/whatsnew/3.7.rst @@ -2476,10 +2476,10 @@ avoiding possible problems use new functions :c:func:`PySlice_Unpack` and CPython bytecode changes ------------------------ -There are two new opcodes: :opcode:`LOAD_METHOD` and :opcode:`CALL_METHOD`. +There are two new opcodes: :opcode:`LOAD_METHOD` and :opcode:`!CALL_METHOD`. (Contributed by Yury Selivanov and INADA Naoki in :issue:`26110`.) -The :opcode:`STORE_ANNOTATION` opcode has been removed. +The :opcode:`!STORE_ANNOTATION` opcode has been removed. (Contributed by Mark Shannon in :issue:`32550`.) diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst index 4d0b9f2c886f2b..c1f3717b6b74a8 100644 --- a/Doc/whatsnew/3.8.rst +++ b/Doc/whatsnew/3.8.rst @@ -2152,11 +2152,11 @@ CPython bytecode changes cleaning-up code for :keyword:`break`, :keyword:`continue` and :keyword:`return`. - Removed opcodes :opcode:`BREAK_LOOP`, :opcode:`CONTINUE_LOOP`, - :opcode:`SETUP_LOOP` and :opcode:`SETUP_EXCEPT`. Added new opcodes - :opcode:`ROT_FOUR`, :opcode:`BEGIN_FINALLY`, :opcode:`CALL_FINALLY` and - :opcode:`POP_FINALLY`. Changed the behavior of :opcode:`END_FINALLY` - and :opcode:`WITH_CLEANUP_START`. + Removed opcodes :opcode:`!BREAK_LOOP`, :opcode:`!CONTINUE_LOOP`, + :opcode:`!SETUP_LOOP` and :opcode:`!SETUP_EXCEPT`. Added new opcodes + :opcode:`!ROT_FOUR`, :opcode:`!BEGIN_FINALLY`, :opcode:`!CALL_FINALLY` and + :opcode:`!POP_FINALLY`. Changed the behavior of :opcode:`!END_FINALLY` + and :opcode:`!WITH_CLEANUP_START`. (Contributed by Mark Shannon, Antoine Pitrou and Serhiy Storchaka in :issue:`17611`.) From f7e587d15c868d4e08397320d556980ee2a9a4ae Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Mon, 25 Nov 2024 13:18:35 +0100 Subject: [PATCH 21/43] [3.12] gh-126384: Add tests to verify the behavior of basic COM methods. (GH-126610) (GH-127160) (cherry picked from commit 7725c0371a93be3ccfaff4871014a80bdf0ea274) Co-authored-by: Jun Komoda <45822440+junkmd@users.noreply.github.com> --- .../test_win32_com_foreign_func.py | 188 ++++++++++++++++++ 1 file changed, 188 insertions(+) create mode 100644 Lib/test/test_ctypes/test_win32_com_foreign_func.py diff --git a/Lib/test/test_ctypes/test_win32_com_foreign_func.py b/Lib/test/test_ctypes/test_win32_com_foreign_func.py new file mode 100644 index 00000000000000..651c9277d59af9 --- /dev/null +++ b/Lib/test/test_ctypes/test_win32_com_foreign_func.py @@ -0,0 +1,188 @@ +import ctypes +import gc +import sys +import unittest +from ctypes import POINTER, byref, c_void_p +from ctypes.wintypes import BYTE, DWORD, WORD + +if sys.platform != "win32": + raise unittest.SkipTest("Windows-specific test") + + +from _ctypes import COMError +from ctypes import HRESULT + + +COINIT_APARTMENTTHREADED = 0x2 +CLSCTX_SERVER = 5 +S_OK = 0 +OUT = 2 +TRUE = 1 +E_NOINTERFACE = -2147467262 + + +class GUID(ctypes.Structure): + # https://learn.microsoft.com/en-us/windows/win32/api/guiddef/ns-guiddef-guid + _fields_ = [ + ("Data1", DWORD), + ("Data2", WORD), + ("Data3", WORD), + ("Data4", BYTE * 8), + ] + + +def create_proto_com_method(name, index, restype, *argtypes): + proto = ctypes.WINFUNCTYPE(restype, *argtypes) + + def make_method(*args): + foreign_func = proto(index, name, *args) + + def call(self, *args, **kwargs): + return foreign_func(self, *args, **kwargs) + + return call + + return make_method + + +def create_guid(name): + guid = GUID() + # https://learn.microsoft.com/en-us/windows/win32/api/combaseapi/nf-combaseapi-clsidfromstring + ole32.CLSIDFromString(name, byref(guid)) + return guid + + +def is_equal_guid(guid1, guid2): + # https://learn.microsoft.com/en-us/windows/win32/api/objbase/nf-objbase-isequalguid + return ole32.IsEqualGUID(byref(guid1), byref(guid2)) + + +ole32 = ctypes.oledll.ole32 + +IID_IUnknown = create_guid("{00000000-0000-0000-C000-000000000046}") +IID_IStream = create_guid("{0000000C-0000-0000-C000-000000000046}") +IID_IPersist = create_guid("{0000010C-0000-0000-C000-000000000046}") +CLSID_ShellLink = create_guid("{00021401-0000-0000-C000-000000000046}") + +# https://learn.microsoft.com/en-us/windows/win32/api/unknwn/nf-unknwn-iunknown-queryinterface(refiid_void) +proto_query_interface = create_proto_com_method( + "QueryInterface", 0, HRESULT, POINTER(GUID), POINTER(c_void_p) +) +# https://learn.microsoft.com/en-us/windows/win32/api/unknwn/nf-unknwn-iunknown-addref +proto_add_ref = create_proto_com_method("AddRef", 1, ctypes.c_long) +# https://learn.microsoft.com/en-us/windows/win32/api/unknwn/nf-unknwn-iunknown-release +proto_release = create_proto_com_method("Release", 2, ctypes.c_long) +# https://learn.microsoft.com/en-us/windows/win32/api/objidl/nf-objidl-ipersist-getclassid +proto_get_class_id = create_proto_com_method( + "GetClassID", 3, HRESULT, POINTER(GUID) +) + + +class ForeignFunctionsThatWillCallComMethodsTests(unittest.TestCase): + def setUp(self): + # https://learn.microsoft.com/en-us/windows/win32/api/combaseapi/nf-combaseapi-coinitializeex + ole32.CoInitializeEx(None, COINIT_APARTMENTTHREADED) + + def tearDown(self): + # https://learn.microsoft.com/en-us/windows/win32/api/combaseapi/nf-combaseapi-couninitialize + ole32.CoUninitialize() + gc.collect() + + @staticmethod + def create_shelllink_persist(typ): + ppst = typ() + # https://learn.microsoft.com/en-us/windows/win32/api/combaseapi/nf-combaseapi-cocreateinstance + ole32.CoCreateInstance( + byref(CLSID_ShellLink), + None, + CLSCTX_SERVER, + byref(IID_IPersist), + byref(ppst), + ) + return ppst + + def test_without_paramflags_and_iid(self): + class IUnknown(c_void_p): + QueryInterface = proto_query_interface() + AddRef = proto_add_ref() + Release = proto_release() + + class IPersist(IUnknown): + GetClassID = proto_get_class_id() + + ppst = self.create_shelllink_persist(IPersist) + + clsid = GUID() + hr_getclsid = ppst.GetClassID(byref(clsid)) + self.assertEqual(S_OK, hr_getclsid) + self.assertEqual(TRUE, is_equal_guid(CLSID_ShellLink, clsid)) + + self.assertEqual(2, ppst.AddRef()) + self.assertEqual(3, ppst.AddRef()) + + punk = IUnknown() + hr_qi = ppst.QueryInterface(IID_IUnknown, punk) + self.assertEqual(S_OK, hr_qi) + self.assertEqual(3, punk.Release()) + + with self.assertRaises(OSError) as e: + punk.QueryInterface(IID_IStream, IUnknown()) + self.assertEqual(E_NOINTERFACE, e.exception.winerror) + + self.assertEqual(2, ppst.Release()) + self.assertEqual(1, ppst.Release()) + self.assertEqual(0, ppst.Release()) + + def test_with_paramflags_and_without_iid(self): + class IUnknown(c_void_p): + QueryInterface = proto_query_interface(None) + AddRef = proto_add_ref() + Release = proto_release() + + class IPersist(IUnknown): + GetClassID = proto_get_class_id(((OUT, "pClassID"),)) + + ppst = self.create_shelllink_persist(IPersist) + + clsid = ppst.GetClassID() + self.assertEqual(TRUE, is_equal_guid(CLSID_ShellLink, clsid)) + + punk = IUnknown() + hr_qi = ppst.QueryInterface(IID_IUnknown, punk) + self.assertEqual(S_OK, hr_qi) + self.assertEqual(1, punk.Release()) + + with self.assertRaises(OSError) as e: + ppst.QueryInterface(IID_IStream, IUnknown()) + self.assertEqual(E_NOINTERFACE, e.exception.winerror) + + self.assertEqual(0, ppst.Release()) + + def test_with_paramflags_and_iid(self): + class IUnknown(c_void_p): + QueryInterface = proto_query_interface(None, IID_IUnknown) + AddRef = proto_add_ref() + Release = proto_release() + + class IPersist(IUnknown): + GetClassID = proto_get_class_id(((OUT, "pClassID"),), IID_IPersist) + + ppst = self.create_shelllink_persist(IPersist) + + clsid = ppst.GetClassID() + self.assertEqual(TRUE, is_equal_guid(CLSID_ShellLink, clsid)) + + punk = IUnknown() + hr_qi = ppst.QueryInterface(IID_IUnknown, punk) + self.assertEqual(S_OK, hr_qi) + self.assertEqual(1, punk.Release()) + + with self.assertRaises(COMError) as e: + ppst.QueryInterface(IID_IStream, IUnknown()) + self.assertEqual(E_NOINTERFACE, e.exception.hresult) + + self.assertEqual(0, ppst.Release()) + + +if __name__ == '__main__': + unittest.main() From a4d6b905dd46eedaa118b95d1ed052a7171cd608 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Mon, 25 Nov 2024 18:51:21 +0100 Subject: [PATCH 22/43] [3.12] gh-127182: Fix `io.StringIO.__setstate__` crash when `None` is the first value (GH-127219) (#127263) gh-127182: Fix `io.StringIO.__setstate__` crash when `None` is the first value (GH-127219) (cherry picked from commit a2ee89968299fc4f0da4b5a4165025b941213ba5) Co-authored-by: sobolevn Co-authored-by: Victor Stinner --- Lib/test/test_io.py | 15 ++++++++++ ...-11-24-14-20-17.gh-issue-127182.WmfY2g.rst | 2 ++ Modules/_io/stringio.c | 30 ++++++++++--------- 3 files changed, 33 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-24-14-20-17.gh-issue-127182.WmfY2g.rst diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index adabf6d6ed7aca..d85040a3083373 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -1154,6 +1154,21 @@ def test_disallow_instantiation(self): _io = self._io support.check_disallow_instantiation(self, _io._BytesIOBuffer) + def test_stringio_setstate(self): + # gh-127182: Calling __setstate__() with invalid arguments must not crash + obj = self._io.StringIO() + with self.assertRaisesRegex( + TypeError, + 'initial_value must be str or None, not int', + ): + obj.__setstate__((1, '', 0, {})) + + obj.__setstate__((None, '', 0, {})) # should not crash + self.assertEqual(obj.getvalue(), '') + + obj.__setstate__(('', '', 0, {})) + self.assertEqual(obj.getvalue(), '') + class PyIOTest(IOTest): pass diff --git a/Misc/NEWS.d/next/Library/2024-11-24-14-20-17.gh-issue-127182.WmfY2g.rst b/Misc/NEWS.d/next/Library/2024-11-24-14-20-17.gh-issue-127182.WmfY2g.rst new file mode 100644 index 00000000000000..2cc46ca3d33977 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-24-14-20-17.gh-issue-127182.WmfY2g.rst @@ -0,0 +1,2 @@ +Fix :meth:`!io.StringIO.__setstate__` crash, when :const:`None` was passed as +the first value. diff --git a/Modules/_io/stringio.c b/Modules/_io/stringio.c index 568d0bd7097b63..ed9cf0da910077 100644 --- a/Modules/_io/stringio.c +++ b/Modules/_io/stringio.c @@ -884,23 +884,25 @@ stringio_setstate(stringio *self, PyObject *state) once by __init__. So we do not take any chance and replace object's buffer completely. */ { - PyObject *item; - Py_UCS4 *buf; - Py_ssize_t bufsize; - - item = PyTuple_GET_ITEM(state, 0); - buf = PyUnicode_AsUCS4Copy(item); - if (buf == NULL) - return NULL; - bufsize = PyUnicode_GET_LENGTH(item); + PyObject *item = PyTuple_GET_ITEM(state, 0); + if (PyUnicode_Check(item)) { + Py_UCS4 *buf = PyUnicode_AsUCS4Copy(item); + if (buf == NULL) + return NULL; + Py_ssize_t bufsize = PyUnicode_GET_LENGTH(item); - if (resize_buffer(self, bufsize) < 0) { + if (resize_buffer(self, bufsize) < 0) { + PyMem_Free(buf); + return NULL; + } + memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4)); PyMem_Free(buf); - return NULL; + self->string_size = bufsize; + } + else { + assert(item == Py_None); + self->string_size = 0; } - memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4)); - PyMem_Free(buf); - self->string_size = bufsize; } /* Set carefully the position value. Alternatively, we could use the seek From 3806772b3d30a574eb26bcc3a1e0ceae49ad4401 Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Mon, 25 Nov 2024 16:48:00 -0300 Subject: [PATCH 23/43] [3.12] Fix a few typos found in the docs (GH-127126) (GH-127185) (cherry picked from commit 39e60aeb3837f1f23d8b7f30d3b8d9faf805ef88) --- Doc/library/importlib.metadata.rst | 2 +- Doc/library/multiprocessing.rst | 4 ++-- Doc/library/socket.rst | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/library/importlib.metadata.rst b/Doc/library/importlib.metadata.rst index fe898cf4af8bd5..289e02ccdc153c 100644 --- a/Doc/library/importlib.metadata.rst +++ b/Doc/library/importlib.metadata.rst @@ -133,7 +133,7 @@ Entry points Details of a collection of installed entry points. - Also provides a ``.groups`` attribute that reports all identifed entry + Also provides a ``.groups`` attribute that reports all identified entry point groups, and a ``.names`` attribute that reports all identified entry point names. diff --git a/Doc/library/multiprocessing.rst b/Doc/library/multiprocessing.rst index f2a9ada85e235a..3fec86080c5ead 100644 --- a/Doc/library/multiprocessing.rst +++ b/Doc/library/multiprocessing.rst @@ -282,7 +282,7 @@ processes: of corruption from processes using different ends of the pipe at the same time. - The :meth:`~Connection.send` method serializes the the object and + The :meth:`~Connection.send` method serializes the object and :meth:`~Connection.recv` re-creates the object. Synchronization between processes @@ -819,7 +819,7 @@ For an example of the usage of queues for interprocess communication see used for receiving messages and ``conn2`` can only be used for sending messages. - The :meth:`~multiprocessing.Connection.send` method serializes the the object using + The :meth:`~multiprocessing.Connection.send` method serializes the object using :mod:`pickle` and the :meth:`~multiprocessing.Connection.recv` re-creates the object. .. class:: Queue([maxsize]) diff --git a/Doc/library/socket.rst b/Doc/library/socket.rst index ab0ede803a0509..dd4c2f8317d120 100644 --- a/Doc/library/socket.rst +++ b/Doc/library/socket.rst @@ -966,7 +966,7 @@ The :mod:`socket` module also offers various network-related services: These addresses should generally be tried in order until a connection succeeds (possibly tried in parallel, for example, using a `Happy Eyeballs`_ algorithm). In these cases, limiting the *type* and/or *proto* can help eliminate - unsuccessful or unusable connecton attempts. + unsuccessful or unusable connection attempts. Some systems will, however, only return a single address. (For example, this was reported on Solaris and AIX configurations.) From 619037af1c0f6daf6b682e974f8e6ffaaf4c01d9 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Mon, 25 Nov 2024 22:42:34 +0100 Subject: [PATCH 24/43] [3.12] gh-127265: Remove single quotes from 'arrow's in tutorial/errors.rst (GH-127268) (cherry picked from commit 26ff32b30553e1f7b0cc822835ad2da8890c180c) Co-authored-by: funkyrailroad --- Doc/tutorial/errors.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/tutorial/errors.rst b/Doc/tutorial/errors.rst index 981b14f5a4212b..247b2f6407f8f7 100644 --- a/Doc/tutorial/errors.rst +++ b/Doc/tutorial/errors.rst @@ -23,7 +23,7 @@ complaint you get while you are still learning Python:: ^^^^^ SyntaxError: invalid syntax -The parser repeats the offending line and displays little 'arrow's pointing +The parser repeats the offending line and displays little arrows pointing at the token in the line where the error was detected. The error may be caused by the absence of a token *before* the indicated token. In the example, the error is detected at the function :func:`print`, since a colon From 08531ac83fc5d844ccbba589913f7393e925dc79 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Tue, 26 Nov 2024 11:13:29 +0100 Subject: [PATCH 25/43] [3.12] gh-127183: Add `_ctypes.CopyComPointer` tests (GH-127184) (GH-127252) gh-127183: Add `_ctypes.CopyComPointer` tests (GH-127184) * Make `create_shelllink_persist` top level function. * Add `CopyComPointerTests`. * Add more tests. * Update tests. * Add assertions for `Release`'s return value. (cherry picked from commit c7f1e3e150ca181f4b4bd1e5b59d492749f00be6) Co-authored-by: Jun Komoda <45822440+junkmd@users.noreply.github.com> --- .../test_win32_com_foreign_func.py | 132 +++++++++++++++--- 1 file changed, 115 insertions(+), 17 deletions(-) diff --git a/Lib/test/test_ctypes/test_win32_com_foreign_func.py b/Lib/test/test_ctypes/test_win32_com_foreign_func.py index 651c9277d59af9..8d217fc17efa02 100644 --- a/Lib/test/test_ctypes/test_win32_com_foreign_func.py +++ b/Lib/test/test_ctypes/test_win32_com_foreign_func.py @@ -9,7 +9,7 @@ raise unittest.SkipTest("Windows-specific test") -from _ctypes import COMError +from _ctypes import COMError, CopyComPointer from ctypes import HRESULT @@ -78,6 +78,19 @@ def is_equal_guid(guid1, guid2): ) +def create_shelllink_persist(typ): + ppst = typ() + # https://learn.microsoft.com/en-us/windows/win32/api/combaseapi/nf-combaseapi-cocreateinstance + ole32.CoCreateInstance( + byref(CLSID_ShellLink), + None, + CLSCTX_SERVER, + byref(IID_IPersist), + byref(ppst), + ) + return ppst + + class ForeignFunctionsThatWillCallComMethodsTests(unittest.TestCase): def setUp(self): # https://learn.microsoft.com/en-us/windows/win32/api/combaseapi/nf-combaseapi-coinitializeex @@ -88,19 +101,6 @@ def tearDown(self): ole32.CoUninitialize() gc.collect() - @staticmethod - def create_shelllink_persist(typ): - ppst = typ() - # https://learn.microsoft.com/en-us/windows/win32/api/combaseapi/nf-combaseapi-cocreateinstance - ole32.CoCreateInstance( - byref(CLSID_ShellLink), - None, - CLSCTX_SERVER, - byref(IID_IPersist), - byref(ppst), - ) - return ppst - def test_without_paramflags_and_iid(self): class IUnknown(c_void_p): QueryInterface = proto_query_interface() @@ -110,7 +110,7 @@ class IUnknown(c_void_p): class IPersist(IUnknown): GetClassID = proto_get_class_id() - ppst = self.create_shelllink_persist(IPersist) + ppst = create_shelllink_persist(IPersist) clsid = GUID() hr_getclsid = ppst.GetClassID(byref(clsid)) @@ -142,7 +142,7 @@ class IUnknown(c_void_p): class IPersist(IUnknown): GetClassID = proto_get_class_id(((OUT, "pClassID"),)) - ppst = self.create_shelllink_persist(IPersist) + ppst = create_shelllink_persist(IPersist) clsid = ppst.GetClassID() self.assertEqual(TRUE, is_equal_guid(CLSID_ShellLink, clsid)) @@ -167,7 +167,7 @@ class IUnknown(c_void_p): class IPersist(IUnknown): GetClassID = proto_get_class_id(((OUT, "pClassID"),), IID_IPersist) - ppst = self.create_shelllink_persist(IPersist) + ppst = create_shelllink_persist(IPersist) clsid = ppst.GetClassID() self.assertEqual(TRUE, is_equal_guid(CLSID_ShellLink, clsid)) @@ -184,5 +184,103 @@ class IPersist(IUnknown): self.assertEqual(0, ppst.Release()) +class CopyComPointerTests(unittest.TestCase): + def setUp(self): + ole32.CoInitializeEx(None, COINIT_APARTMENTTHREADED) + + class IUnknown(c_void_p): + QueryInterface = proto_query_interface(None, IID_IUnknown) + AddRef = proto_add_ref() + Release = proto_release() + + class IPersist(IUnknown): + GetClassID = proto_get_class_id(((OUT, "pClassID"),), IID_IPersist) + + self.IUnknown = IUnknown + self.IPersist = IPersist + + def tearDown(self): + ole32.CoUninitialize() + gc.collect() + + def test_both_are_null(self): + src = self.IPersist() + dst = self.IPersist() + + hr = CopyComPointer(src, byref(dst)) + + self.assertEqual(S_OK, hr) + + self.assertIsNone(src.value) + self.assertIsNone(dst.value) + + def test_src_is_nonnull_and_dest_is_null(self): + # The reference count of the COM pointer created by `CoCreateInstance` + # is initially 1. + src = create_shelllink_persist(self.IPersist) + dst = self.IPersist() + + # `CopyComPointer` calls `AddRef` explicitly in the C implementation. + # The refcount of `src` is incremented from 1 to 2 here. + hr = CopyComPointer(src, byref(dst)) + + self.assertEqual(S_OK, hr) + self.assertEqual(src.value, dst.value) + + # This indicates that the refcount was 2 before the `Release` call. + self.assertEqual(1, src.Release()) + + clsid = dst.GetClassID() + self.assertEqual(TRUE, is_equal_guid(CLSID_ShellLink, clsid)) + + self.assertEqual(0, dst.Release()) + + def test_src_is_null_and_dest_is_nonnull(self): + src = self.IPersist() + dst_orig = create_shelllink_persist(self.IPersist) + dst = self.IPersist() + CopyComPointer(dst_orig, byref(dst)) + self.assertEqual(1, dst_orig.Release()) + + clsid = dst.GetClassID() + self.assertEqual(TRUE, is_equal_guid(CLSID_ShellLink, clsid)) + + # This does NOT affects the refcount of `dst_orig`. + hr = CopyComPointer(src, byref(dst)) + + self.assertEqual(S_OK, hr) + self.assertIsNone(dst.value) + + with self.assertRaises(ValueError): + dst.GetClassID() # NULL COM pointer access + + # This indicates that the refcount was 1 before the `Release` call. + self.assertEqual(0, dst_orig.Release()) + + def test_both_are_nonnull(self): + src = create_shelllink_persist(self.IPersist) + dst_orig = create_shelllink_persist(self.IPersist) + dst = self.IPersist() + CopyComPointer(dst_orig, byref(dst)) + self.assertEqual(1, dst_orig.Release()) + + self.assertEqual(dst.value, dst_orig.value) + self.assertNotEqual(src.value, dst.value) + + hr = CopyComPointer(src, byref(dst)) + + self.assertEqual(S_OK, hr) + self.assertEqual(src.value, dst.value) + self.assertNotEqual(dst.value, dst_orig.value) + + self.assertEqual(1, src.Release()) + + clsid = dst.GetClassID() + self.assertEqual(TRUE, is_equal_guid(CLSID_ShellLink, clsid)) + + self.assertEqual(0, dst.Release()) + self.assertEqual(0, dst_orig.Release()) + + if __name__ == '__main__': unittest.main() From 16e899d58915b978bb491de5d51c1c47f69a590d Mon Sep 17 00:00:00 2001 From: Yuki Kobayashi Date: Wed, 27 Nov 2024 18:46:42 +0900 Subject: [PATCH 26/43] [3.12] gh-101100: Fix sphinx warnings in `howto/*` (GH-127084) (#127311) --- Doc/howto/descriptor.rst | 100 +++++++++++++++++------------------ Doc/howto/enum.rst | 109 ++++++++++++++++++++------------------- Doc/tools/.nitignore | 2 - 3 files changed, 105 insertions(+), 106 deletions(-) diff --git a/Doc/howto/descriptor.rst b/Doc/howto/descriptor.rst index 985e2639d874cd..ac73fcb3b01966 100644 --- a/Doc/howto/descriptor.rst +++ b/Doc/howto/descriptor.rst @@ -42,7 +42,7 @@ add new capabilities one by one. Simple example: A descriptor that returns a constant ---------------------------------------------------- -The :class:`Ten` class is a descriptor whose :meth:`__get__` method always +The :class:`!Ten` class is a descriptor whose :meth:`~object.__get__` method always returns the constant ``10``: .. testcode:: @@ -120,10 +120,10 @@ different, updated answers each time:: 2 Besides showing how descriptors can run computations, this example also -reveals the purpose of the parameters to :meth:`__get__`. The *self* +reveals the purpose of the parameters to :meth:`~object.__get__`. The *self* parameter is *size*, an instance of *DirectorySize*. The *obj* parameter is either *g* or *s*, an instance of *Directory*. It is the *obj* parameter that -lets the :meth:`__get__` method learn the target directory. The *objtype* +lets the :meth:`~object.__get__` method learn the target directory. The *objtype* parameter is the class *Directory*. @@ -133,7 +133,7 @@ Managed attributes A popular use for descriptors is managing access to instance data. The descriptor is assigned to a public attribute in the class dictionary while the actual data is stored as a private attribute in the instance dictionary. The -descriptor's :meth:`__get__` and :meth:`__set__` methods are triggered when +descriptor's :meth:`~object.__get__` and :meth:`~object.__set__` methods are triggered when the public attribute is accessed. In the following example, *age* is the public attribute and *_age* is the @@ -215,9 +215,9 @@ Customized names When a class uses descriptors, it can inform each descriptor about which variable name was used. -In this example, the :class:`Person` class has two descriptor instances, -*name* and *age*. When the :class:`Person` class is defined, it makes a -callback to :meth:`__set_name__` in *LoggedAccess* so that the field names can +In this example, the :class:`!Person` class has two descriptor instances, +*name* and *age*. When the :class:`!Person` class is defined, it makes a +callback to :meth:`~object.__set_name__` in *LoggedAccess* so that the field names can be recorded, giving each descriptor its own *public_name* and *private_name*: .. testcode:: @@ -253,8 +253,8 @@ be recorded, giving each descriptor its own *public_name* and *private_name*: def birthday(self): self.age += 1 -An interactive session shows that the :class:`Person` class has called -:meth:`__set_name__` so that the field names would be recorded. Here +An interactive session shows that the :class:`!Person` class has called +:meth:`~object.__set_name__` so that the field names would be recorded. Here we call :func:`vars` to look up the descriptor without triggering it: .. doctest:: @@ -294,10 +294,10 @@ The two *Person* instances contain only the private names: Closing thoughts ---------------- -A :term:`descriptor` is what we call any object that defines :meth:`__get__`, -:meth:`__set__`, or :meth:`__delete__`. +A :term:`descriptor` is what we call any object that defines :meth:`~object.__get__`, +:meth:`~object.__set__`, or :meth:`~object.__delete__`. -Optionally, descriptors can have a :meth:`__set_name__` method. This is only +Optionally, descriptors can have a :meth:`~object.__set_name__` method. This is only used in cases where a descriptor needs to know either the class where it was created or the name of class variable it was assigned to. (This method, if present, is called even if the class is not a descriptor.) @@ -337,7 +337,7 @@ any data, it verifies that the new value meets various type and range restrictions. If those restrictions aren't met, it raises an exception to prevent data corruption at its source. -This :class:`Validator` class is both an :term:`abstract base class` and a +This :class:`!Validator` class is both an :term:`abstract base class` and a managed attribute descriptor: .. testcode:: @@ -360,8 +360,8 @@ managed attribute descriptor: def validate(self, value): pass -Custom validators need to inherit from :class:`Validator` and must supply a -:meth:`validate` method to test various restrictions as needed. +Custom validators need to inherit from :class:`!Validator` and must supply a +:meth:`!validate` method to test various restrictions as needed. Custom validators @@ -369,13 +369,13 @@ Custom validators Here are three practical data validation utilities: -1) :class:`OneOf` verifies that a value is one of a restricted set of options. +1) :class:`!OneOf` verifies that a value is one of a restricted set of options. -2) :class:`Number` verifies that a value is either an :class:`int` or +2) :class:`!Number` verifies that a value is either an :class:`int` or :class:`float`. Optionally, it verifies that a value is between a given minimum or maximum. -3) :class:`String` verifies that a value is a :class:`str`. Optionally, it +3) :class:`!String` verifies that a value is a :class:`str`. Optionally, it validates a given minimum or maximum length. It can validate a user-defined `predicate `_ as well. @@ -498,8 +498,8 @@ Definition and introduction --------------------------- In general, a descriptor is an attribute value that has one of the methods in -the descriptor protocol. Those methods are :meth:`__get__`, :meth:`__set__`, -and :meth:`__delete__`. If any of those methods are defined for an +the descriptor protocol. Those methods are :meth:`~object.__get__`, :meth:`~object.__set__`, +and :meth:`~object.__delete__`. If any of those methods are defined for an attribute, it is said to be a :term:`descriptor`. The default behavior for attribute access is to get, set, or delete the @@ -531,8 +531,8 @@ That is all there is to it. Define any of these methods and an object is considered a descriptor and can override default behavior upon being looked up as an attribute. -If an object defines :meth:`__set__` or :meth:`__delete__`, it is considered -a data descriptor. Descriptors that only define :meth:`__get__` are called +If an object defines :meth:`~object.__set__` or :meth:`~object.__delete__`, it is considered +a data descriptor. Descriptors that only define :meth:`~object.__get__` are called non-data descriptors (they are often used for methods but other uses are possible). @@ -542,9 +542,9 @@ has an entry with the same name as a data descriptor, the data descriptor takes precedence. If an instance's dictionary has an entry with the same name as a non-data descriptor, the dictionary entry takes precedence. -To make a read-only data descriptor, define both :meth:`__get__` and -:meth:`__set__` with the :meth:`__set__` raising an :exc:`AttributeError` when -called. Defining the :meth:`__set__` method with an exception raising +To make a read-only data descriptor, define both :meth:`~object.__get__` and +:meth:`~object.__set__` with the :meth:`~object.__set__` raising an :exc:`AttributeError` when +called. Defining the :meth:`~object.__set__` method with an exception raising placeholder is enough to make it a data descriptor. @@ -571,7 +571,7 @@ Invocation from an instance Instance lookup scans through a chain of namespaces giving data descriptors the highest priority, followed by instance variables, then non-data -descriptors, then class variables, and lastly :meth:`__getattr__` if it is +descriptors, then class variables, and lastly :meth:`~object.__getattr__` if it is provided. If a descriptor is found for ``a.x``, then it is invoked with: @@ -716,12 +716,12 @@ a pure Python equivalent: >>> object_getattribute(u2, 'x') == u2.x == (D1, u2, U2) True -Note, there is no :meth:`__getattr__` hook in the :meth:`__getattribute__` -code. That is why calling :meth:`__getattribute__` directly or with -``super().__getattribute__`` will bypass :meth:`__getattr__` entirely. +Note, there is no :meth:`~object.__getattr__` hook in the :meth:`~object.__getattribute__` +code. That is why calling :meth:`~object.__getattribute__` directly or with +``super().__getattribute__`` will bypass :meth:`~object.__getattr__` entirely. Instead, it is the dot operator and the :func:`getattr` function that are -responsible for invoking :meth:`__getattr__` whenever :meth:`__getattribute__` +responsible for invoking :meth:`~object.__getattr__` whenever :meth:`~object.__getattribute__` raises an :exc:`AttributeError`. Their logic is encapsulated in a helper function: @@ -773,8 +773,8 @@ Invocation from a class ----------------------- The logic for a dotted lookup such as ``A.x`` is in -:meth:`type.__getattribute__`. The steps are similar to those for -:meth:`object.__getattribute__` but the instance dictionary lookup is replaced +:meth:`!type.__getattribute__`. The steps are similar to those for +:meth:`!object.__getattribute__` but the instance dictionary lookup is replaced by a search through the class's :term:`method resolution order`. If a descriptor is found, it is invoked with ``desc.__get__(None, A)``. @@ -786,7 +786,7 @@ The full C implementation can be found in :c:func:`!type_getattro` and Invocation from super --------------------- -The logic for super's dotted lookup is in the :meth:`__getattribute__` method for +The logic for super's dotted lookup is in the :meth:`~object.__getattribute__` method for object returned by :func:`super`. A dotted lookup such as ``super(A, obj).m`` searches ``obj.__class__.__mro__`` @@ -803,21 +803,21 @@ The full C implementation can be found in :c:func:`!super_getattro` in Summary of invocation logic --------------------------- -The mechanism for descriptors is embedded in the :meth:`__getattribute__` +The mechanism for descriptors is embedded in the :meth:`~object.__getattribute__` methods for :class:`object`, :class:`type`, and :func:`super`. The important points to remember are: -* Descriptors are invoked by the :meth:`__getattribute__` method. +* Descriptors are invoked by the :meth:`~object.__getattribute__` method. * Classes inherit this machinery from :class:`object`, :class:`type`, or :func:`super`. -* Overriding :meth:`__getattribute__` prevents automatic descriptor calls +* Overriding :meth:`~object.__getattribute__` prevents automatic descriptor calls because all the descriptor logic is in that method. -* :meth:`object.__getattribute__` and :meth:`type.__getattribute__` make - different calls to :meth:`__get__`. The first includes the instance and may +* :meth:`!object.__getattribute__` and :meth:`!type.__getattribute__` make + different calls to :meth:`~object.__get__`. The first includes the instance and may include the class. The second puts in ``None`` for the instance and always includes the class. @@ -832,16 +832,16 @@ Automatic name notification Sometimes it is desirable for a descriptor to know what class variable name it was assigned to. When a new class is created, the :class:`type` metaclass scans the dictionary of the new class. If any of the entries are descriptors -and if they define :meth:`__set_name__`, that method is called with two +and if they define :meth:`~object.__set_name__`, that method is called with two arguments. The *owner* is the class where the descriptor is used, and the *name* is the class variable the descriptor was assigned to. The implementation details are in :c:func:`!type_new` and :c:func:`!set_names` in :source:`Objects/typeobject.c`. -Since the update logic is in :meth:`type.__new__`, notifications only take +Since the update logic is in :meth:`!type.__new__`, notifications only take place at the time of class creation. If descriptors are added to the class -afterwards, :meth:`__set_name__` will need to be called manually. +afterwards, :meth:`~object.__set_name__` will need to be called manually. ORM example @@ -870,7 +870,7 @@ care of lookups or updates: conn.execute(self.store, [value, obj.key]) conn.commit() -We can use the :class:`Field` class to define `models +We can use the :class:`!Field` class to define `models `_ that describe the schema for each table in a database: @@ -1150,7 +1150,7 @@ to wrap access to the value attribute in a property data descriptor: self.recalc() return self._value -Either the built-in :func:`property` or our :func:`Property` equivalent would +Either the built-in :func:`property` or our :func:`!Property` equivalent would work in this example. @@ -1183,7 +1183,7 @@ roughly equivalent to: return func(obj, *args, **kwargs) To support automatic creation of methods, functions include the -:meth:`__get__` method for binding methods during attribute access. This +:meth:`~object.__get__` method for binding methods during attribute access. This means that functions are non-data descriptors that return bound methods during dotted lookup from an instance. Here's how it works: @@ -1215,19 +1215,19 @@ The function has a :term:`qualified name` attribute to support introspection: 'D.f' Accessing the function through the class dictionary does not invoke -:meth:`__get__`. Instead, it just returns the underlying function object:: +:meth:`~object.__get__`. Instead, it just returns the underlying function object:: >>> D.__dict__['f'] -Dotted access from a class calls :meth:`__get__` which just returns the +Dotted access from a class calls :meth:`~object.__get__` which just returns the underlying function unchanged:: >>> D.f The interesting behavior occurs during dotted access from an instance. The -dotted lookup calls :meth:`__get__` which returns a bound method object:: +dotted lookup calls :meth:`~object.__get__` which returns a bound method object:: >>> d = D() >>> d.f @@ -1252,7 +1252,7 @@ Kinds of methods Non-data descriptors provide a simple mechanism for variations on the usual patterns of binding functions into methods. -To recap, functions have a :meth:`__get__` method so that they can be converted +To recap, functions have a :meth:`~object.__get__` method so that they can be converted to a method when accessed as attributes. The non-data descriptor transforms an ``obj.f(*args)`` call into ``f(obj, *args)``. Calling ``cls.f(*args)`` becomes ``f(*args)``. @@ -1682,7 +1682,7 @@ by member descriptors: 'Emulate member_repr() in Objects/descrobject.c' return f'' -The :meth:`type.__new__` method takes care of adding member objects to class +The :meth:`!type.__new__` method takes care of adding member objects to class variables: .. testcode:: @@ -1733,7 +1733,7 @@ Python: ) super().__delattr__(name) -To use the simulation in a real class, just inherit from :class:`Object` and +To use the simulation in a real class, just inherit from :class:`!Object` and set the :term:`metaclass` to :class:`Type`: .. testcode:: diff --git a/Doc/howto/enum.rst b/Doc/howto/enum.rst index 6d43202aca9c0d..9b6bb613749909 100644 --- a/Doc/howto/enum.rst +++ b/Doc/howto/enum.rst @@ -62,12 +62,12 @@ The *type* of an enumeration member is the enum it belongs to:: >>> isinstance(Weekday.FRIDAY, Weekday) True -Enum members have an attribute that contains just their :attr:`name`:: +Enum members have an attribute that contains just their :attr:`!name`:: >>> print(Weekday.TUESDAY.name) TUESDAY -Likewise, they have an attribute for their :attr:`value`:: +Likewise, they have an attribute for their :attr:`!value`:: >>> Weekday.WEDNESDAY.value @@ -75,17 +75,18 @@ Likewise, they have an attribute for their :attr:`value`:: Unlike many languages that treat enumerations solely as name/value pairs, Python Enums can have behavior added. For example, :class:`datetime.date` -has two methods for returning the weekday: :meth:`weekday` and :meth:`isoweekday`. +has two methods for returning the weekday: +:meth:`~datetime.date.weekday` and :meth:`~datetime.date.isoweekday`. The difference is that one of them counts from 0-6 and the other from 1-7. -Rather than keep track of that ourselves we can add a method to the :class:`Weekday` -enum to extract the day from the :class:`date` instance and return the matching +Rather than keep track of that ourselves we can add a method to the :class:`!Weekday` +enum to extract the day from the :class:`~datetime.date` instance and return the matching enum member:: @classmethod def from_date(cls, date): return cls(date.isoweekday()) -The complete :class:`Weekday` enum now looks like this:: +The complete :class:`!Weekday` enum now looks like this:: >>> class Weekday(Enum): ... MONDAY = 1 @@ -108,7 +109,7 @@ Now we can find out what today is! Observe:: Of course, if you're reading this on some other day, you'll see that day instead. -This :class:`Weekday` enum is great if our variable only needs one day, but +This :class:`!Weekday` enum is great if our variable only needs one day, but what if we need several? Maybe we're writing a function to plot chores during a week, and don't want to use a :class:`list` -- we could use a different type of :class:`Enum`:: @@ -126,7 +127,7 @@ of :class:`Enum`:: We've changed two things: we're inherited from :class:`Flag`, and the values are all powers of 2. -Just like the original :class:`Weekday` enum above, we can have a single selection:: +Just like the original :class:`!Weekday` enum above, we can have a single selection:: >>> first_week_day = Weekday.MONDAY >>> first_week_day @@ -201,7 +202,7 @@ If you want to access enum members by *name*, use item access:: >>> Color['GREEN'] -If you have an enum member and need its :attr:`name` or :attr:`value`:: +If you have an enum member and need its :attr:`!name` or :attr:`!value`:: >>> member = Color.RED >>> member.name @@ -282,7 +283,7 @@ If the exact value is unimportant you can use :class:`auto`:: >>> [member.value for member in Color] [1, 2, 3] -The values are chosen by :func:`_generate_next_value_`, which can be +The values are chosen by :func:`~Enum._generate_next_value_`, which can be overridden:: >>> class AutoName(Enum): @@ -301,7 +302,7 @@ overridden:: .. note:: - The :meth:`_generate_next_value_` method must be defined before any members. + The :meth:`~Enum._generate_next_value_` method must be defined before any members. Iteration --------- @@ -422,18 +423,18 @@ Then:: The rules for what is allowed are as follows: names that start and end with a single underscore are reserved by enum and cannot be used; all other attributes defined within an enumeration will become members of this -enumeration, with the exception of special methods (:meth:`__str__`, -:meth:`__add__`, etc.), descriptors (methods are also descriptors), and -variable names listed in :attr:`_ignore_`. +enumeration, with the exception of special methods (:meth:`~object.__str__`, +:meth:`~object.__add__`, etc.), descriptors (methods are also descriptors), and +variable names listed in :attr:`~Enum._ignore_`. -Note: if your enumeration defines :meth:`__new__` and/or :meth:`__init__`, +Note: if your enumeration defines :meth:`~object.__new__` and/or :meth:`~object.__init__`, any value(s) given to the enum member will be passed into those methods. See `Planet`_ for an example. .. note:: - The :meth:`__new__` method, if defined, is used during creation of the Enum - members; it is then replaced by Enum's :meth:`__new__` which is used after + The :meth:`~object.__new__` method, if defined, is used during creation of the Enum + members; it is then replaced by Enum's :meth:`~object.__new__` which is used after class creation for lookup of existing members. See :ref:`new-vs-init` for more details. @@ -525,7 +526,7 @@ from that module. nested in other classes. It is possible to modify how enum members are pickled/unpickled by defining -:meth:`__reduce_ex__` in the enumeration class. The default method is by-value, +:meth:`~object.__reduce_ex__` in the enumeration class. The default method is by-value, but enums with complicated values may want to use by-name:: >>> import enum @@ -561,7 +562,7 @@ values. The last two options enable assigning arbitrary values to enumerations; the others auto-assign increasing integers starting with 1 (use the ``start`` parameter to specify a different starting value). A new class derived from :class:`Enum` is returned. In other words, the above -assignment to :class:`Animal` is equivalent to:: +assignment to :class:`!Animal` is equivalent to:: >>> class Animal(Enum): ... ANT = 1 @@ -872,7 +873,7 @@ simple to implement independently:: pass This demonstrates how similar derived enumerations can be defined; for example -a :class:`FloatEnum` that mixes in :class:`float` instead of :class:`int`. +a :class:`!FloatEnum` that mixes in :class:`float` instead of :class:`int`. Some rules: @@ -886,32 +887,32 @@ Some rules: additional type, all the members must have values of that type, e.g. :class:`int` above. This restriction does not apply to mix-ins which only add methods and don't specify another type. -4. When another data type is mixed in, the :attr:`value` attribute is *not the +4. When another data type is mixed in, the :attr:`~Enum.value` attribute is *not the same* as the enum member itself, although it is equivalent and will compare equal. -5. A ``data type`` is a mixin that defines :meth:`__new__`, or a +5. A ``data type`` is a mixin that defines :meth:`~object.__new__`, or a :class:`~dataclasses.dataclass` 6. %-style formatting: ``%s`` and ``%r`` call the :class:`Enum` class's - :meth:`__str__` and :meth:`__repr__` respectively; other codes (such as + :meth:`~object.__str__` and :meth:`~object.__repr__` respectively; other codes (such as ``%i`` or ``%h`` for IntEnum) treat the enum member as its mixed-in type. 7. :ref:`Formatted string literals `, :meth:`str.format`, - and :func:`format` will use the enum's :meth:`__str__` method. + and :func:`format` will use the enum's :meth:`~object.__str__` method. .. note:: Because :class:`IntEnum`, :class:`IntFlag`, and :class:`StrEnum` are designed to be drop-in replacements for existing constants, their - :meth:`__str__` method has been reset to their data types' - :meth:`__str__` method. + :meth:`~object.__str__` method has been reset to their data types' + :meth:`~object.__str__` method. .. _new-vs-init: -When to use :meth:`__new__` vs. :meth:`__init__` ------------------------------------------------- +When to use :meth:`~object.__new__` vs. :meth:`~object.__init__` +---------------------------------------------------------------- -:meth:`__new__` must be used whenever you want to customize the actual value of +:meth:`~object.__new__` must be used whenever you want to customize the actual value of the :class:`Enum` member. Any other modifications may go in either -:meth:`__new__` or :meth:`__init__`, with :meth:`__init__` being preferred. +:meth:`~object.__new__` or :meth:`~object.__init__`, with :meth:`~object.__init__` being preferred. For example, if you want to pass several items to the constructor, but only want one of them to be the value:: @@ -950,28 +951,28 @@ Finer Points Supported ``__dunder__`` names """""""""""""""""""""""""""""" -:attr:`__members__` is a read-only ordered mapping of ``member_name``:``member`` +:attr:`~enum.EnumType.__members__` is a read-only ordered mapping of ``member_name``:``member`` items. It is only available on the class. -:meth:`__new__`, if specified, must create and return the enum members; it is -also a very good idea to set the member's :attr:`_value_` appropriately. Once +:meth:`~object.__new__`, if specified, must create and return the enum members; it is +also a very good idea to set the member's :attr:`~Enum._value_` appropriately. Once all the members are created it is no longer used. Supported ``_sunder_`` names """""""""""""""""""""""""""" -- ``_name_`` -- name of the member -- ``_value_`` -- value of the member; can be set / modified in ``__new__`` +- :attr:`~Enum._name_` -- name of the member +- :attr:`~Enum._value_` -- value of the member; can be set / modified in ``__new__`` -- ``_missing_`` -- a lookup function used when a value is not found; may be +- :meth:`~Enum._missing_` -- a lookup function used when a value is not found; may be overridden -- ``_ignore_`` -- a list of names, either as a :class:`list` or a :class:`str`, +- :attr:`~Enum._ignore_` -- a list of names, either as a :class:`list` or a :class:`str`, that will not be transformed into members, and will be removed from the final class -- ``_order_`` -- used in Python 2/3 code to ensure member order is consistent +- :attr:`~Enum._order_` -- used in Python 2/3 code to ensure member order is consistent (class attribute, removed during class creation) -- ``_generate_next_value_`` -- used by the `Functional API`_ and by +- :meth:`~Enum._generate_next_value_` -- used by the `Functional API`_ and by :class:`auto` to get an appropriate value for an enum member; may be overridden @@ -986,7 +987,7 @@ Supported ``_sunder_`` names .. versionadded:: 3.6 ``_missing_``, ``_order_``, ``_generate_next_value_`` .. versionadded:: 3.7 ``_ignore_`` -To help keep Python 2 / Python 3 code in sync an :attr:`_order_` attribute can +To help keep Python 2 / Python 3 code in sync an :attr:`~Enum._order_` attribute can be provided. It will be checked against the actual order of the enumeration and raise an error if the two do not match:: @@ -1004,7 +1005,7 @@ and raise an error if the two do not match:: .. note:: - In Python 2 code the :attr:`_order_` attribute is necessary as definition + In Python 2 code the :attr:`~Enum._order_` attribute is necessary as definition order is lost before it can be recorded. @@ -1193,12 +1194,12 @@ Enum Classes ^^^^^^^^^^^^ The :class:`EnumType` metaclass is responsible for providing the -:meth:`__contains__`, :meth:`__dir__`, :meth:`__iter__` and other methods that +:meth:`~object.__contains__`, :meth:`~object.__dir__`, :meth:`~object.__iter__` and other methods that allow one to do things with an :class:`Enum` class that fail on a typical class, such as ``list(Color)`` or ``some_enum_var in Color``. :class:`EnumType` is responsible for ensuring that various other methods on the final :class:`Enum` -class are correct (such as :meth:`__new__`, :meth:`__getnewargs__`, -:meth:`__str__` and :meth:`__repr__`). +class are correct (such as :meth:`~object.__new__`, :meth:`~object.__getnewargs__`, +:meth:`~object.__str__` and :meth:`~object.__repr__`). Flag Classes ^^^^^^^^^^^^ @@ -1213,7 +1214,7 @@ Enum Members (aka instances) The most interesting thing about enum members is that they are singletons. :class:`EnumType` creates them all while it is creating the enum class itself, -and then puts a custom :meth:`__new__` in place to ensure that no new ones are +and then puts a custom :meth:`~object.__new__` in place to ensure that no new ones are ever instantiated by returning only the existing member instances. Flag Members @@ -1261,7 +1262,7 @@ is. There are several ways to define this type of simple enumeration: - use instances of :class:`auto` for the value - use instances of :class:`object` as the value - use a descriptive string as the value -- use a tuple as the value and a custom :meth:`__new__` to replace the +- use a tuple as the value and a custom :meth:`~object.__new__` to replace the tuple with an :class:`int` value Using any of these methods signifies to the user that these values are not @@ -1297,7 +1298,7 @@ Using :class:`object` would look like:: > This is also a good example of why you might want to write your own -:meth:`__repr__`:: +:meth:`~object.__repr__`:: >>> class Color(Enum): ... RED = object() @@ -1325,10 +1326,10 @@ Using a string as the value would look like:: -Using a custom :meth:`__new__` -"""""""""""""""""""""""""""""" +Using a custom :meth:`~object.__new__` +"""""""""""""""""""""""""""""""""""""" -Using an auto-numbering :meth:`__new__` would look like:: +Using an auto-numbering :meth:`~object.__new__` would look like:: >>> class AutoNumber(Enum): ... def __new__(cls): @@ -1374,8 +1375,8 @@ to handle any extra arguments:: .. note:: - The :meth:`__new__` method, if defined, is used during creation of the Enum - members; it is then replaced by Enum's :meth:`__new__` which is used after + The :meth:`~object.__new__` method, if defined, is used during creation of the Enum + members; it is then replaced by Enum's :meth:`~object.__new__` which is used after class creation for lookup of existing members. .. warning:: @@ -1458,7 +1459,7 @@ alias:: Planet ^^^^^^ -If :meth:`__new__` or :meth:`__init__` is defined, the value of the enum member +If :meth:`~object.__new__` or :meth:`~object.__init__` is defined, the value of the enum member will be passed to those methods:: >>> class Planet(Enum): @@ -1489,7 +1490,7 @@ will be passed to those methods:: TimePeriod ^^^^^^^^^^ -An example to show the :attr:`_ignore_` attribute in use:: +An example to show the :attr:`~Enum._ignore_` attribute in use:: >>> from datetime import timedelta >>> class Period(timedelta, Enum): diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 108ad3938a135b..a0fff8e4a87d35 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -14,8 +14,6 @@ Doc/c-api/type.rst Doc/c-api/typeobj.rst Doc/extending/extending.rst Doc/glossary.rst -Doc/howto/descriptor.rst -Doc/howto/enum.rst Doc/library/2to3.rst Doc/library/aifc.rst Doc/library/ast.rst From 97ed21636cd204df4295cbc4f9c27053b6fd8a45 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Wed, 27 Nov 2024 13:22:32 +0100 Subject: [PATCH 27/43] [3.12] gh-109746: Fix race condition in test_start_new_thread_failed (GH-127299) (GH-127324) (cherry picked from commit 83926d3b4c7847394b5e2531e9566d7fc9fbea0f) Co-authored-by: Serhiy Storchaka --- Lib/test/test_threading.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index 183af7c1f0836f..75a56f7830bead 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -1175,11 +1175,11 @@ def f(): except RuntimeError: print('ok') else: - print('skip') + print('!skip!') """ _, out, err = assert_python_ok("-u", "-c", code) out = out.strip() - if out == b'skip': + if b'!skip!' in out: self.skipTest('RLIMIT_NPROC had no effect; probably superuser') self.assertEqual(out, b'ok') self.assertEqual(err, b'') From c3bb32de9df7cb22b40b06098d2f956a8ff1e4d0 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Wed, 27 Nov 2024 15:00:30 +0100 Subject: [PATCH 28/43] [3.12] gh-124008: Fix calculation of the number of written bytes for the Windows console (GH-124059) (GH-127326) Since MultiByteToWideChar()/WideCharToMultiByte() is not reversible if the data contains invalid UTF-8 sequences, use binary search to calculate the number of written bytes from the number of written characters. Also fix writing incomplete UTF-8 sequences. Also fix handling of memory allocation failures. (cherry picked from commit 3cf83d91a5baf3600dd60f7aaaf4fb6d73c4b8a9) Co-authored-by: Serhiy Storchaka --- Lib/test/test_winconsoleio.py | 23 ++++ ...-09-13-18-24-27.gh-issue-124008.XaiPQx.rst | 2 + Modules/_io/winconsoleio.c | 118 +++++++++++++----- 3 files changed, 115 insertions(+), 28 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-09-13-18-24-27.gh-issue-124008.XaiPQx.rst diff --git a/Lib/test/test_winconsoleio.py b/Lib/test/test_winconsoleio.py index cf8b105823b262..459d219290e432 100644 --- a/Lib/test/test_winconsoleio.py +++ b/Lib/test/test_winconsoleio.py @@ -126,6 +126,29 @@ def test_write_empty_data(self): with ConIO('CONOUT$', 'w') as f: self.assertEqual(f.write(b''), 0) + @requires_resource('console') + def test_write(self): + testcases = [] + with ConIO('CONOUT$', 'w') as f: + for a in [ + b'', + b'abc', + b'\xc2\xa7\xe2\x98\x83\xf0\x9f\x90\x8d', + b'\xff'*10, + ]: + for b in b'\xc2\xa7', b'\xe2\x98\x83', b'\xf0\x9f\x90\x8d': + testcases.append(a + b) + for i in range(1, len(b)): + data = a + b[:i] + testcases.append(data + b'z') + testcases.append(data + b'\xff') + # incomplete multibyte sequence + with self.subTest(data=data): + self.assertEqual(f.write(data), len(a)) + for data in testcases: + with self.subTest(data=data): + self.assertEqual(f.write(data), len(data)) + def assertStdinRoundTrip(self, text): stdin = open('CONIN$', 'r') old_stdin = sys.stdin diff --git a/Misc/NEWS.d/next/Library/2024-09-13-18-24-27.gh-issue-124008.XaiPQx.rst b/Misc/NEWS.d/next/Library/2024-09-13-18-24-27.gh-issue-124008.XaiPQx.rst new file mode 100644 index 00000000000000..cd6dd9a7a97e90 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-09-13-18-24-27.gh-issue-124008.XaiPQx.rst @@ -0,0 +1,2 @@ +Fix possible crash (in debug build), incorrect output or returning incorrect +value from raw binary ``write()`` when writing to console on Windows. diff --git a/Modules/_io/winconsoleio.c b/Modules/_io/winconsoleio.c index c2c365e0807f0a..da4907ec2db2a8 100644 --- a/Modules/_io/winconsoleio.c +++ b/Modules/_io/winconsoleio.c @@ -135,19 +135,67 @@ char _PyIO_get_console_type(PyObject *path_or_fd) { } static DWORD -_find_last_utf8_boundary(const char *buf, DWORD len) +_find_last_utf8_boundary(const unsigned char *buf, DWORD len) { - /* This function never returns 0, returns the original len instead */ - DWORD count = 1; - if (len == 0 || (buf[len - 1] & 0x80) == 0) { - return len; - } - for (;; count++) { - if (count > 3 || count >= len) { + for (DWORD count = 1; count < 4 && count <= len; count++) { + unsigned char c = buf[len - count]; + if (c < 0x80) { + /* No starting byte found. */ return len; } - if ((buf[len - count] & 0xc0) != 0x80) { - return len - count; + if (c >= 0xc0) { + if (c < 0xe0 /* 2-bytes sequence */ ? count < 2 : + c < 0xf0 /* 3-bytes sequence */ ? count < 3 : + c < 0xf8 /* 4-bytes sequence */) + { + /* Incomplete multibyte sequence. */ + return len - count; + } + /* Either complete or invalid sequence. */ + return len; + } + } + /* Either complete 4-bytes sequence or invalid sequence. */ + return len; +} + +/* Find the number of UTF-8 bytes that corresponds to the specified number of + * wchars. + * I.e. find x <= len so that MultiByteToWideChar(CP_UTF8, 0, s, x, NULL, 0) == n. + * + * WideCharToMultiByte() cannot be used for this, because the UTF-8 -> wchar + * conversion is not reversible (invalid UTF-8 byte produces \ufffd which + * will be converted back to 3-bytes UTF-8 sequence \xef\xbf\xbd). + * So we need to use binary search. + */ +static DWORD +_wchar_to_utf8_count(const unsigned char *s, DWORD len, DWORD n) +{ + DWORD start = 0; + while (1) { + DWORD mid = 0; + for (DWORD i = len / 2; i <= len; i++) { + mid = _find_last_utf8_boundary(s, i); + if (mid != 0) { + break; + } + /* The middle could split the first multibytes sequence. */ + } + if (mid == len) { + return start + len; + } + if (mid == 0) { + mid = len > 1 ? len - 1 : 1; + } + DWORD wlen = MultiByteToWideChar(CP_UTF8, 0, s, mid, NULL, 0); + if (wlen <= n) { + s += mid; + start += mid; + len -= mid; + n -= wlen; + } + else { + len = mid; } } } @@ -556,8 +604,10 @@ read_console_w(HANDLE handle, DWORD maxlen, DWORD *readlen) { int err = 0, sig = 0; wchar_t *buf = (wchar_t*)PyMem_Malloc(maxlen * sizeof(wchar_t)); - if (!buf) + if (!buf) { + PyErr_NoMemory(); goto error; + } *readlen = 0; @@ -615,6 +665,7 @@ read_console_w(HANDLE handle, DWORD maxlen, DWORD *readlen) { Py_UNBLOCK_THREADS if (!newbuf) { sig = -1; + PyErr_NoMemory(); break; } buf = newbuf; @@ -638,8 +689,10 @@ read_console_w(HANDLE handle, DWORD maxlen, DWORD *readlen) { if (*readlen > 0 && buf[0] == L'\x1a') { PyMem_Free(buf); buf = (wchar_t *)PyMem_Malloc(sizeof(wchar_t)); - if (!buf) + if (!buf) { + PyErr_NoMemory(); goto error; + } buf[0] = L'\0'; *readlen = 0; } @@ -817,8 +870,10 @@ _io__WindowsConsoleIO_readall_impl(winconsoleio *self) bufsize = BUFSIZ; buf = (wchar_t*)PyMem_Malloc((bufsize + 1) * sizeof(wchar_t)); - if (buf == NULL) + if (buf == NULL) { + PyErr_NoMemory(); return NULL; + } while (1) { wchar_t *subbuf; @@ -840,6 +895,7 @@ _io__WindowsConsoleIO_readall_impl(winconsoleio *self) (bufsize + 1) * sizeof(wchar_t)); if (tmp == NULL) { PyMem_Free(buf); + PyErr_NoMemory(); return NULL; } buf = tmp; @@ -1015,43 +1071,49 @@ _io__WindowsConsoleIO_write_impl(winconsoleio *self, PyTypeObject *cls, len = (DWORD)b->len; Py_BEGIN_ALLOW_THREADS - wlen = MultiByteToWideChar(CP_UTF8, 0, b->buf, len, NULL, 0); - /* issue11395 there is an unspecified upper bound on how many bytes can be written at once. We cap at 32k - the caller will have to handle partial writes. Since we don't know how many input bytes are being ignored, we have to reduce and recalculate. */ - while (wlen > 32766 / sizeof(wchar_t)) { - len /= 2; + const DWORD max_wlen = 32766 / sizeof(wchar_t); + /* UTF-8 to wchar ratio is at most 3:1. */ + len = Py_MIN(len, max_wlen * 3); + while (1) { /* Fix for github issues gh-110913 and gh-82052. */ len = _find_last_utf8_boundary(b->buf, len); wlen = MultiByteToWideChar(CP_UTF8, 0, b->buf, len, NULL, 0); + if (wlen <= max_wlen) { + break; + } + len /= 2; } Py_END_ALLOW_THREADS - if (!wlen) - return PyErr_SetFromWindowsErr(0); + if (!wlen) { + return PyLong_FromLong(0); + } wbuf = (wchar_t*)PyMem_Malloc(wlen * sizeof(wchar_t)); + if (!wbuf) { + PyErr_NoMemory(); + return NULL; + } Py_BEGIN_ALLOW_THREADS wlen = MultiByteToWideChar(CP_UTF8, 0, b->buf, len, wbuf, wlen); if (wlen) { res = WriteConsoleW(handle, wbuf, wlen, &n, NULL); +#ifdef Py_DEBUG + if (res) { +#else if (res && n < wlen) { +#endif /* Wrote fewer characters than expected, which means our * len value may be wrong. So recalculate it from the - * characters that were written. As this could potentially - * result in a different value, we also validate that value. + * characters that were written. */ - len = WideCharToMultiByte(CP_UTF8, 0, wbuf, n, - NULL, 0, NULL, NULL); - if (len) { - wlen = MultiByteToWideChar(CP_UTF8, 0, b->buf, len, - NULL, 0); - assert(wlen == len); - } + len = _wchar_to_utf8_count(b->buf, len, n); } } else res = 0; From f4c9f391650ebca213905c1308f87abac670033e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 28 Nov 2024 14:57:35 +0100 Subject: [PATCH 29/43] [3.12] gh-123967: Fix faulthandler for trampoline frames (#127329) (#127363) gh-123967: Fix faulthandler for trampoline frames (#127329) If the top-most frame is a trampoline frame, skip it. (cherry picked from commit 58e334e1431b2ed6b70ee42501ea73e08084e769) --- ...-11-27-14-06-35.gh-issue-123967.wxUmnW.rst | 2 ++ Python/traceback.c | 23 +++++++++++-------- 2 files changed, 16 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-27-14-06-35.gh-issue-123967.wxUmnW.rst diff --git a/Misc/NEWS.d/next/Library/2024-11-27-14-06-35.gh-issue-123967.wxUmnW.rst b/Misc/NEWS.d/next/Library/2024-11-27-14-06-35.gh-issue-123967.wxUmnW.rst new file mode 100644 index 00000000000000..788fe0c78ef257 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-27-14-06-35.gh-issue-123967.wxUmnW.rst @@ -0,0 +1,2 @@ +Fix faulthandler for trampoline frames. If the top-most frame is a +trampoline frame, skip it. Patch by Victor Stinner. diff --git a/Python/traceback.c b/Python/traceback.c index fdaf19d37074dd..fba3594e97ceac 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -1242,6 +1242,8 @@ _Py_DumpASCII(int fd, PyObject *text) static void dump_frame(int fd, _PyInterpreterFrame *frame) { + assert(frame->owner != FRAME_OWNED_BY_CSTACK); + PyCodeObject *code = frame->f_code; PUTS(fd, " File "); if (code->co_filename != NULL @@ -1315,24 +1317,27 @@ dump_traceback(int fd, PyThreadState *tstate, int write_header) unsigned int depth = 0; while (1) { + if (frame->owner == FRAME_OWNED_BY_CSTACK) { + /* Trampoline frame */ + frame = frame->previous; + if (frame == NULL) { + break; + } + + /* Can't have more than one shim frame in a row */ + assert(frame->owner != FRAME_OWNED_BY_CSTACK); + } + if (MAX_FRAME_DEPTH <= depth) { PUTS(fd, " ...\n"); break; } + dump_frame(fd, frame); frame = frame->previous; if (frame == NULL) { break; } - if (frame->owner == FRAME_OWNED_BY_CSTACK) { - /* Trampoline frame */ - frame = frame->previous; - } - if (frame == NULL) { - break; - } - /* Can't have more than one shim frame in a row */ - assert(frame->owner != FRAME_OWNED_BY_CSTACK); depth++; } } From 7d175caf212565b3b89cf3c537c38adab52306a5 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Thu, 28 Nov 2024 18:32:50 +0100 Subject: [PATCH 30/43] [3.12] gh-127190: Fix local_setattro() error handling (GH-127366) (#127368) gh-127190: Fix local_setattro() error handling (GH-127366) Don't make the assumption that the 'name' argument is a string. Use repr() to format the 'name' argument instead. (cherry picked from commit 20657fbdb14d50ca4ec115da0cbef155871d8d33) Co-authored-by: Victor Stinner --- Lib/test/test_threading_local.py | 15 +++++++++++++++ Modules/_threadmodule.c | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_threading_local.py b/Lib/test/test_threading_local.py index f0b829a978feb5..3a58afd8194a32 100644 --- a/Lib/test/test_threading_local.py +++ b/Lib/test/test_threading_local.py @@ -208,6 +208,21 @@ def test_threading_local_clear_race(self): _testcapi.join_temporary_c_thread() + @support.cpython_only + def test_error(self): + class Loop(self._local): + attr = 1 + + # Trick the "if name == '__dict__':" test of __setattr__() + # to always be true + class NameCompareTrue: + def __eq__(self, other): + return True + + loop = Loop() + with self.assertRaisesRegex(AttributeError, 'Loop.*read-only'): + loop.__setattr__(NameCompareTrue(), 2) + class ThreadLocalTest(unittest.TestCase, BaseLocalTest): _local = _thread._local diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 518c246e98caf6..366ee6186de0d5 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -961,7 +961,7 @@ local_setattro(localobject *self, PyObject *name, PyObject *v) } if (r == 1) { PyErr_Format(PyExc_AttributeError, - "'%.100s' object attribute '%U' is read-only", + "'%.100s' object attribute %R is read-only", Py_TYPE(self)->tp_name, name); return -1; } From 91399c39845b7ca4a48b5feaec030a349ff3f68e Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Fri, 29 Nov 2024 10:22:27 +0100 Subject: [PATCH 31/43] [3.12] gh-127303: Add docs for token.EXACT_TOKEN_TYPES (GH-127304) (#127391) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gh-127303: Add docs for token.EXACT_TOKEN_TYPES (GH-127304) --------- (cherry picked from commit dd3a87d2a8f8750978359a99de2c5cb2168351d1) Co-authored-by: Илья Любавский <100635212+lubaskinc0de@users.noreply.github.com> Co-authored-by: Terry Jan Reedy Co-authored-by: Tomas R. Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Doc/library/token.rst | 7 +++++++ Lib/token.py | 3 ++- Misc/ACKS | 1 + .../Library/2024-11-27-16-06-10.gh-issue-127303.asqkgh.rst | 1 + Tools/build/generate_token.py | 3 ++- 5 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-27-16-06-10.gh-issue-127303.asqkgh.rst diff --git a/Doc/library/token.rst b/Doc/library/token.rst index e27a3b96d8fade..6d06ae8205dc01 100644 --- a/Doc/library/token.rst +++ b/Doc/library/token.rst @@ -79,6 +79,13 @@ the :mod:`tokenize` module. ``type_comments=True``. +.. data:: EXACT_TOKEN_TYPES + + A dictionary mapping the string representation of a token to its numeric code. + + .. versionadded:: 3.8 + + .. versionchanged:: 3.5 Added :data:`AWAIT` and :data:`ASYNC` tokens. diff --git a/Lib/token.py b/Lib/token.py index 487f6edd3c951c..e26d36bd64e3c1 100644 --- a/Lib/token.py +++ b/Lib/token.py @@ -1,7 +1,8 @@ """Token constants.""" # Auto-generated by Tools/build/generate_token.py -__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF'] +__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF', + 'EXACT_TOKEN_TYPES'] ENDMARKER = 0 NAME = 1 diff --git a/Misc/ACKS b/Misc/ACKS index cb9de23eeeac11..0f8b79f26fbf6b 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1139,6 +1139,7 @@ Mark Lutz Taras Lyapun Jim Lynch Mikael Lyngvig +Ilya Lyubavski Jeff MacDonald John Machin Andrew I MacIntyre diff --git a/Misc/NEWS.d/next/Library/2024-11-27-16-06-10.gh-issue-127303.asqkgh.rst b/Misc/NEWS.d/next/Library/2024-11-27-16-06-10.gh-issue-127303.asqkgh.rst new file mode 100644 index 00000000000000..58ebf5d0abe141 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-27-16-06-10.gh-issue-127303.asqkgh.rst @@ -0,0 +1 @@ +Publicly expose :data:`~token.EXACT_TOKEN_TYPES` in :attr:`!token.__all__`. diff --git a/Tools/build/generate_token.py b/Tools/build/generate_token.py index 3bd307c1733867..6d27bc41ba2663 100755 --- a/Tools/build/generate_token.py +++ b/Tools/build/generate_token.py @@ -226,7 +226,8 @@ def make_rst(infile, outfile='Doc/library/token-list.inc'): # {AUTO_GENERATED_BY_SCRIPT} ''' token_py_template += ''' -__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF'] +__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF', + 'EXACT_TOKEN_TYPES'] %s N_TOKENS = %d From fc6c2de6de787b99ed41610dda09eb227d5e2726 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Fri, 29 Nov 2024 11:59:22 +0100 Subject: [PATCH 32/43] [3.12] gh-127258: Fix asyncio test_staggered_race_with_eager_tasks() (GH-127358) (#127402) gh-127258: Fix asyncio test_staggered_race_with_eager_tasks() (GH-127358) Replace the sleep(2) with a task which is blocked forever. (cherry picked from commit bfabf96b50b7d6a9c15b298a86ba3633b05a1fd7) Co-authored-by: Victor Stinner --- Lib/test/test_asyncio/test_eager_task_factory.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_asyncio/test_eager_task_factory.py b/Lib/test/test_asyncio/test_eager_task_factory.py index b06832e02f00d6..e2a54dd8d768c8 100644 --- a/Lib/test/test_asyncio/test_eager_task_factory.py +++ b/Lib/test/test_asyncio/test_eager_task_factory.py @@ -225,10 +225,14 @@ async def fail(): await asyncio.sleep(0) raise ValueError("no good") + async def blocked(): + fut = asyncio.Future() + await fut + async def run(): winner, index, excs = await asyncio.staggered.staggered_race( [ - lambda: asyncio.sleep(2, result="sleep2"), + lambda: blocked(), lambda: asyncio.sleep(1, result="sleep1"), lambda: fail() ], From fc0564b3658f4bda9e808042cb9752fb56853bc1 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Fri, 29 Nov 2024 13:02:41 +0200 Subject: [PATCH 33/43] [3.12] gh-127359: Pin Tcl/Tk to 8 (8.6) for testing macOS (GH-127365) (#127394) --- .github/workflows/reusable-macos.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/reusable-macos.yml b/.github/workflows/reusable-macos.yml index 6df272c8839f16..acaab45e5b5e12 100644 --- a/.github/workflows/reusable-macos.yml +++ b/.github/workflows/reusable-macos.yml @@ -36,7 +36,10 @@ jobs: path: config.cache key: ${{ github.job }}-${{ inputs.os }}-${{ env.IMAGE_VERSION }}-${{ inputs.config_hash }} - name: Install Homebrew dependencies - run: brew install pkg-config openssl@3.0 xz gdbm tcl-tk + run: | + brew install pkg-config openssl@3.0 xz gdbm tcl-tk@8 + # Because alternate versions are not symlinked into place by default: + brew link tcl-tk@8 - name: Configure CPython run: | GDBM_CFLAGS="-I$(brew --prefix gdbm)/include" \ From e7ab97862deb8ad69caccbc5c6e1f7b4e670afbd Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 29 Nov 2024 17:03:24 +0100 Subject: [PATCH 34/43] [3.12] gh-127208: Reject null character in _imp.create_dynamic() (#127400) (#127419) gh-127208: Reject null character in _imp.create_dynamic() (#127400) _imp.create_dynamic() now rejects embedded null characters in the path and in the module name. Backport also the _PyUnicode_AsUTF8NoNUL() function. (cherry picked from commit b14fdadc6c620875a20b7ccc3c9b069e85d8557a) --- Include/internal/pycore_unicodeobject.h | 3 +++ Lib/test/test_import/__init__.py | 13 +++++++++++++ Objects/unicodeobject.c | 12 ++++++++++++ Python/import.c | 8 +++++--- 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index 44eccdea55beb0..cecdabe4155063 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -76,6 +76,9 @@ struct _Py_unicode_state { extern void _PyUnicode_ClearInterned(PyInterpreterState *interp); +// Like PyUnicode_AsUTF8(), but check for embedded null characters. +extern const char* _PyUnicode_AsUTF8NoNUL(PyObject *); + #ifdef __cplusplus } diff --git a/Lib/test/test_import/__init__.py b/Lib/test/test_import/__init__.py index c6accc4183a67f..81b2f33a840f1d 100644 --- a/Lib/test/test_import/__init__.py +++ b/Lib/test/test_import/__init__.py @@ -787,6 +787,19 @@ def test_issue105979(self): self.assertIn("Frozen object named 'x' is invalid", str(cm.exception)) + def test_create_dynamic_null(self): + with self.assertRaisesRegex(ValueError, 'embedded null character'): + class Spec: + name = "a\x00b" + origin = "abc" + _imp.create_dynamic(Spec()) + + with self.assertRaisesRegex(ValueError, 'embedded null character'): + class Spec2: + name = "abc" + origin = "a\x00b" + _imp.create_dynamic(Spec2()) + @skip_if_dont_write_bytecode class FilePermissionTests(unittest.TestCase): diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 7bd4b221c83cf7..5f3dd26ad7b762 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3991,6 +3991,18 @@ PyUnicode_AsUTF8(PyObject *unicode) return PyUnicode_AsUTF8AndSize(unicode, NULL); } +const char * +_PyUnicode_AsUTF8NoNUL(PyObject *unicode) +{ + Py_ssize_t size; + const char *s = PyUnicode_AsUTF8AndSize(unicode, &size); + if (s && strlen(s) != (size_t)size) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + return NULL; + } + return s; +} + /* PyUnicode_GetSize() has been deprecated since Python 3.3 because it returned length of Py_UNICODE. diff --git a/Python/import.c b/Python/import.c index 4d0b7fd95569b3..66391c04a0baaa 100644 --- a/Python/import.c +++ b/Python/import.c @@ -917,12 +917,14 @@ extensions_lock_release(void) static void * hashtable_key_from_2_strings(PyObject *str1, PyObject *str2, const char sep) { - Py_ssize_t str1_len, str2_len; - const char *str1_data = PyUnicode_AsUTF8AndSize(str1, &str1_len); - const char *str2_data = PyUnicode_AsUTF8AndSize(str2, &str2_len); + const char *str1_data = _PyUnicode_AsUTF8NoNUL(str1); + const char *str2_data = _PyUnicode_AsUTF8NoNUL(str2); if (str1_data == NULL || str2_data == NULL) { return NULL; } + Py_ssize_t str1_len = strlen(str1_data); + Py_ssize_t str2_len = strlen(str2_data); + /* Make sure sep and the NULL byte won't cause an overflow. */ assert(SIZE_MAX - str1_len - str2_len > 2); size_t size = str1_len + 1 + str2_len + 1; From 7c7ed49b518f986806edf2f3d67198cb516b7a98 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Sat, 30 Nov 2024 10:05:46 +0100 Subject: [PATCH 35/43] [3.12] gh-88110: Clear concurrent.futures.thread._threads_queues after fork to avoid joining parent process' threads (GH-126098) (GH-127164) Threads are gone after fork, so clear the queues too. Otherwise the child process (here created via multiprocessing.Process) crashes on interpreter exit. (cherry picked from commit 1848ce61f349533ae5892a8c24c2e0e3c364fc8a) Co-authored-by: Andrei Bodrov Co-authored-by: Serhiy Storchaka --- Lib/concurrent/futures/thread.py | 1 + .../test_thread_pool.py | 19 +++++++++++++++++++ ...3-02-15-23-54-42.gh-issue-88110.KU6erv.rst | 2 ++ 3 files changed, 22 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-02-15-23-54-42.gh-issue-88110.KU6erv.rst diff --git a/Lib/concurrent/futures/thread.py b/Lib/concurrent/futures/thread.py index 3b3a36a5093336..61dbff8a485bcd 100644 --- a/Lib/concurrent/futures/thread.py +++ b/Lib/concurrent/futures/thread.py @@ -41,6 +41,7 @@ def _python_exit(): os.register_at_fork(before=_global_shutdown_lock.acquire, after_in_child=_global_shutdown_lock._at_fork_reinit, after_in_parent=_global_shutdown_lock.release) + os.register_at_fork(after_in_child=_threads_queues.clear) class _WorkItem: diff --git a/Lib/test/test_concurrent_futures/test_thread_pool.py b/Lib/test/test_concurrent_futures/test_thread_pool.py index 812f989d8f3ad2..6e4a4b7caff481 100644 --- a/Lib/test/test_concurrent_futures/test_thread_pool.py +++ b/Lib/test/test_concurrent_futures/test_thread_pool.py @@ -64,6 +64,25 @@ def submit(pool): with futures.ProcessPoolExecutor(1, mp_context=mp.get_context('fork')) as workers: workers.submit(tuple) + @support.requires_fork() + @unittest.skipUnless(hasattr(os, 'register_at_fork'), 'need os.register_at_fork') + def test_process_fork_from_a_threadpool(self): + # bpo-43944: clear concurrent.futures.thread._threads_queues after fork, + # otherwise child process will try to join parent thread + def fork_process_and_return_exitcode(): + # Ignore the warning about fork with threads. + with self.assertWarnsRegex(DeprecationWarning, + r"use of fork\(\) may lead to deadlocks in the child"): + p = mp.get_context('fork').Process(target=lambda: 1) + p.start() + p.join() + return p.exitcode + + with futures.ThreadPoolExecutor(1) as pool: + process_exitcode = pool.submit(fork_process_and_return_exitcode).result() + + self.assertEqual(process_exitcode, 0) + def test_executor_map_current_future_cancel(self): stop_event = threading.Event() log = [] diff --git a/Misc/NEWS.d/next/Library/2023-02-15-23-54-42.gh-issue-88110.KU6erv.rst b/Misc/NEWS.d/next/Library/2023-02-15-23-54-42.gh-issue-88110.KU6erv.rst new file mode 100644 index 00000000000000..42a83edc3ba68d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-02-15-23-54-42.gh-issue-88110.KU6erv.rst @@ -0,0 +1,2 @@ +Fixed :class:`multiprocessing.Process` reporting a ``.exitcode`` of 1 even on success when +using the ``"fork"`` start method while using a :class:`concurrent.futures.ThreadPoolExecutor`. From ea8a85bb9eb54b9edee1a815cff797310ec13f13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 30 Nov 2024 11:02:07 +0100 Subject: [PATCH 36/43] [3.12] Link to correct class methods in asyncio primitives docs (GH-127270) (#127438) --- Doc/library/asyncio-sync.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Doc/library/asyncio-sync.rst b/Doc/library/asyncio-sync.rst index 05bdf5488af143..4caa8b9b47564a 100644 --- a/Doc/library/asyncio-sync.rst +++ b/Doc/library/asyncio-sync.rst @@ -262,8 +262,9 @@ Condition Wait until a predicate becomes *true*. The predicate must be a callable which result will be - interpreted as a boolean value. The final value is the - return value. + interpreted as a boolean value. The method will repeatedly + :meth:`~Condition.wait` until the predicate evaluates to *true*. + The final value is the return value. Semaphore @@ -428,7 +429,7 @@ Barrier .. coroutinemethod:: abort() Put the barrier into a broken state. This causes any active or future - calls to :meth:`wait` to fail with the :class:`BrokenBarrierError`. + calls to :meth:`~Barrier.wait` to fail with the :class:`BrokenBarrierError`. Use this for example if one of the tasks needs to abort, to avoid infinite waiting tasks. From f08c82ecad5106a32be0c1b65c4d5be3677ab2cd Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Sun, 1 Dec 2024 08:50:33 +0100 Subject: [PATCH 37/43] [3.12] Add the missing `f` on an f-string error message in multiprocessing. (GH-127462) (#127465) Add the missing `f` on an f-string error message in multiprocessing. (GH-127462) (cherry picked from commit 11c01092d5fa8f02c867a7f1f3c135ce63db4838) Co-authored-by: Gregory P. Smith --- Lib/multiprocessing/connection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/multiprocessing/connection.py b/Lib/multiprocessing/connection.py index d0582e3cd5406a..fdbc3bda7db7e0 100644 --- a/Lib/multiprocessing/connection.py +++ b/Lib/multiprocessing/connection.py @@ -956,7 +956,7 @@ def answer_challenge(connection, authkey: bytes): f'Protocol error, expected challenge: {message=}') message = message[len(_CHALLENGE):] if len(message) < _MD5ONLY_MESSAGE_LENGTH: - raise AuthenticationError('challenge too short: {len(message)} bytes') + raise AuthenticationError(f'challenge too short: {len(message)} bytes') digest = _create_response(authkey, message) connection.send_bytes(digest) response = connection.recv_bytes(256) # reject large message From 4000cbd46a1f83536eeda690733b81c535b225ed Mon Sep 17 00:00:00 2001 From: Yuki Kobayashi Date: Sun, 1 Dec 2024 17:51:46 +0900 Subject: [PATCH 38/43] [3.12] Docs: Fix incorrect indents in `c-api/type.rst` (GH-127449) (#127461) (cherry picked from commit 33ce8dcf791721fea563715f681dc1593a35b83b) --- Doc/c-api/type.rst | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/Doc/c-api/type.rst b/Doc/c-api/type.rst index 94c7a9a7b41981..1564fa94efa9bf 100644 --- a/Doc/c-api/type.rst +++ b/Doc/c-api/type.rst @@ -465,19 +465,19 @@ The following functions and structs are used to create The following “offset” fields cannot be set using :c:type:`PyType_Slot`: - * :c:member:`~PyTypeObject.tp_weaklistoffset` - (use :c:macro:`Py_TPFLAGS_MANAGED_WEAKREF` instead if possible) - * :c:member:`~PyTypeObject.tp_dictoffset` - (use :c:macro:`Py_TPFLAGS_MANAGED_DICT` instead if possible) - * :c:member:`~PyTypeObject.tp_vectorcall_offset` - (use ``"__vectorcalloffset__"`` in - :ref:`PyMemberDef `) - - If it is not possible to switch to a ``MANAGED`` flag (for example, - for vectorcall or to support Python older than 3.12), specify the - offset in :c:member:`Py_tp_members `. - See :ref:`PyMemberDef documentation ` - for details. + * :c:member:`~PyTypeObject.tp_weaklistoffset` + (use :c:macro:`Py_TPFLAGS_MANAGED_WEAKREF` instead if possible) + * :c:member:`~PyTypeObject.tp_dictoffset` + (use :c:macro:`Py_TPFLAGS_MANAGED_DICT` instead if possible) + * :c:member:`~PyTypeObject.tp_vectorcall_offset` + (use ``"__vectorcalloffset__"`` in + :ref:`PyMemberDef `) + + If it is not possible to switch to a ``MANAGED`` flag (for example, + for vectorcall or to support Python older than 3.12), specify the + offset in :c:member:`Py_tp_members `. + See :ref:`PyMemberDef documentation ` + for details. The following fields cannot be set at all when creating a heap type: @@ -497,14 +497,13 @@ The following functions and structs are used to create To avoid issues, use the *bases* argument of :c:func:`PyType_FromSpecWithBases` instead. - .. versionchanged:: 3.9 + .. versionchanged:: 3.9 + Slots in :c:type:`PyBufferProcs` may be set in the unlimited API. - Slots in :c:type:`PyBufferProcs` may be set in the unlimited API. - - .. versionchanged:: 3.11 - :c:member:`~PyBufferProcs.bf_getbuffer` and - :c:member:`~PyBufferProcs.bf_releasebuffer` are now available - under the :ref:`limited API `. + .. versionchanged:: 3.11 + :c:member:`~PyBufferProcs.bf_getbuffer` and + :c:member:`~PyBufferProcs.bf_releasebuffer` are now available + under the :ref:`limited API `. .. c:member:: void *pfunc From 87b712228efa87245a8b64092262d9e3a0193985 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Sun, 1 Dec 2024 10:24:18 +0100 Subject: [PATCH 39/43] [3.12] gh-127356: Fix prepend doctrees directory for gettext target (GH-127357) (#127471) Co-authored-by: Rafael Fontenelle --- Doc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/Makefile b/Doc/Makefile index 22e43ee3e542ee..4a704ad58b33d3 100644 --- a/Doc/Makefile +++ b/Doc/Makefile @@ -144,7 +144,7 @@ pydoc-topics: build .PHONY: gettext gettext: BUILDER = gettext -gettext: SPHINXOPTS += -d build/doctrees-gettext +gettext: override SPHINXOPTS := -d build/doctrees-gettext $(SPHINXOPTS) gettext: build .PHONY: htmlview From 828db9016bf7d13cb935af079fad26adf4e2b49d Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Mon, 2 Dec 2024 09:29:34 +0100 Subject: [PATCH 40/43] [3.12] gh-127443: Fix some entries in `Doc/data/refcounts.dat` (GH-127451) (#127497) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gh-127443: Fix some entries in `Doc/data/refcounts.dat` (GH-127451) Fix incorrect entries in `Doc/data/refcounts.dat` (cherry picked from commit 1f8267b85dda655282922ba20df90d0ac6bea634) Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Doc/data/refcounts.dat | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index b5d953247f3c46..28532620ba603d 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -180,7 +180,7 @@ PyCapsule_IsValid:const char*:name:: PyCapsule_New:PyObject*::+1: PyCapsule_New:void*:pointer:: PyCapsule_New:const char *:name:: -PyCapsule_New::void (* destructor)(PyObject* ):: +PyCapsule_New:void (*)(PyObject *):destructor:: PyCapsule_SetContext:int::: PyCapsule_SetContext:PyObject*:self:0: @@ -349,11 +349,11 @@ PyComplex_CheckExact:int::: PyComplex_CheckExact:PyObject*:p:0: PyComplex_FromCComplex:PyObject*::+1: -PyComplex_FromCComplex::Py_complex v:: +PyComplex_FromCComplex:Py_complex:v:: PyComplex_FromDoubles:PyObject*::+1: -PyComplex_FromDoubles::double real:: -PyComplex_FromDoubles::double imag:: +PyComplex_FromDoubles:double:real:: +PyComplex_FromDoubles:double:imag:: PyComplex_ImagAsDouble:double::: PyComplex_ImagAsDouble:PyObject*:op:0: @@ -622,7 +622,9 @@ PyErr_GetExcInfo:PyObject**:pvalue:+1: PyErr_GetExcInfo:PyObject**:ptraceback:+1: PyErr_GetRaisedException:PyObject*::+1: -PyErr_SetRaisedException:::: + +PyErr_SetRaisedException:void::: +PyErr_SetRaisedException:PyObject *:exc:0:stolen PyErr_GivenExceptionMatches:int::: PyErr_GivenExceptionMatches:PyObject*:given:0: @@ -642,9 +644,9 @@ PyErr_NewExceptionWithDoc:PyObject*:dict:0: PyErr_NoMemory:PyObject*::null: PyErr_NormalizeException:void::: -PyErr_NormalizeException:PyObject**:exc::??? -PyErr_NormalizeException:PyObject**:val::??? -PyErr_NormalizeException:PyObject**:tb::??? +PyErr_NormalizeException:PyObject**:exc:+1:??? +PyErr_NormalizeException:PyObject**:val:+1:??? +PyErr_NormalizeException:PyObject**:tb:+1:??? PyErr_Occurred:PyObject*::0: @@ -1268,7 +1270,7 @@ PyMapping_GetItemString:const char*:key:: PyMapping_HasKey:int::: PyMapping_HasKey:PyObject*:o:0: -PyMapping_HasKey:PyObject*:key:: +PyMapping_HasKey:PyObject*:key:0: PyMapping_HasKeyString:int::: PyMapping_HasKeyString:PyObject*:o:0: @@ -1428,7 +1430,7 @@ PyModule_GetState:void*::: PyModule_GetState:PyObject*:module:0: PyModule_New:PyObject*::+1: -PyModule_New::char* name:: +PyModule_New:char*:name:: PyModule_NewObject:PyObject*::+1: PyModule_NewObject:PyObject*:name:+1: @@ -1438,7 +1440,7 @@ PyModule_SetDocString:PyObject*:module:0: PyModule_SetDocString:const char*:docstring:: PyModuleDef_Init:PyObject*::0: -PyModuleDef_Init:PyModuleDef*:def:0: +PyModuleDef_Init:PyModuleDef*:def:: PyNumber_Absolute:PyObject*::+1: PyNumber_Absolute:PyObject*:o:0: @@ -1950,10 +1952,10 @@ PyRun_StringFlags:PyObject*:locals:0: PyRun_StringFlags:PyCompilerFlags*:flags:: PySeqIter_Check:int::: -PySeqIter_Check::op:: +PySeqIter_Check:PyObject *:op:0: PySeqIter_New:PyObject*::+1: -PySeqIter_New:PyObject*:seq:: +PySeqIter_New:PyObject*:seq:0: PySequence_Check:int::: PySequence_Check:PyObject*:o:0: @@ -2393,7 +2395,7 @@ PyUnicode_GET_DATA_SIZE:PyObject*:o:0: PyUnicode_KIND:int::: PyUnicode_KIND:PyObject*:o:0: -PyUnicode_MAX_CHAR_VALUE:::: +PyUnicode_MAX_CHAR_VALUE:Py_UCS4::: PyUnicode_MAX_CHAR_VALUE:PyObject*:o:0: PyUnicode_AS_UNICODE:Py_UNICODE*::: @@ -2480,7 +2482,7 @@ PyUnicode_FromWideChar:const wchar_t*:w:: PyUnicode_FromWideChar:Py_ssize_t:size:: PyUnicode_AsWideChar:Py_ssize_t::: -PyUnicode_AsWideChar:PyObject*:*unicode:0: +PyUnicode_AsWideChar:PyObject*:unicode:0: PyUnicode_AsWideChar:wchar_t*:w:: PyUnicode_AsWideChar:Py_ssize_t:size:: @@ -2533,7 +2535,7 @@ PyUnicode_AsUTF8String:PyObject*:unicode:0: PyUnicode_AsUTF8AndSize:const char*::: PyUnicode_AsUTF8AndSize:PyObject*:unicode:0: -PyUnicode_AsUTF8AndSize:Py_ssize_t*:size:0: +PyUnicode_AsUTF8AndSize:Py_ssize_t*:size:: PyUnicode_AsUTF8:const char*::: PyUnicode_AsUTF8:PyObject*:unicode:0: @@ -2856,13 +2858,13 @@ PyUnicodeDecodeError_SetStart:PyObject*:exc:0: PyUnicodeDecodeError_SetStart:Py_ssize_t:start:: PyWeakref_Check:int::: -PyWeakref_Check:PyObject*:ob:: +PyWeakref_Check:PyObject*:ob:0: PyWeakref_CheckProxy:int::: -PyWeakref_CheckProxy:PyObject*:ob:: +PyWeakref_CheckProxy:PyObject*:ob:0: PyWeakref_CheckRef:int::: -PyWeakref_CheckRef:PyObject*:ob:: +PyWeakref_CheckRef:PyObject*:ob:0: PyWeakref_GET_OBJECT:PyObject*::0: PyWeakref_GET_OBJECT:PyObject*:ref:0: From 9f0d6b71d677363f305c63cb5601c1410310435e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 13:24:26 +0100 Subject: [PATCH 41/43] [3.12] Fix Unicode encode_wstr_utf8() (#127420) (#127504) Fix Unicode encode_wstr_utf8() (#127420) Raise RuntimeError instead of RuntimeWarning. Co-authored-by: Victor Stinner --- Objects/unicodeobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5f3dd26ad7b762..ba11351f003007 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -15446,7 +15446,7 @@ encode_wstr_utf8(wchar_t *wstr, char **str, const char *name) int res; res = _Py_EncodeUTF8Ex(wstr, str, NULL, NULL, 1, _Py_ERROR_STRICT); if (res == -2) { - PyErr_Format(PyExc_RuntimeWarning, "cannot decode %s", name); + PyErr_Format(PyExc_RuntimeError, "cannot encode %s", name); return -1; } if (res < 0) { From 22f8e430f81b8b8966bff0efe65786515a132b7e Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Mon, 2 Dec 2024 14:52:49 +0100 Subject: [PATCH 42/43] [3.12] gh-99880: document rounding mode for new-style formatting (GH-121481) (#126335) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CPython uses _Py_dg_dtoa(), which does rounding to nearest with half to even tie-breaking rule. If that functions is unavailable, PyOS_double_to_string() fallbacks to system snprintf(). Since CPython 3.12, build requirements include C11 compiler *and* support for IEEE 754 floating point numbers (Annex F). This means that FE_TONEAREST macro is available and, per default, printf-like functions should use same rounding mode as _Py_dg_dtoa(). (cherry picked from commit 7d7d56d8b1147a6b85e1c09d01b164df7c5c4942) Co-authored-by: Sergey B Kirpichev Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Doc/library/string.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Doc/library/string.rst b/Doc/library/string.rst index 9e8e44a8abe770..a000bb49f14800 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -589,6 +589,11 @@ The available presentation types for :class:`float` and | | as altered by the other format modifiers. | +---------+----------------------------------------------------------+ +The result should be correctly rounded to a given precision ``p`` of digits +after the decimal point. The rounding mode for :class:`float` matches that +of the :func:`round` builtin. For :class:`~decimal.Decimal`, the rounding +mode of the current :ref:`context ` will be used. + The available presentation types for :class:`complex` are the same as those for :class:`float` (``'%'`` is not allowed). Both the real and imaginary components of a complex number are formatted as floating-point numbers, according to the From 9295a1d0334d6b94ff5ae5b13219695954cc6b4c Mon Sep 17 00:00:00 2001 From: Carol Willing Date: Mon, 2 Dec 2024 05:55:31 -0800 Subject: [PATCH 43/43] [3.12] Docs: Miscellaneous corrections to simple statements in the language reference (GH-126720) (#126891) * Replace: The :keyword:`global` -> The :keyword:`global` statement Add :keyword: when it's needed * Replace repeated links with duoble backticks (cherry picked from commit 94a7a4e22fb8f567090514785c69e65298acca42) Co-authored-by: Beomsoo Kim --- Doc/reference/simple_stmts.rst | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/Doc/reference/simple_stmts.rst b/Doc/reference/simple_stmts.rst index 618664b23f0680..5e3ede69d1e5d7 100644 --- a/Doc/reference/simple_stmts.rst +++ b/Doc/reference/simple_stmts.rst @@ -406,9 +406,9 @@ The extended form, ``assert expression1, expression2``, is equivalent to :: These equivalences assume that :const:`__debug__` and :exc:`AssertionError` refer to the built-in variables with those names. In the current implementation, the -built-in variable :const:`__debug__` is ``True`` under normal circumstances, +built-in variable ``__debug__`` is ``True`` under normal circumstances, ``False`` when optimization is requested (command line option :option:`-O`). The current -code generator emits no code for an assert statement when optimization is +code generator emits no code for an :keyword:`assert` statement when optimization is requested at compile time. Note that it is unnecessary to include the source code for the expression that failed in the error message; it will be displayed as part of the stack trace. @@ -531,8 +531,8 @@ The :keyword:`!yield` statement yield_stmt: `yield_expression` A :keyword:`yield` statement is semantically equivalent to a :ref:`yield -expression `. The yield statement can be used to omit the parentheses -that would otherwise be required in the equivalent yield expression +expression `. The ``yield`` statement can be used to omit the +parentheses that would otherwise be required in the equivalent yield expression statement. For example, the yield statements :: yield @@ -544,7 +544,7 @@ are equivalent to the yield expression statements :: (yield from ) Yield expressions and statements are only used when defining a :term:`generator` -function, and are only used in the body of the generator function. Using yield +function, and are only used in the body of the generator function. Using :keyword:`yield` in a function definition is sufficient to cause that definition to create a generator function instead of a normal function. @@ -964,25 +964,14 @@ The :keyword:`!global` statement .. productionlist:: python-grammar global_stmt: "global" `identifier` ("," `identifier`)* -The :keyword:`global` statement is a declaration which holds for the entire -current code block. It means that the listed identifiers are to be interpreted -as globals. It would be impossible to assign to a global variable without +The :keyword:`global` statement causes the listed identifiers to be interpreted +as globals. It would be impossible to assign to a global variable without :keyword:`!global`, although free variables may refer to globals without being declared global. -Names listed in a :keyword:`global` statement must not be used in the same code -block textually preceding that :keyword:`!global` statement. - -Names listed in a :keyword:`global` statement must not be defined as formal -parameters, or as targets in :keyword:`with` statements or :keyword:`except` clauses, or in a :keyword:`for` target list, :keyword:`class` -definition, function definition, :keyword:`import` statement, or variable -annotation. - -.. impl-detail:: - - The current implementation does not enforce some of these restrictions, but - programs should not abuse this freedom, as future implementations may enforce - them or silently change the meaning of the program. +The :keyword:`global` statement applies to the entire scope of a function or +class body. A :exc:`SyntaxError` is raised if a variable is used or +assigned to prior to its global declaration in the scope. .. index:: pair: built-in function; exec @@ -1018,7 +1007,7 @@ identifiers. If a name is bound in more than one nonlocal scope, the nearest binding is used. If a name is not bound in any nonlocal scope, or if there is no nonlocal scope, a :exc:`SyntaxError` is raised. -The nonlocal statement applies to the entire scope of a function or +The :keyword:`nonlocal` statement applies to the entire scope of a function or class body. A :exc:`SyntaxError` is raised if a variable is used or assigned to prior to its nonlocal declaration in the scope.