From d5b0e98e955e18394f74fc644ccc8bc136516cec Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Wed, 28 Feb 2024 15:54:22 +0000 Subject: [PATCH 01/16] gh-111140: Fix edge case in PyLong_AsNativeBytes where large negative longs may require an extra byte --- Lib/test/test_capi/test_long.py | 22 ++++++++++++------- ...-02-28-15-50-01.gh-issue-111140.mpwcUg.rst | 3 +++ Objects/longobject.c | 15 +++++++------ 3 files changed, 25 insertions(+), 15 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2024-02-28-15-50-01.gh-issue-111140.mpwcUg.rst diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index 9f5ee507a8eb85..0eeea1030c1f0a 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -451,11 +451,15 @@ def test_long_asnativebytes(self): (MAX_SSIZE, SZ), (MAX_USIZE, SZ + 1), (-MAX_SSIZE, SZ), - (-MAX_USIZE, SZ + 1), + (-MAX_USIZE, SZ), (2**255-1, 32), (-(2**255-1), 32), + (2**255, 33), + (-(2**255), 32), # edge case (2**256-1, 33), - (-(2**256-1), 33), + (-(2**256-1), 32), # edge case + (2**256, 33), + (-(2**256), 33), ]: with self.subTest(f"sizeof-{v:X}"): buffer = bytearray(b"\x5a") @@ -500,10 +504,10 @@ def test_long_asnativebytes(self): (256, b'\x01\x00', 2), # Extracts successfully (unsigned), but requests 9 bytes (2**63, b'\x80' + b'\x00' * 7, 9), - # "Extracts", but requests 9 bytes - (-2**63, b'\x80' + b'\x00' * 7, 9), (2**63, b'\x00\x80' + b'\x00' * 7, 9), - (-2**63, b'\xff\x80' + b'\x00' * 7, 9), + # Extracts successfully and only requests 8 bytes + (-2**63, b'\x80' + b'\x00' * 7, 8), + (-2**63, b'\xff\x80' + b'\x00' * 7, 8), (2**255-1, b'\x7f' + b'\xff' * 31, 32), (-(2**255-1), b'\x80' + b'\x00' * 30 + b'\x01', 32), @@ -516,9 +520,11 @@ def test_long_asnativebytes(self): # into a 32-byte buffer, though negative number may be unrecoverable (2**256-1, b'\xff' * 32, 33), (2**256-1, b'\x00' + b'\xff' * 32, 33), - (-(2**256-1), b'\x00' * 31 + b'\x01', 33), - (-(2**256-1), b'\xff' + b'\x00' * 31 + b'\x01', 33), - (-(2**256-1), b'\xff\xff' + b'\x00' * 31 + b'\x01', 33), + # Negative 256 bits of integer will only request 32 bytes, since the + # top-most bit is the sign bit as well as the magnitude. + (-(2**256-1), b'\x00' * 31 + b'\x01', 32), + (-(2**256-1), b'\xff' + b'\x00' * 31 + b'\x01', 32), + (-(2**256-1), b'\xff\xff' + b'\x00' * 31 + b'\x01', 32), # The classic "Windows HRESULT as negative number" case # HRESULT hr; diff --git a/Misc/NEWS.d/next/C API/2024-02-28-15-50-01.gh-issue-111140.mpwcUg.rst b/Misc/NEWS.d/next/C API/2024-02-28-15-50-01.gh-issue-111140.mpwcUg.rst new file mode 100644 index 00000000000000..74c814ee409f6e --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-02-28-15-50-01.gh-issue-111140.mpwcUg.rst @@ -0,0 +1,3 @@ +Allow :c:func:`PyLong_AsNativeBytes` to extract negative numbers requiring +every single bit of the target buffer into the buffer without requesting a +larger one. diff --git a/Objects/longobject.c b/Objects/longobject.c index 2d1c6ad788e281..cc7dc58b68ede1 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1199,17 +1199,18 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness) _PyLong_AsByteArray(v, buffer, (size_t)n, little_endian, 1, 0); } - // More efficient calculation for number of bytes required? + /* Calculates the number of bits required for the *absolute* value + * of v. This does not take sign into account, only magnitude. */ size_t nb = _PyLong_NumBits((PyObject *)v); /* Normally this would be((nb - 1) / 8) + 1 to avoid rounding up * multiples of 8 to the next byte, but we add an implied bit for * the sign and it cancels out. */ - size_t n_needed = (nb / 8) + 1; - res = (Py_ssize_t)n_needed; - if ((size_t)res != n_needed) { - PyErr_SetString(PyExc_OverflowError, - "value too large to convert"); - res = -1; + res = (Py_ssize_t)(nb / 8) + 1; + /* The edge case of a negative value where the sign bit is at the + * MSB of one byte needs special handling to avoid requesting an + * extra byte, even though it could be properly represented. */ + if (_PyLong_IsNegative(v) && !(nb % 8)) { + res -= 1; } } From f16234802837176c5de6535ec8deb73344dc02fd Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 1 Mar 2024 00:45:26 +0000 Subject: [PATCH 02/16] Improved edge case testing and fuzz tests --- Lib/test/test_capi/test_long.py | 87 ++++++++++++++++++++++++++++----- Objects/longobject.c | 58 ++++++++++++++++++++-- 2 files changed, 127 insertions(+), 18 deletions(-) diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index 0eeea1030c1f0a..74869b7905f473 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -451,13 +451,13 @@ def test_long_asnativebytes(self): (MAX_SSIZE, SZ), (MAX_USIZE, SZ + 1), (-MAX_SSIZE, SZ), - (-MAX_USIZE, SZ), + (-MAX_USIZE, SZ + 1), (2**255-1, 32), (-(2**255-1), 32), (2**255, 33), - (-(2**255), 32), # edge case + (-(2**255), 33), # if you ask, we'll say 33, but 32 would do (2**256-1, 33), - (-(2**256-1), 32), # edge case + (-(2**256-1), 33), (2**256, 33), (-(2**256), 33), ]: @@ -498,16 +498,18 @@ def test_long_asnativebytes(self): (-1, b'\xff' * 10, min(11, SZ)), (-42, b'\xd6', 1), (-42, b'\xff' * 10 + b'\xd6', min(11, SZ)), - # Extracts 255 into a single byte, but requests sizeof(Py_ssize_t) - (255, b'\xff', SZ), + # Extracts 255 into a single byte, but requests 2 + # (this is currently a special case, and "should" request SZ) + (255, b'\xff', 2), (255, b'\x00\xff', 2), (256, b'\x01\x00', 2), + (0x80, b'\x00' * 7 + b'\x80', 8), # Extracts successfully (unsigned), but requests 9 bytes (2**63, b'\x80' + b'\x00' * 7, 9), (2**63, b'\x00\x80' + b'\x00' * 7, 9), - # Extracts successfully and only requests 8 bytes + # Extracts into 8 bytes, but if you provide 9 we'll say 9 (-2**63, b'\x80' + b'\x00' * 7, 8), - (-2**63, b'\xff\x80' + b'\x00' * 7, 8), + (-2**63, b'\xff\x80' + b'\x00' * 7, 9), (2**255-1, b'\x7f' + b'\xff' * 31, 32), (-(2**255-1), b'\x80' + b'\x00' * 30 + b'\x01', 32), @@ -520,15 +522,18 @@ def test_long_asnativebytes(self): # into a 32-byte buffer, though negative number may be unrecoverable (2**256-1, b'\xff' * 32, 33), (2**256-1, b'\x00' + b'\xff' * 32, 33), - # Negative 256 bits of integer will only request 32 bytes, since the - # top-most bit is the sign bit as well as the magnitude. - (-(2**256-1), b'\x00' * 31 + b'\x01', 32), - (-(2**256-1), b'\xff' + b'\x00' * 31 + b'\x01', 32), - (-(2**256-1), b'\xff\xff' + b'\x00' * 31 + b'\x01', 32), + (-(2**256-1), b'\x00' * 31 + b'\x01', 33), + (-(2**256-1), b'\xff' + b'\x00' * 31 + b'\x01', 33), + (-(2**256-1), b'\xff\xff' + b'\x00' * 31 + b'\x01', 33), + # However, -2**255 precisely will extract into 32 bytes and return + # success. For bigger buffers, it will still succeed, but will + # return 33 + (-(2**255), b'\x80' + b'\x00' * 31, 32), + (-(2**255), b'\xff\x80' + b'\x00' * 31, 33), # The classic "Windows HRESULT as negative number" case # HRESULT hr; - # PyLong_CopyBits(<-2147467259>, &hr, sizeof(HRESULT)) + # PyLong_AsNativeBytes(<-2147467259>, &hr, sizeof(HRESULT), -1) # assert(hr == E_FAIL) (-2147467259, b'\x80\x00\x40\x05', 4), ]: @@ -554,6 +559,62 @@ def test_long_asnativebytes(self): with self.assertRaises(TypeError): asnativebytes('not a number', buffer, 0, -1) + def test_long_asnativebytes_fuzz(self): + import math + from random import Random + from _testcapi import ( + pylong_asnativebytes as asnativebytes, + SIZE_MAX, + ) + + # Abbreviate sizeof(Py_ssize_t) to SZ because we use it a lot + SZ = int(math.ceil(math.log(SIZE_MAX + 1) / math.log(2)) / 8) + + rng = Random() + buffer = bytearray(260) + + for _ in range(1000): + n = rng.randrange(1, 256) + bytes_be = bytes([rng.randrange(1, 256)] + [rng.randrange(256) for _ in range(n - 1)]) + bytes_le = bytes_be[::-1] + v = int.from_bytes(bytes_le, 'little') + # Allocate bigger buffer than actual value + + expect_1 = expect_2 = (SZ, n) + if bytes_be[0] & 0x80: + # All values are positive, so if MSB is set, expect extra bit + # when we request the size or have a large enough buffer + expect_1 = (SZ, n + 1) + # When requesting exactly the right size, we expect the return + # to be exactly the right size. + #expect_2 = (n,) + # However, right now, the extra bit is still requested. + expect_2 = (n + 1,) + + try: + actual = asnativebytes(v, buffer, 0, -1) + self.assertIn(actual, expect_1) + + actual = asnativebytes(v, buffer, len(buffer), 0) + self.assertIn(actual, expect_1) + self.assertEqual(bytes_be, buffer[-n:]) + + actual = asnativebytes(v, buffer, len(buffer), 1) + self.assertIn(actual, expect_1) + self.assertEqual(bytes_le, buffer[:n]) + + actual = asnativebytes(v, buffer, n, 0) + self.assertIn(actual, expect_2) + actual = asnativebytes(v, buffer, n, 1) + self.assertIn(actual, expect_2) + except AssertionError: + if support.verbose: + print() + print(''.join(f'{b:02X}' for b in bytes_be)) + print(n, 'bytes') + print('int =', v) + raise + def test_long_fromnativebytes(self): import math from _testcapi import ( diff --git a/Objects/longobject.c b/Objects/longobject.c index cc7dc58b68ede1..dbdeffb6d314b7 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1159,6 +1159,18 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness) /* If we fit, return the requested number of bytes */ if (_fits_in_n_bits(cv.v, n * 8)) { res = n; + } else if (cv.v > 0 && _fits_in_n_bits(cv.v, n * 8 + 1)) { + /* Positive values with the MSB set do not require an + * additional bit when the caller's intent is to treat them + * as unsigned. */ + /* TODO: Disabled because we don't know the caller's intent + res = n; + * Instead, we'll return n+1, which is more accurate than + * res at this stage (which is just sizeof(size_t)), + * and it's something we can test for to ensure this case + * is being detected correctly. + */ + res = n + 1; } } else { @@ -1206,11 +1218,47 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness) * multiples of 8 to the next byte, but we add an implied bit for * the sign and it cancels out. */ res = (Py_ssize_t)(nb / 8) + 1; - /* The edge case of a negative value where the sign bit is at the - * MSB of one byte needs special handling to avoid requesting an - * extra byte, even though it could be properly represented. */ - if (_PyLong_IsNegative(v) && !(nb % 8)) { - res -= 1; + + /* Two edge cases exist that are best handled after extracting the + * bits. These may result in us reporting overflow when the value + * actually fits. + */ + if (n > 0 && res > n && nb == n * 8) { + if (_PyLong_IsNegative(v)) { + /* Values of 0x80...00 from negative values that use every + * available bit in the buffer do not require an additional + * bit to store the sign. */ + int is_edge_case = 1; + unsigned char *b = (unsigned char *)buffer; + for (Py_ssize_t i = 0; i < n && is_edge_case; ++i, ++b) { + if (i == 0) { + is_edge_case = (*b == (little_endian ? 0 : 0x80)); + } else if (i < n - 1) { + is_edge_case = (*b == 0); + } else { + is_edge_case = (*b == (little_endian ? 0x80 : 0)); + } + } + if (is_edge_case) { + res = n; + } + } + else { + /* Positive values with the MSB set do not require an + * additional bit when the caller's intent is to treat them + * as unsigned. */ + unsigned char *b = (unsigned char *)buffer; + if (b[little_endian ? n - 1 : 0] & 0x80) { + /* TODO: Disabled because we don't know the caller's intent + res = n; + * Instead, we'll return n+1, which is more accurate than + * res at this stage (which might just be sizeof(size_t)), + * and it's something we can test for to ensure this case + * is being detected correctly. + */ + res = n + 1; + } + } } } From 0efa1432f6e0063e08276a3c0569d9736800c37f Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 1 Mar 2024 00:50:31 +0000 Subject: [PATCH 03/16] Test comments --- Lib/test/test_capi/test_long.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index 74869b7905f473..7c36bd3d83379e 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -571,14 +571,18 @@ def test_long_asnativebytes_fuzz(self): SZ = int(math.ceil(math.log(SIZE_MAX + 1) / math.log(2)) / 8) rng = Random() + # Allocate bigger buffer than actual values are going to be buffer = bytearray(260) for _ in range(1000): n = rng.randrange(1, 256) - bytes_be = bytes([rng.randrange(1, 256)] + [rng.randrange(256) for _ in range(n - 1)]) + bytes_be = bytes([ + # Ensure the most significant byte is nonzero + rng.randrange(1, 256), + *[rng.randrange(256) for _ in range(n - 1)] + ]) bytes_le = bytes_be[::-1] v = int.from_bytes(bytes_le, 'little') - # Allocate bigger buffer than actual value expect_1 = expect_2 = (SZ, n) if bytes_be[0] & 0x80: @@ -588,7 +592,8 @@ def test_long_asnativebytes_fuzz(self): # When requesting exactly the right size, we expect the return # to be exactly the right size. #expect_2 = (n,) - # However, right now, the extra bit is still requested. + # However, right now, the extra bit is still requested. These + # are the TODO comments in PyLong_AsNativeBytes expect_2 = (n + 1,) try: From 15b3752c8d5cf49fc103260e877cc8afbffb3c28 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Mon, 4 Mar 2024 19:00:12 +0000 Subject: [PATCH 04/16] Fix for x86 --- Lib/test/test_capi/test_long.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index 7c36bd3d83379e..265e951839195b 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -503,7 +503,7 @@ def test_long_asnativebytes(self): (255, b'\xff', 2), (255, b'\x00\xff', 2), (256, b'\x01\x00', 2), - (0x80, b'\x00' * 7 + b'\x80', 8), + (0x80, b'\x00' * 7 + b'\x80', min(8, SZ)), # Extracts successfully (unsigned), but requests 9 bytes (2**63, b'\x80' + b'\x00' * 7, 9), (2**63, b'\x00\x80' + b'\x00' * 7, 9), From 8dedad7ad89bafe3982e8af35d840ca5673cebfa Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Tue, 19 Mar 2024 22:38:28 +0000 Subject: [PATCH 05/16] More precise check for unsigned non-overflow --- Objects/longobject.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Objects/longobject.c b/Objects/longobject.c index 0415e3aac5ad2e..57696690552566 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1223,7 +1223,7 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness) * bits. These may result in us reporting overflow when the value * actually fits. */ - if (n > 0 && res > n && nb == n * 8) { + if (n > 0 && res == n + 1 && nb == n * 8) { if (_PyLong_IsNegative(v)) { /* Values of 0x80...00 from negative values that use every * available bit in the buffer do not require an additional @@ -1251,10 +1251,7 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness) if (b[little_endian ? n - 1 : 0] & 0x80) { /* TODO: Disabled because we don't know the caller's intent res = n; - * Instead, we'll return n+1, which is more accurate than - * res at this stage (which might just be sizeof(size_t)), - * and it's something we can test for to ensure this case - * is being detected correctly. + * Instead, we'll return res == n+1. */ res = n + 1; } From 621708c97d2edcce047fe62ba1a26a5136ae6361 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Thu, 28 Mar 2024 18:08:39 +0000 Subject: [PATCH 06/16] Change endianness arguments to flags to allow additional arguments --- Doc/c-api/long.rst | 52 +++++++++++++++++++++++++------ Include/cpython/longobject.h | 24 ++++++++++++--- Lib/test/test_capi/test_long.py | 2 -- Modules/_testcapi/long.c | 14 ++++----- Objects/longobject.c | 54 +++++++++++++++++++++------------ 5 files changed, 103 insertions(+), 43 deletions(-) diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index 6a7eba7761de1a..d2561f4296fe2a 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -118,8 +118,10 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. Create a Python integer from the value contained in the first *n_bytes* of *buffer*, interpreted as a two's-complement signed number. - *endianness* may be passed ``-1`` for the native endian that CPython was - compiled with, or else ``0`` for big endian and ``1`` for little. + *flags* are as for :c:func:`PyLong_AsNativeBytes`. Passing ``-1`` will select + the native endian that CPython was compiled with and assume that the + most-significant bit is a sign bit. ``Py_ASNATIVEBYTES_REJECT_NEGATIVE`` has + no effect. .. versionadded:: 3.13 @@ -129,8 +131,10 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. Create a Python integer from the value contained in the first *n_bytes* of *buffer*, interpreted as an unsigned number. - *endianness* may be passed ``-1`` for the native endian that CPython was - compiled with, or else ``0`` for big endian and ``1`` for little. + *flags* are as for :c:func:`PyLong_AsNativeBytes`. Passing ``-1`` will select + the native endian that CPython was compiled with and assume that the + most-significant bit is not a sign bit. ``Py_ASNATIVEBYTES_REJECT_NEGATIVE`` + has no effect. .. versionadded:: 3.13 @@ -354,7 +358,7 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. Returns ``NULL`` on error. Use :c:func:`PyErr_Occurred` to disambiguate. -.. c:function:: Py_ssize_t PyLong_AsNativeBytes(PyObject *pylong, void* buffer, Py_ssize_t n_bytes, int endianness) +.. c:function:: Py_ssize_t PyLong_AsNativeBytes(PyObject *pylong, void* buffer, Py_ssize_t n_bytes, int flags) Copy the Python integer value to a native *buffer* of size *n_bytes*:: @@ -409,13 +413,41 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. // ... use bignum ... free(bignum); - *endianness* may be passed ``-1`` for the native endian that CPython was - compiled with, or ``0`` for big endian and ``1`` for little. + *flags* is a combination of the flags shown in the table below, or ``-1`` to + select defaults that behave most like a C cast (currently, + ``Py_ASNATIVEBYTES_NATIVE_ENDIAN | Py_ASNATIVEBYTES_UNSIGNED_BUFFER``). + + ============================================= ====== + Flag Value + ============================================= ====== + .. c:macro:: Py_ASNATIVEBYTES_DEFAULTS ``-1`` + .. c:macro:: Py_ASNATIVEBYTES_BIG_ENDIAN ``0`` + .. c:macro:: Py_ASNATIVEBYTES_LITTLE_ENDIAN ``1`` + .. c:macro:: Py_ASNATIVEBYTES_NATIVE_ENDIAN ``3`` + .. c:macro:: Py_ASNATIVEBYTES_UNSIGNED_BUFFER ``4`` + .. c:macro:: Py_ASNATIVEBYTES_REJECT_NEGATIVE ``8`` + ============================================= ====== + + Specifying ``Py_ASNATIVEBYTES_NATIVE_ENDIAN`` will override any other endian + flags. + + Specifying ``Py_ASNATIVEBYTES_UNSIGNED_BUFFER`` allows positive input values + that would set the most-significant bit to be converted. For example, + converting 128 into a single byte. Without this flag, these values request a + larger buffer in order to ensure a zero sign bit is included. If the + destination buffer is later treated as signed, a positive input value may + become negative. + + Specifying ``Py_ASNATIVEBYTES_REJECT_NEGATIVE`` causes an exception to be set + if *pylong* is negative. Without this flag, negative values will be copied + provided there is enough space for at least one sign bit, regardless of + whether ``Py_ASNATIVEBYTES_UNSIGNED_BUFFER`` was specified. Returns ``-1`` with an exception raised if *pylong* cannot be interpreted as - an integer. Otherwise, return the size of the buffer required to store the - value. If this is equal to or less than *n_bytes*, the entire value was - copied. ``0`` will never be returned. + an integer, or if *pylong* was negative and + ``Py_ASNATIVEBYTES_REJECT_NEGATIVE`` was set. Otherwise, return the size of + the buffer required to store the value. If this is equal to or less than + *n_bytes*, the entire value was copied. ``0`` will never be returned. Unless an exception is raised, all *n_bytes* of the buffer will always be written. In the case of truncation, as many of the lowest bits of the value diff --git a/Include/cpython/longobject.h b/Include/cpython/longobject.h index 07251db6bcc203..f54761fab8ef32 100644 --- a/Include/cpython/longobject.h +++ b/Include/cpython/longobject.h @@ -4,11 +4,24 @@ PyAPI_FUNC(PyObject*) PyLong_FromUnicodeObject(PyObject *u, int base); +#define Py_ASNATIVEBYTES_DEFAULTS -1 +#define Py_ASNATIVEBYTES_BIG_ENDIAN 0 +#define Py_ASNATIVEBYTES_LITTLE_ENDIAN 1 +#define Py_ASNATIVEBYTES_NATIVE_ENDIAN 3 +#define Py_ASNATIVEBYTES_UNSIGNED_BUFFER 4 +#define Py_ASNATIVEBYTES_REJECT_NEGATIVE 8 + /* PyLong_AsNativeBytes: Copy the integer value to a native variable. buffer points to the first byte of the variable. n_bytes is the number of bytes available in the buffer. Pass 0 to request the required size for the value. - endianness is -1 for native endian, 0 for big endian or 1 for little. + flags is a bitfield of the following flags: + * 1 - little endian + * 2 - native endian + * 4 - unsigned destination (e.g. don't reject copying 255 into one byte) + * 8 - raise an exception for negative inputs + If flags is -1 (all bits set), native endian is used and value truncation + behaves most like C (allows negative inputs and allow MSB set). Big endian mode will write the most significant byte into the address directly referenced by buffer; little endian will write the least significant byte into that address. @@ -24,19 +37,20 @@ PyAPI_FUNC(PyObject*) PyLong_FromUnicodeObject(PyObject *u, int base); calculate the bit length of an integer object. */ PyAPI_FUNC(Py_ssize_t) PyLong_AsNativeBytes(PyObject* v, void* buffer, - Py_ssize_t n_bytes, int endianness); + Py_ssize_t n_bytes, int flags); /* PyLong_FromNativeBytes: Create an int value from a native integer n_bytes is the number of bytes to read from the buffer. Passing 0 will always produce the zero int. PyLong_FromUnsignedNativeBytes always produces a non-negative int. - endianness is -1 for native endian, 0 for big endian or 1 for little. + flags is the same as for PyLong_AsNativeBytes, but only supports selecting + the endianness. Returns the int object, or NULL with an exception set. */ PyAPI_FUNC(PyObject*) PyLong_FromNativeBytes(const void* buffer, size_t n_bytes, - int endianness); + int flags); PyAPI_FUNC(PyObject*) PyLong_FromUnsignedNativeBytes(const void* buffer, - size_t n_bytes, int endianness); + size_t n_bytes, int flags); PyAPI_FUNC(int) PyUnstable_Long_IsCompact(const PyLongObject* op); PyAPI_FUNC(Py_ssize_t) PyUnstable_Long_CompactValue(const PyLongObject* op); diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index da0838e027febc..ebbbf70911f884 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -583,8 +583,6 @@ def test_long_asnativebytes(self): # Check a few error conditions. These are validated in code, but are # unspecified in docs, so if we make changes to the implementation, it's # fine to just update these tests rather than preserve the behaviour. - with self.assertRaises(SystemError): - asnativebytes(1, buffer, 0, 2) with self.assertRaises(TypeError): asnativebytes('not a number', buffer, 0, -1) diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c index 28dca01bee09a0..769c3909ea3fb1 100644 --- a/Modules/_testcapi/long.c +++ b/Modules/_testcapi/long.c @@ -52,8 +52,8 @@ pylong_asnativebytes(PyObject *module, PyObject *args) { PyObject *v; Py_buffer buffer; - Py_ssize_t n, endianness; - if (!PyArg_ParseTuple(args, "Ow*nn", &v, &buffer, &n, &endianness)) { + Py_ssize_t n, flags; + if (!PyArg_ParseTuple(args, "Ow*nn", &v, &buffer, &n, &flags)) { return NULL; } if (buffer.readonly) { @@ -66,7 +66,7 @@ pylong_asnativebytes(PyObject *module, PyObject *args) PyBuffer_Release(&buffer); return NULL; } - Py_ssize_t res = PyLong_AsNativeBytes(v, buffer.buf, n, (int)endianness); + Py_ssize_t res = PyLong_AsNativeBytes(v, buffer.buf, n, (int)flags); PyBuffer_Release(&buffer); return res >= 0 ? PyLong_FromSsize_t(res) : NULL; } @@ -76,8 +76,8 @@ static PyObject * pylong_fromnativebytes(PyObject *module, PyObject *args) { Py_buffer buffer; - Py_ssize_t n, endianness, signed_; - if (!PyArg_ParseTuple(args, "y*nnn", &buffer, &n, &endianness, &signed_)) { + Py_ssize_t n, flags, signed_; + if (!PyArg_ParseTuple(args, "y*nnn", &buffer, &n, &flags, &signed_)) { return NULL; } if (buffer.len < n) { @@ -86,8 +86,8 @@ pylong_fromnativebytes(PyObject *module, PyObject *args) return NULL; } PyObject *res = signed_ - ? PyLong_FromNativeBytes(buffer.buf, n, (int)endianness) - : PyLong_FromUnsignedNativeBytes(buffer.buf, n, (int)endianness); + ? PyLong_FromNativeBytes(buffer.buf, n, (int)flags) + : PyLong_FromUnsignedNativeBytes(buffer.buf, n, (int)flags); PyBuffer_Release(&buffer); return res; } diff --git a/Objects/longobject.c b/Objects/longobject.c index 57696690552566..d3bf3a7b50243f 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1083,8 +1083,10 @@ _fits_in_n_bits(Py_ssize_t v, Py_ssize_t n) static inline int _resolve_endianness(int *endianness) { - if (*endianness < 0) { + if (*endianness == -1 || (*endianness & 2)) { *endianness = PY_LITTLE_ENDIAN; + } else { + *endianness &= 1; } if (*endianness != 0 && *endianness != 1) { PyErr_SetString(PyExc_SystemError, "invalid 'endianness' value"); @@ -1094,7 +1096,7 @@ _resolve_endianness(int *endianness) } Py_ssize_t -PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness) +PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int flags) { PyLongObject *v; union { @@ -1109,7 +1111,7 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness) return -1; } - int little_endian = endianness; + int little_endian = flags; if (_resolve_endianness(&little_endian) < 0) { return -1; } @@ -1125,6 +1127,15 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness) do_decref = 1; } + if ((flags != -1 && (flags & Py_ASNATIVEBYTES_REJECT_NEGATIVE)) + && _PyLong_IsNegative(v)) { + PyErr_SetString(PyExc_OverflowError, "Cannot convert negative int"); + if (do_decref) { + Py_DECREF(v); + } + return -1; + } + if (_PyLong_IsCompact(v)) { res = 0; cv.v = _PyLong_CompactValue(v); @@ -1163,14 +1174,11 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness) /* Positive values with the MSB set do not require an * additional bit when the caller's intent is to treat them * as unsigned. */ - /* TODO: Disabled because we don't know the caller's intent - res = n; - * Instead, we'll return n+1, which is more accurate than - * res at this stage (which is just sizeof(size_t)), - * and it's something we can test for to ensure this case - * is being detected correctly. - */ - res = n + 1; + if (flags == -1 || flags & Py_ASNATIVEBYTES_UNSIGNED_BUFFER) { + res = n; + } else { + res = n + 1; + } } } else { @@ -1268,38 +1276,46 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness) PyObject * -PyLong_FromNativeBytes(const void* buffer, size_t n, int endianness) +PyLong_FromNativeBytes(const void* buffer, size_t n, int flags) { if (!buffer) { PyErr_BadInternalCall(); return NULL; } - int little_endian = endianness; + int little_endian = flags; if (_resolve_endianness(&little_endian) < 0) { return NULL; } - return _PyLong_FromByteArray((const unsigned char *)buffer, n, - little_endian, 1); + return _PyLong_FromByteArray( + (const unsigned char *)buffer, + n, + little_endian, + (flags == -1 || !(flags & Py_ASNATIVEBYTES_UNSIGNED_BUFFER)) ? 1 : 0 + ); } PyObject * -PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n, int endianness) +PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n, int flags) { if (!buffer) { PyErr_BadInternalCall(); return NULL; } - int little_endian = endianness; + int little_endian = flags; if (_resolve_endianness(&little_endian) < 0) { return NULL; } - return _PyLong_FromByteArray((const unsigned char *)buffer, n, - little_endian, 0); + return _PyLong_FromByteArray( + (const unsigned char *)buffer, + n, + little_endian, + (flags != -1 && (flags & Py_ASNATIVEBYTES_UNSIGNED_BUFFER)) ? 1 : 0 + ); } From c607f04b23893913d1da044820e3ac6026d8d19d Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Thu, 28 Mar 2024 18:14:57 +0000 Subject: [PATCH 07/16] Space --- Doc/c-api/long.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index d2561f4296fe2a..07aabd7e00d74a 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -418,7 +418,7 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. ``Py_ASNATIVEBYTES_NATIVE_ENDIAN | Py_ASNATIVEBYTES_UNSIGNED_BUFFER``). ============================================= ====== - Flag Value + Flag Value ============================================= ====== .. c:macro:: Py_ASNATIVEBYTES_DEFAULTS ``-1`` .. c:macro:: Py_ASNATIVEBYTES_BIG_ENDIAN ``0`` From 140c0112ef9cf5172f80288a2e53d92e9748b7ad Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Thu, 28 Mar 2024 20:55:44 +0000 Subject: [PATCH 08/16] Fix tests, one result, and docs --- Doc/c-api/long.rst | 6 ++-- Include/cpython/longobject.h | 2 +- Lib/test/test_capi/test_long.py | 51 ++++++++++++++++++++++++++------- Objects/longobject.c | 19 +++++------- 4 files changed, 52 insertions(+), 26 deletions(-) diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index 07aabd7e00d74a..30cf56571e7579 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -121,7 +121,8 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. *flags* are as for :c:func:`PyLong_AsNativeBytes`. Passing ``-1`` will select the native endian that CPython was compiled with and assume that the most-significant bit is a sign bit. ``Py_ASNATIVEBYTES_REJECT_NEGATIVE`` has - no effect. + no effect. Passing ``Py_ASNATIVEBYTES_UNSIGNED_BUFFER`` will produce the same + result as calling :c:func:`PyLong_FromUnsignedNativeBytes`. .. versionadded:: 3.13 @@ -133,8 +134,7 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. *flags* are as for :c:func:`PyLong_AsNativeBytes`. Passing ``-1`` will select the native endian that CPython was compiled with and assume that the - most-significant bit is not a sign bit. ``Py_ASNATIVEBYTES_REJECT_NEGATIVE`` - has no effect. + most-significant bit is not a sign bit. Flags other than endian are ignored. .. versionadded:: 3.13 diff --git a/Include/cpython/longobject.h b/Include/cpython/longobject.h index f54761fab8ef32..189229ee1035d8 100644 --- a/Include/cpython/longobject.h +++ b/Include/cpython/longobject.h @@ -44,7 +44,7 @@ PyAPI_FUNC(Py_ssize_t) PyLong_AsNativeBytes(PyObject* v, void* buffer, always produce the zero int. PyLong_FromUnsignedNativeBytes always produces a non-negative int. flags is the same as for PyLong_AsNativeBytes, but only supports selecting - the endianness. + the endianness or forcing an unsigned buffer. Returns the int object, or NULL with an exception set. */ PyAPI_FUNC(PyObject*) PyLong_FromNativeBytes(const void* buffer, size_t n_bytes, diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index ebbbf70911f884..24a604b194b01e 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -580,6 +580,33 @@ def test_long_asnativebytes(self): f"PyLong_AsNativeBytes(v, buffer, {n}, )") self.assertEqual(expect_le, buffer[:n], "") + # Test cases that do not request size for a sign bit when we pass the + # Py_ASNATIVEBYTES_UNSIGNED_BUFFER flag + for v, expect_be, expect_n in [ + (255, b'\xff', 1), + # We pass a 2 byte buffer so it just uses the whole thing + (255, b'\x00\xff', 2), + + (2**63, b'\x80' + b'\x00' * 7, 8), + # We pass a 9 byte buffer so it uses the whole thing + (2**63, b'\x00\x80' + b'\x00' * 7, 9), + + (2**256-1, b'\xff' * 32, 32), + # We pass a 33 byte buffer so it uses the whole thing + (2**256-1, b'\x00' + b'\xff' * 32, 33), + ]: + with self.subTest(f"{v:X}-{len(expect_be)}bytes-unsigned"): + n = len(expect_be) + buffer = bytearray(b"\xa5"*n) + self.assertEqual(expect_n, asnativebytes(v, buffer, n, 4), + f"PyLong_AsNativeBytes(v, buffer, {n}, )") + self.assertEqual(expect_n, asnativebytes(v, buffer, n, 5), + f"PyLong_AsNativeBytes(v, buffer, {n}, )") + + # Ensure Py_ASNATIVEBYTES_REJECT_NEGATIVE raises on negative value + with self.assertRaises(OverflowError): + asnativebytes(-1, buffer, 0, 8) + # Check a few error conditions. These are validated in code, but are # unspecified in docs, so if we make changes to the implementation, it's # fine to just update these tests rather than preserve the behaviour. @@ -616,12 +643,9 @@ def test_long_asnativebytes_fuzz(self): # All values are positive, so if MSB is set, expect extra bit # when we request the size or have a large enough buffer expect_1 = (SZ, n + 1) - # When requesting exactly the right size, we expect the return - # to be exactly the right size. - #expect_2 = (n,) - # However, right now, the extra bit is still requested. These - # are the TODO comments in PyLong_AsNativeBytes - expect_2 = (n + 1,) + # When passing Py_ASNATIVEBYTES_UNSIGNED_BUFFER, we expect the + # return to be exactly the right size. + expect_2 = (n,) try: actual = asnativebytes(v, buffer, 0, -1) @@ -635,10 +659,10 @@ def test_long_asnativebytes_fuzz(self): self.assertIn(actual, expect_1) self.assertEqual(bytes_le, buffer[:n]) - actual = asnativebytes(v, buffer, n, 0) - self.assertIn(actual, expect_2) - actual = asnativebytes(v, buffer, n, 1) - self.assertIn(actual, expect_2) + actual = asnativebytes(v, buffer, n, 4) + self.assertIn(actual, expect_2, bytes_be.hex()) + actual = asnativebytes(v, buffer, n, 5) + self.assertIn(actual, expect_2, bytes_be.hex()) except AssertionError: if support.verbose: print() @@ -687,6 +711,13 @@ def test_long_fromnativebytes(self): self.assertEqual(expect_u, fromnativebytes(v_be, n, -1, 0), f"PyLong_FromUnsignedNativeBytes(buffer, {n}, )") + # Swap the signed/unsigned request for tests and use the + # Py_ASNATIVEBYTES_UNSIGNED_BUFFER flag to select the opposite + self.assertEqual(expect_s, fromnativebytes(v_be, n, 0, 0), + f"PyLong_FromUnsignedNativeBytes(buffer, {n}, )") + self.assertEqual(expect_u, fromnativebytes(v_be, n, 4, 1), + f"PyLong_FromNativeBytes(buffer, {n}, )") + if __name__ == "__main__": unittest.main() diff --git a/Objects/longobject.c b/Objects/longobject.c index d3bf3a7b50243f..fab579f6b39daa 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1174,7 +1174,7 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int flags) /* Positive values with the MSB set do not require an * additional bit when the caller's intent is to treat them * as unsigned. */ - if (flags == -1 || flags & Py_ASNATIVEBYTES_UNSIGNED_BUFFER) { + if (flags == -1 || (flags & Py_ASNATIVEBYTES_UNSIGNED_BUFFER)) { res = n; } else { res = n + 1; @@ -1257,11 +1257,11 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int flags) * as unsigned. */ unsigned char *b = (unsigned char *)buffer; if (b[little_endian ? n - 1 : 0] & 0x80) { - /* TODO: Disabled because we don't know the caller's intent - res = n; - * Instead, we'll return res == n+1. - */ - res = n + 1; + if (flags == -1 || (flags & Py_ASNATIVEBYTES_UNSIGNED_BUFFER)) { + res = n; + } else { + res = n + 1; + } } } } @@ -1310,12 +1310,7 @@ PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n, int flags) return NULL; } - return _PyLong_FromByteArray( - (const unsigned char *)buffer, - n, - little_endian, - (flags != -1 && (flags & Py_ASNATIVEBYTES_UNSIGNED_BUFFER)) ? 1 : 0 - ); + return _PyLong_FromByteArray((const unsigned char *)buffer, n, little_endian, 0); } From 77097bf4cb3966ccbfa1b80a2a2abd236184c4c1 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Thu, 28 Mar 2024 20:58:24 +0000 Subject: [PATCH 09/16] Fix test --- Lib/test/test_capi/test_long.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index 24a604b194b01e..dc7ca6b6e011c4 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -711,10 +711,8 @@ def test_long_fromnativebytes(self): self.assertEqual(expect_u, fromnativebytes(v_be, n, -1, 0), f"PyLong_FromUnsignedNativeBytes(buffer, {n}, )") - # Swap the signed/unsigned request for tests and use the - # Py_ASNATIVEBYTES_UNSIGNED_BUFFER flag to select the opposite - self.assertEqual(expect_s, fromnativebytes(v_be, n, 0, 0), - f"PyLong_FromUnsignedNativeBytes(buffer, {n}, )") + # Swap the unsigned request for tests and use the + # Py_ASNATIVEBYTES_UNSIGNED_BUFFER flag instead self.assertEqual(expect_u, fromnativebytes(v_be, n, 4, 1), f"PyLong_FromNativeBytes(buffer, {n}, )") From a8e25404a612715aca0d0e1d3f3b06bbe7977e31 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Thu, 28 Mar 2024 21:49:56 +0000 Subject: [PATCH 10/16] Update NEWS --- .../C API/2024-02-28-15-50-01.gh-issue-111140.mpwcUg.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/C API/2024-02-28-15-50-01.gh-issue-111140.mpwcUg.rst b/Misc/NEWS.d/next/C API/2024-02-28-15-50-01.gh-issue-111140.mpwcUg.rst index 74c814ee409f6e..dcf93f29b1d02b 100644 --- a/Misc/NEWS.d/next/C API/2024-02-28-15-50-01.gh-issue-111140.mpwcUg.rst +++ b/Misc/NEWS.d/next/C API/2024-02-28-15-50-01.gh-issue-111140.mpwcUg.rst @@ -1,3 +1,3 @@ -Allow :c:func:`PyLong_AsNativeBytes` to extract negative numbers requiring -every single bit of the target buffer into the buffer without requesting a -larger one. +Add additional flags to :c:func:`PyLong_AsNativeBytes` and +:c:func`PyLong_FromNativeBytes` to allow the caller to determine how to handle +edge cases around values that fill the entire buffer. From 5ba6451179553e887e24b270f04c2f91e801858d Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Thu, 28 Mar 2024 21:52:00 +0000 Subject: [PATCH 11/16] Missed colon --- .../next/C API/2024-02-28-15-50-01.gh-issue-111140.mpwcUg.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/C API/2024-02-28-15-50-01.gh-issue-111140.mpwcUg.rst b/Misc/NEWS.d/next/C API/2024-02-28-15-50-01.gh-issue-111140.mpwcUg.rst index dcf93f29b1d02b..113db93d186009 100644 --- a/Misc/NEWS.d/next/C API/2024-02-28-15-50-01.gh-issue-111140.mpwcUg.rst +++ b/Misc/NEWS.d/next/C API/2024-02-28-15-50-01.gh-issue-111140.mpwcUg.rst @@ -1,3 +1,3 @@ Add additional flags to :c:func:`PyLong_AsNativeBytes` and -:c:func`PyLong_FromNativeBytes` to allow the caller to determine how to handle +:c:func:`PyLong_FromNativeBytes` to allow the caller to determine how to handle edge cases around values that fill the entire buffer. From 68b9d8ed9ae41bef09c2b11694b65a088c150d65 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Tue, 2 Apr 2024 13:50:50 +0100 Subject: [PATCH 12/16] Doc fixes and improved test failure output --- Doc/c-api/long.rst | 13 +++++++------ Lib/test/test_capi/test_long.py | 12 +++++++++--- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index 30cf56571e7579..07a3989507df39 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -113,21 +113,21 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. retrieved from the resulting value using :c:func:`PyLong_AsVoidPtr`. -.. c:function:: PyObject* PyLong_FromNativeBytes(const void* buffer, size_t n_bytes, int endianness) +.. c:function:: PyObject* PyLong_FromNativeBytes(const void* buffer, size_t n_bytes, int flags) Create a Python integer from the value contained in the first *n_bytes* of *buffer*, interpreted as a two's-complement signed number. *flags* are as for :c:func:`PyLong_AsNativeBytes`. Passing ``-1`` will select the native endian that CPython was compiled with and assume that the - most-significant bit is a sign bit. ``Py_ASNATIVEBYTES_REJECT_NEGATIVE`` has - no effect. Passing ``Py_ASNATIVEBYTES_UNSIGNED_BUFFER`` will produce the same - result as calling :c:func:`PyLong_FromUnsignedNativeBytes`. + most-significant bit is a sign bit. Passing + ``Py_ASNATIVEBYTES_UNSIGNED_BUFFER`` will produce the same result as calling + :c:func:`PyLong_FromUnsignedNativeBytes`. Other flags are ignored. .. versionadded:: 3.13 -.. c:function:: PyObject* PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n_bytes, int endianness) +.. c:function:: PyObject* PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n_bytes, int flags) Create a Python integer from the value contained in the first *n_bytes* of *buffer*, interpreted as an unsigned number. @@ -416,6 +416,7 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. *flags* is a combination of the flags shown in the table below, or ``-1`` to select defaults that behave most like a C cast (currently, ``Py_ASNATIVEBYTES_NATIVE_ENDIAN | Py_ASNATIVEBYTES_UNSIGNED_BUFFER``). + Note that ``Py_ASNATIVEBYTES_DEFAULTS`` cannot be combined with other flags. ============================================= ====== Flag Value @@ -429,7 +430,7 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. ============================================= ====== Specifying ``Py_ASNATIVEBYTES_NATIVE_ENDIAN`` will override any other endian - flags. + flags. Passing ``2`` is reserved. Specifying ``Py_ASNATIVEBYTES_UNSIGNED_BUFFER`` allows positive input values that would set the most-significant bit to be converted. For example, diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index dc7ca6b6e011c4..5f6701c476e2ac 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -660,16 +660,22 @@ def test_long_asnativebytes_fuzz(self): self.assertEqual(bytes_le, buffer[:n]) actual = asnativebytes(v, buffer, n, 4) + self.assertEqual(1, v) self.assertIn(actual, expect_2, bytes_be.hex()) actual = asnativebytes(v, buffer, n, 5) self.assertIn(actual, expect_2, bytes_be.hex()) - except AssertionError: + except AssertionError as ex: + value_hex = ''.join(reversed([ + f'{b:02X}{"" if i % 8 else "_"}' + for i, b in enumerate(bytes_le, start=1) + ])).strip('_') if support.verbose: print() - print(''.join(f'{b:02X}' for b in bytes_be)) print(n, 'bytes') + print('hex =', value_hex) print('int =', v) - raise + raise + raise AssertionError(f"Value: 0x{value_hex}") from ex def test_long_fromnativebytes(self): import math From 37ad671192ade0bf53c70465692dd34d294ede9b Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Tue, 2 Apr 2024 13:52:34 +0100 Subject: [PATCH 13/16] Remove my deliberate failure --- Lib/test/test_capi/test_long.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index 5f6701c476e2ac..7146430f480032 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -660,7 +660,6 @@ def test_long_asnativebytes_fuzz(self): self.assertEqual(bytes_le, buffer[:n]) actual = asnativebytes(v, buffer, n, 4) - self.assertEqual(1, v) self.assertIn(actual, expect_2, bytes_be.hex()) actual = asnativebytes(v, buffer, n, 5) self.assertIn(actual, expect_2, bytes_be.hex()) From 1e615cf0f1a4c667f38ecf28e4e245aa663710e5 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 4 Apr 2024 16:35:32 +0200 Subject: [PATCH 14/16] Reword PyLong_AsNativeBytes docs (#33) --- Doc/c-api/long.rst | 113 ++++++++++++++++++++++++++------------------- 1 file changed, 66 insertions(+), 47 deletions(-) diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index 07a3989507df39..1eb8f191c3ca32 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -360,12 +360,39 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. .. c:function:: Py_ssize_t PyLong_AsNativeBytes(PyObject *pylong, void* buffer, Py_ssize_t n_bytes, int flags) - Copy the Python integer value to a native *buffer* of size *n_bytes*:: + Copy the Python integer value *pylong* to a native *buffer* of size + *n_bytes*. The *flags* can be set to ``-1`` to behave similarly to a C cast, + or to values documented below to control the behavior. + + Returns ``-1`` with an exception raised on error. This may happen if + *pylong* cannot be interpreted as an integer, or if *pylong* was negative + and the ``Py_ASNATIVEBYTES_REJECT_NEGATIVE`` flag was set. + + Otherwise, returns the number of bytes required to store the value. + If this is equal to or less than *n_bytes*, the entire value was copied. + All *n_bytes* of the buffer are written: large buffers are padded with + zeroes. + + If the returned value is greater than than *n_bytes*, the value was + truncated: as many of the lowest bits of the value as could fit are written, + and the higher bits are ignored. This matches the typical behavior + of a C-style downcast. + + .. note:: + + Overflow is not considered an error. If the returned value + is larger than *n_bytes*, most significant bits were discarded. + + ``0`` will never be returned. + + Values are always copied as two's-complement. + + Usage example:: int32_t value; Py_ssize_t bytes = PyLong_AsNativeBits(pylong, &value, sizeof(value), -1); if (bytes < 0) { - // A Python exception was set with the reason. + // Failed. A Python exception was set with the reason. return NULL; } else if (bytes <= (Py_ssize_t)sizeof(value)) { @@ -376,19 +403,24 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. // lowest bits of pylong. } - The above example may look *similar* to - :c:func:`PyLong_As* ` - but instead fills in a specific caller defined type and never raises an - error about of the :class:`int` *pylong*'s value regardless of *n_bytes* - or the returned byte count. + Passing zero to *n_bytes* will return the size of a buffer that would + be large enough to hold the value. This may be larger than technically + necessary, but not unreasonably so. - To get at the entire potentially big Python value, this can be used to - reserve enough space and copy it:: + .. note:: + + Passing *n_bytes=0* to this function is not an accurate way to determine + the bit length of a value. + + If *n_bytes=0*, *buffer* may be ``NULL``. + + To get at the entire Python value of an unknown size, the function can be + called twice: first to determine the buffer size, then to fill it:: // Ask how much space we need. Py_ssize_t expected = PyLong_AsNativeBits(pylong, NULL, 0, -1); if (expected < 0) { - // A Python exception was set with the reason. + // Failed. A Python exception was set with the reason. return NULL; } assert(expected != 0); // Impossible per the API definition. @@ -399,11 +431,11 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. } // Safely get the entire value. Py_ssize_t bytes = PyLong_AsNativeBits(pylong, bignum, expected, -1); - if (bytes < 0) { // Exception set. + if (bytes < 0) { // Exception has been set. free(bignum); return NULL; } - else if (bytes > expected) { // Be safe, should not be possible. + else if (bytes > expected) { // This should not be possible. PyErr_SetString(PyExc_RuntimeError, "Unexpected bignum truncation after a size check."); free(bignum); @@ -413,10 +445,13 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. // ... use bignum ... free(bignum); - *flags* is a combination of the flags shown in the table below, or ``-1`` to - select defaults that behave most like a C cast (currently, - ``Py_ASNATIVEBYTES_NATIVE_ENDIAN | Py_ASNATIVEBYTES_UNSIGNED_BUFFER``). - Note that ``Py_ASNATIVEBYTES_DEFAULTS`` cannot be combined with other flags. + *flags* is either ``-1`` (``Py_ASNATIVEBYTES_DEFAULTS``) to select defaults + that behave most like a C cast, or a combintation of the other flags in + the table below. + Note that ``-1`` cannot be combined with other flags. + + Currently, ``-1`` corresponds to + ``Py_ASNATIVEBYTES_NATIVE_ENDIAN | Py_ASNATIVEBYTES_UNSIGNED_BUFFER``. ============================================= ====== Flag Value @@ -432,45 +467,29 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. Specifying ``Py_ASNATIVEBYTES_NATIVE_ENDIAN`` will override any other endian flags. Passing ``2`` is reserved. - Specifying ``Py_ASNATIVEBYTES_UNSIGNED_BUFFER`` allows positive input values - that would set the most-significant bit to be converted. For example, - converting 128 into a single byte. Without this flag, these values request a - larger buffer in order to ensure a zero sign bit is included. If the - destination buffer is later treated as signed, a positive input value may - become negative. + By default, sufficient buffer will be requested to include a sign bit. + For example, when converting 128 with *n_bytes=1*, the function will return + 2 (or more) in order to store a zero sign bit. + + If ``Py_ASNATIVEBYTES_UNSIGNED_BUFFER`` is specified, a zero sign bit + will be omitted from size calculations. This allows, for example, 128 to fit + in a single-byte buffer. If the destination buffer is later treated as + signed, a positive input value may become negative. + Note that the flag does not affect handling of negative values: for those, + space for a sign bit is always requested. Specifying ``Py_ASNATIVEBYTES_REJECT_NEGATIVE`` causes an exception to be set if *pylong* is negative. Without this flag, negative values will be copied provided there is enough space for at least one sign bit, regardless of whether ``Py_ASNATIVEBYTES_UNSIGNED_BUFFER`` was specified. - Returns ``-1`` with an exception raised if *pylong* cannot be interpreted as - an integer, or if *pylong* was negative and - ``Py_ASNATIVEBYTES_REJECT_NEGATIVE`` was set. Otherwise, return the size of - the buffer required to store the value. If this is equal to or less than - *n_bytes*, the entire value was copied. ``0`` will never be returned. - - Unless an exception is raised, all *n_bytes* of the buffer will always be - written. In the case of truncation, as many of the lowest bits of the value - as could fit are written. This allows the caller to ignore all non-negative - results if the intent is to match the typical behavior of a C-style - downcast. No exception is set on truncation. - - Values are always copied as two's-complement and sufficient buffer will be - requested to include a sign bit. For example, this may cause an value that - fits into 8 bytes when treated as unsigned to request 9 bytes, even though - all eight bytes were copied into the buffer. What has been omitted is the - zero sign bit -- redundant if the caller's intention is to treat the value - as unsigned. - - Passing zero to *n_bytes* will return the size of a buffer that would - be large enough to hold the value. This may be larger than technically - necessary, but not unreasonably so. - .. note:: - Passing *n_bytes=0* to this function is not an accurate way to determine - the bit length of a value. + With the default *flags* (``-1``, or *UNSIGNED_BUFFER* without + *REJECT_NEGATIVE*), multiple Python integers can map to a single value + without overflow. For example, both ``255`` and ``-1`` fit a single-byte + buffer and set all its bits. + This matches typical C cast behavior. .. versionadded:: 3.13 From d1b886c549ffb66b70a739e3c94338baba2a5e35 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Thu, 4 Apr 2024 15:36:45 +0100 Subject: [PATCH 15/16] Apply PR feedback --- Objects/longobject.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/Objects/longobject.c b/Objects/longobject.c index fab579f6b39daa..c4ab064d688d67 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1088,10 +1088,7 @@ _resolve_endianness(int *endianness) } else { *endianness &= 1; } - if (*endianness != 0 && *endianness != 1) { - PyErr_SetString(PyExc_SystemError, "invalid 'endianness' value"); - return -1; - } + assert(*endianness == 0 || *endianness == 1); return 0; } @@ -1129,7 +1126,7 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int flags) if ((flags != -1 && (flags & Py_ASNATIVEBYTES_REJECT_NEGATIVE)) && _PyLong_IsNegative(v)) { - PyErr_SetString(PyExc_OverflowError, "Cannot convert negative int"); + PyErr_SetString(PyExc_ValueError, "Cannot convert negative int"); if (do_decref) { Py_DECREF(v); } @@ -1222,16 +1219,20 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int flags) /* Calculates the number of bits required for the *absolute* value * of v. This does not take sign into account, only magnitude. */ size_t nb = _PyLong_NumBits((PyObject *)v); - /* Normally this would be((nb - 1) / 8) + 1 to avoid rounding up - * multiples of 8 to the next byte, but we add an implied bit for - * the sign and it cancels out. */ - res = (Py_ssize_t)(nb / 8) + 1; + if (nb == (size_t)-1) { + res = -1; + } else { + /* Normally this would be((nb - 1) / 8) + 1 to avoid rounding up + * multiples of 8 to the next byte, but we add an implied bit for + * the sign and it cancels out. */ + res = (Py_ssize_t)(nb / 8) + 1; + } /* Two edge cases exist that are best handled after extracting the * bits. These may result in us reporting overflow when the value * actually fits. */ - if (n > 0 && res == n + 1 && nb == n * 8) { + if (n > 0 && res == n + 1 && nb % 8 == 0) { if (_PyLong_IsNegative(v)) { /* Values of 0x80...00 from negative values that use every * available bit in the buffer do not require an additional From 614a6aed51d692905ceb498c2aa30656303df8fb Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Thu, 4 Apr 2024 15:39:31 +0100 Subject: [PATCH 16/16] Check for new error --- Lib/test/test_capi/test_long.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index 7146430f480032..83f894e552f983 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -604,7 +604,7 @@ def test_long_asnativebytes(self): f"PyLong_AsNativeBytes(v, buffer, {n}, )") # Ensure Py_ASNATIVEBYTES_REJECT_NEGATIVE raises on negative value - with self.assertRaises(OverflowError): + with self.assertRaises(ValueError): asnativebytes(-1, buffer, 0, 8) # Check a few error conditions. These are validated in code, but are