From dc6f6163ad9754c9ad53e9e3f3613ca3891a77ba Mon Sep 17 00:00:00 2001 From: Jack O'Connor Date: Sun, 18 Apr 2021 15:50:34 -0400 Subject: [PATCH] add blake3 to hashlib blake3_impl.c and blake3module.c are adapted from the existing BLAKE2 module. This involves a lot of copy-paste, and hopefully someone who knows this code better can help me clean them up. (In particular, BLAKE2 relies on clinic codegen to share code between BLAKE2b and BLAKE2s, but BLAKE3 has no need for that.) blake3_dispatch.c, which is vendored from upstream, includes runtime CPU feature detection to choose the appropriate SIMD instruction set for the current platform (x86 only). In this model, the build should include all instruction sets, and here I unconditionally include the Unix assembly files (*_unix.S) as `extra_objects` in setup.py. This "works on my box", but is currently incomplete in several ways: - It needs some Windows-specific build logic. There are two additional assembly flavors included for each instruction set, *_windows_gnu.S and *_windows_msvc.asm. I need to figure out how to include the right flavor based on the target OS/ABI. - I need to figure out how to omit these files on non-x86-64 platforms. x86-32 will require some explicit preprocessor definitions to restrict blake3_dispatch.c to portable code. (Unless we vendor intrinsics-based implementations for 32-bit support. More on this below.) - It's not going to work on compilers that are too old to recognize these instruction sets, particularly AVX-512. (Question: What's the oldest GCC version that CPython supports?) Maybe compiler feature detection could be added to ./configure and somehow plumbed through to setup.py. I'm hoping someone more experienced with the build system can help me narrow down the best solution for each of those. This also raises the higher level question of whether the CPython project feels comfortable about including assembly files in general. 
As a possible alternative, the upstream BLAKE3 project also provides intrinsics-based implementations of the same optimizations. The upsides of these are 1) that they don't require Unix/Windows platform detection, 2) that they support 32-bit x86 targets, and 3) that C is easier to audit than assembly. However, the downsides of these are 1) that they're ~10% slower than the hand-written assembly, 2) that their performance is less consistent and worse on older compilers, and 3) that they take noticeably longer to compile. We recommend the assembly implementations for these reasons, but intrinsics are a viable option if assembly violates CPython's requirements. --- Lib/hashlib.py | 10 +- Lib/test/test_hashlib.py | 80 +++++++++- Modules/_blake3/blake3_impl.c | 253 ++++++++++++++++++++++++++++++++ Modules/_blake3/blake3_impl.c.h | 158 ++++++++++++++++++++ Modules/_blake3/blake3module.c | 106 +++++++++++++ PC/config.c | 1 + configure | 6 +- setup.py | 25 +++- 8 files changed, 627 insertions(+), 12 deletions(-) create mode 100644 Modules/_blake3/blake3_impl.c create mode 100644 Modules/_blake3/blake3_impl.c.h create mode 100644 Modules/_blake3/blake3module.c diff --git a/Lib/hashlib.py b/Lib/hashlib.py index ffa3be049a4f35..69c4801448c9bf 100644 --- a/Lib/hashlib.py +++ b/Lib/hashlib.py @@ -12,7 +12,7 @@ than using new(name): md5(), sha1(), sha224(), sha256(), sha384(), sha512(), blake2b(), blake2s(), -sha3_224, sha3_256, sha3_384, sha3_512, shake_128, and shake_256. +blake3(), sha3_224, sha3_256, sha3_384, sha3_512, shake_128, and shake_256. More algorithms may be available on your platform but the above are guaranteed to exist. See the algorithms_guaranteed and algorithms_available attributes @@ -56,7 +56,7 @@ # This tuple and __get_builtin_constructor() must be modified if a new # always available algorithm is added. 
__always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512', - 'blake2b', 'blake2s', + 'blake2b', 'blake2s', 'blake3', 'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512', 'shake_128', 'shake_256') @@ -75,8 +75,9 @@ # implementations neither support keyed blake2 (blake2 MAC) nor advanced # features like salt, personalization, or tree hashing. OpenSSL hash-only # variants are available as 'blake2b512' and 'blake2s256', though. +# OpenSSL 1.1.0 does not support blake3. __block_openssl_constructor = { - 'blake2b', 'blake2s', + 'blake2b', 'blake2s', 'blake3' } def __get_builtin_constructor(name): @@ -103,6 +104,9 @@ def __get_builtin_constructor(name): import _blake2 cache['blake2b'] = _blake2.blake2b cache['blake2s'] = _blake2.blake2s + elif name in {'blake3'}: + import _blake3 + cache['blake3'] = _blake3.blake3 elif name in {'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512'}: import _sha3 cache['sha3_224'] = _sha3.sha3_224 diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index bf9f5594004602..d2cb8462d0d5e7 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -92,7 +92,7 @@ class HashLibTestCase(unittest.TestCase): supported_hash_names = ( 'md5', 'MD5', 'sha1', 'SHA1', 'sha224', 'SHA224', 'sha256', 'SHA256', 'sha384', 'SHA384', 'sha512', 'SHA512', - 'blake2b', 'blake2s', + 'blake2b', 'blake2s', 'blake3', 'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512', 'shake_128', 'shake_256') @@ -172,6 +172,9 @@ def add_builtin_constructor(name): if _blake2: add_builtin_constructor('blake2s') add_builtin_constructor('blake2b') + _blake3 = self._conditional_import_module('_blake3') + if _blake3: + add_builtin_constructor('blake3') _sha3 = self._conditional_import_module('_sha3') if _sha3: @@ -342,7 +345,7 @@ def test_large_update(self): self.assertEqual(m1.digest(*args), m4_copy.digest(*args)) self.assertEqual(m4.digest(*args), m4_digest) - def check(self, name, data, hexdigest, shake=False, **kwargs): + def check(self, name, data, 
hexdigest, set_length=False, **kwargs): length = len(hexdigest)//2 hexdigest = hexdigest.lower() constructors = self.constructors_to_test[name] @@ -350,17 +353,17 @@ def check(self, name, data, hexdigest, shake=False, **kwargs): self.assertGreaterEqual(len(constructors), 2) for hash_object_constructor in constructors: m = hash_object_constructor(data, **kwargs) - computed = m.hexdigest() if not shake else m.hexdigest(length) + computed = m.hexdigest() if not set_length else m.hexdigest(length) self.assertEqual( computed, hexdigest, "Hash algorithm %s constructed using %s returned hexdigest" " %r for %d byte input data that should have hashed to %r." % (name, hash_object_constructor, computed, len(data), hexdigest)) - computed = m.digest() if not shake else m.digest(length) + computed = m.digest() if not set_length else m.digest(length) digest = bytes.fromhex(hexdigest) self.assertEqual(computed, digest) - if not shake: + if not set_length: self.assertEqual(len(digest), m.digest_size) def check_no_unicode(self, algorithm_name): @@ -776,6 +779,73 @@ def test_blake2s_vectors(self): key = bytes.fromhex(key) self.check('blake2s', msg, md, key=key) + def test_case_blake3_0(self): + self.check('blake3', b"", + "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262") + + def test_case_blake3_1(self): + self.check('blake3', b"abc", + "6437b3ac38465133ffb63b75273a8db548c558465d79db03fd359c6cd5bd9d85") + + def test_case_blake3_keyed(self): + self.check('blake3', b"abc", + "6da54495d8152f2bcba87bd7282df70901cdb66b4448ed5f4c7bd2852b8b5532", + key=bytes(range(32))) + + def test_case_blake3_derive_key(self): + self.check('blake3', b"super secret key material", + "dbf0a1433e0137fb11b71d3ae3c138bff46445936dd5d4f01f403c23abd5660a", + derive_key_context="hardcoded, globally unique, application-specific context string") + + def test_case_blake3_length(self): + self.check('blake3', b"", + "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262" + 
"e00f03e7b69af26b7faaf09fcd333050338ddfe085b8cc869ca98b206c08243a" + + "26f5487789e8f660afe6c99ef9e0c52b92e7393024a80459cf91f476f9ffdbda" + + "7001c22e159b402631f277ca96f2defdf1078282314e763699a31c5363165421" + + "cce14d", + # True here means that digest() and hexdigest() get a length + # argument. This shares the variable length test codepath with + # shake_128 and shake_256. + True) + + def test_case_blake3_seek(self): + # None of the other hashes support a seek parameter. Rather than + # hacking this into self.check(), just invoke blake3() explicitly. + output_hex = ( + "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262" + + "e00f03e7b69af26b7faaf09fcd333050338ddfe085b8cc869ca98b206c08243a" + + "26f5487789e8f660afe6c99ef9e0c52b92e7393024a80459cf91f476f9ffdbda" + + "7001c22e159b402631f277ca96f2defdf1078282314e763699a31c5363165421" + + "cce14d") + output_bytes = unhexlify(output_hex) + # Test a few interesting seek points, including one with length=0. + for seek in [0, 1, len(output_bytes)//2, len(output_bytes)-1, len(output_bytes)]: + length = len(output_bytes) - seek + expected_bytes = output_bytes[seek:] + expected_hex = output_hex[2*seek:] + # positional (unittest assertions are used because bare asserts vanish under -O) + self.assertEqual(expected_bytes, hashlib.blake3().digest(length, seek)) + self.assertEqual(expected_hex, hashlib.blake3().hexdigest(length, seek)) + # keywords + self.assertEqual(expected_bytes, hashlib.blake3().digest(length=length, seek=seek)) + self.assertEqual(expected_hex, hashlib.blake3().hexdigest(length=length, seek=seek)) + + def test_case_blake3_key_must_be_32_bytes(self): + for length in [0, 1, 31, 33]: + try: + hashlib.blake3(key=b"\0"*length) + self.fail("the line above should raise an exception") + except ValueError as e: + self.assertEqual(str(e), "key must be 32 bytes") + + def test_case_blake3_keyed_derive_key_exclusive(self): + try: + hashlib.blake3(key=b"\0"*32, derive_key_context="foo") + self.fail("the line above should raise an exception") + except ValueError as e: + assert str(e) == "key and derive_key_context 
can't be used together" + def test_case_sha3_224_0(self): self.check('sha3_224', b"", "6b4e03423667dbb73b6e15454f0eb1abd4597f9a1b078e3f5b5a6bc7") diff --git a/Modules/_blake3/blake3_impl.c b/Modules/_blake3/blake3_impl.c new file mode 100644 index 00000000000000..968bea6bc26516 --- /dev/null +++ b/Modules/_blake3/blake3_impl.c @@ -0,0 +1,253 @@ +/* + * Written in 2013 by Dmitry Chestnykh + * Modified for CPython by Christian Heimes + * + * To the extent possible under law, the author have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. http://creativecommons.org/publicdomain/zero/1.0/ + */ + +#include "Python.h" +#include "pystrhex.h" + +#include "../hashlib.h" +#include "impl/blake3.h" + +extern PyType_Spec blake3_type_spec; + +typedef struct { + PyObject_HEAD + PyThread_type_lock lock; + blake3_hasher state; +} BLAKE3Object; + +#include "blake3_impl.c.h" + +static BLAKE3Object * +new_BLAKE3Object(PyTypeObject *type) +{ + BLAKE3Object *self; + self = (BLAKE3Object *)type->tp_alloc(type, 0); + if (self != NULL) { + self->lock = NULL; + } + return self; +} + +static PyObject * +py_blake3_new_impl(PyTypeObject *type, PyObject *data, Py_buffer *key, + const char *derive_key_context_utf8, Py_ssize_t derive_key_context_len, + int usedforsecurity) { + BLAKE3Object *self = NULL; + Py_buffer buf; + + self = new_BLAKE3Object(type); + if (self == NULL) { + goto error; + } + + if (key != NULL && derive_key_context_utf8 != NULL) { + PyErr_Format(PyExc_ValueError, + "key and derive_key_context can't be used together"); + goto error; + } + + if (key != NULL) { + if (key->len != BLAKE3_KEY_LEN) { + PyErr_Format(PyExc_ValueError, + "key must be %d bytes", + BLAKE3_KEY_LEN); + goto error; + } + // The keyed mode. + blake3_hasher_init_keyed(&self->state, key->buf); + } else if (derive_key_context_utf8 != NULL) { + // The key derivation mode.
+ blake3_hasher_init_derive_key_raw(&self->state, derive_key_context_utf8, + derive_key_context_len); + } else { + // The default hashing mode. + blake3_hasher_init(&self->state); + } + + /* Process initial data if any. */ + if (data != NULL) { + GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); + + if (buf.len >= HASHLIB_GIL_MINSIZE) { + Py_BEGIN_ALLOW_THREADS + blake3_hasher_update(&self->state, buf.buf, buf.len); + Py_END_ALLOW_THREADS + } else { + blake3_hasher_update(&self->state, buf.buf, buf.len); + } + PyBuffer_Release(&buf); + } + + return (PyObject *)self; + + error: + if (self != NULL) { + Py_DECREF(self); + } + return NULL; +} + +static PyObject * +_blake3_blake3_copy_impl(BLAKE3Object *self) +{ + BLAKE3Object *cpy; + + if ((cpy = new_BLAKE3Object(Py_TYPE(self))) == NULL) + return NULL; + + ENTER_HASHLIB(self); + cpy->state = self->state; + LEAVE_HASHLIB(self); + return (PyObject *)cpy; +} + +static PyObject * +_blake3_blake3_update(BLAKE3Object *self, PyObject *data) +{ + Py_buffer buf; + + GET_BUFFER_VIEW_OR_ERROUT(data, &buf); + + if (self->lock == NULL && buf.len >= HASHLIB_GIL_MINSIZE) + self->lock = PyThread_allocate_lock(); + + if (self->lock != NULL) { + Py_BEGIN_ALLOW_THREADS + PyThread_acquire_lock(self->lock, 1); + blake3_hasher_update(&self->state, buf.buf, buf.len); + PyThread_release_lock(self->lock); + Py_END_ALLOW_THREADS + } else { + blake3_hasher_update(&self->state, buf.buf, buf.len); + } + PyBuffer_Release(&buf); + + Py_RETURN_NONE; +} + +// Adapted from sha3module.c. 
+static PyObject * +_BLAKE3_digest(const BLAKE3Object *self, Py_ssize_t length, unsigned long long seek, int hex) +{ + unsigned char *digest = NULL; + PyObject *result = NULL; + + if (length < 0 || length >= (1 << 29)) { /* length is signed: reject negatives before PyMem_Malloc() sees them */ + PyErr_SetString(PyExc_ValueError, "length is negative or too large"); + return NULL; + } + digest = (unsigned char*)PyMem_Malloc(length); + if (digest == NULL) { + return PyErr_NoMemory(); + } + + /* Get the raw (binary) digest value */ + ENTER_HASHLIB(self); + blake3_hasher_finalize_seek(&self->state, seek, digest, length); + LEAVE_HASHLIB(self); + if (hex) { + result = _Py_strhex((const char *)digest, length); + } else { + result = PyBytes_FromStringAndSize((const char *)digest, length); + } + PyMem_Free(digest); + return result; +} + +static PyObject * +_blake3_blake3_digest_impl(BLAKE3Object *self, Py_ssize_t length, unsigned long long seek) +{ + return _BLAKE3_digest(self, length, seek, 0); +} + +static PyObject * +_blake3_blake3_hexdigest_impl(BLAKE3Object *self, Py_ssize_t length, unsigned long long seek) +{ + return _BLAKE3_digest(self, length, seek, 1); +} + + +static PyMethodDef py_blake3_methods[] = { + _BLAKE3_BLAKE3_COPY_METHODDEF + _BLAKE3_BLAKE3_DIGEST_METHODDEF + _BLAKE3_BLAKE3_HEXDIGEST_METHODDEF + _BLAKE3_BLAKE3_UPDATE_METHODDEF + {NULL, NULL} +}; + + + +static PyObject * +py_blake3_get_name(BLAKE3Object *self, void *closure) +{ + return PyUnicode_FromString("blake3"); +} + + + +static PyObject * +py_blake3_get_block_size(BLAKE3Object *self, void *closure) +{ + return PyLong_FromLong(BLAKE3_BLOCK_LEN); +} + + + +static PyObject * +py_blake3_get_digest_size(BLAKE3Object *self, void *closure) +{ + return PyLong_FromLong(BLAKE3_OUT_LEN); +} + + +static PyGetSetDef py_blake3_getsetters[] = { + {"name", (getter)py_blake3_get_name, + NULL, NULL, NULL}, + {"block_size", (getter)py_blake3_get_block_size, + NULL, NULL, NULL}, + {"digest_size", (getter)py_blake3_get_digest_size, + NULL, NULL, NULL}, + {NULL} +}; + + +static void +py_blake3_dealloc(PyObject *self) 
+{ + BLAKE3Object *obj = (BLAKE3Object *)self; + + /* Try not to leave state in memory. */ + // secure_zero_memory(&obj->param, sizeof(obj->param)); + // secure_zero_memory(&obj->state, sizeof(obj->state)); + if (obj->lock) { + PyThread_free_lock(obj->lock); + obj->lock = NULL; + } + + PyTypeObject *type = Py_TYPE(self); + PyObject_Free(self); + Py_DECREF(type); +} + +static PyType_Slot blake3_type_slots[] = { + {Py_tp_dealloc, py_blake3_dealloc}, + {Py_tp_doc, (char *)py_blake3_new__doc__}, + {Py_tp_methods, py_blake3_methods}, + {Py_tp_getset, py_blake3_getsetters}, + {Py_tp_new, py_blake3_new}, + {0,0} +}; + +PyType_Spec blake3_type_spec = { + .name = "_blake3.blake3", + .basicsize = sizeof(BLAKE3Object), + .flags = Py_TPFLAGS_DEFAULT, + .slots = blake3_type_slots +}; diff --git a/Modules/_blake3/blake3_impl.c.h b/Modules/_blake3/blake3_impl.c.h new file mode 100644 index 00000000000000..700b39a14f9997 --- /dev/null +++ b/Modules/_blake3/blake3_impl.c.h @@ -0,0 +1,158 @@ +PyDoc_STRVAR(py_blake3_new__doc__, +"blake3(data=b\'\', /, *, key=None, derive_key_context=None, usedforsecurity=True)\n" +"--\n" +"\n" +"Return a new BLAKE3 hash object."); + +static PyObject * +py_blake3_new_impl(PyTypeObject *type, PyObject *data, Py_buffer *key, + const char *derive_key_context_utf8, Py_ssize_t derive_key_context_len, + int usedforsecurity); + +static PyObject * +py_blake3_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"", "key", "derive_key_context", "usedforsecurity", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "blake3", 0}; + PyObject *argsbuf[4]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + Py_ssize_t noptargs = nargs + (kwargs ? 
PyDict_GET_SIZE(kwargs) : 0) - 0; + PyObject *data = NULL; + Py_buffer key = {NULL, NULL}; + Py_buffer *key_ptr = NULL; + const char* derive_key_context_utf8 = NULL; + Py_ssize_t derive_key_context_len = 0; + int usedforsecurity = 1; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 0, 1, 0, argsbuf); + if (!fastargs) { + goto exit; + } + if (nargs < 1) { + goto skip_optional_posonly; + } + noptargs--; + data = fastargs[0]; +skip_optional_posonly: + if (!noptargs) { + goto skip_optional_kwonly; + } + if (fastargs[1] && fastargs[1] != Py_None) { + if (PyObject_GetBuffer(fastargs[1], &key, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!PyBuffer_IsContiguous(&key, 'C')) { + _PyArg_BadArgument("blake3", "argument 'key'", "contiguous buffer", fastargs[1]); + goto exit; + } + key_ptr = &key; + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (fastargs[2] && fastargs[2] != Py_None) { + derive_key_context_utf8 = PyUnicode_AsUTF8AndSize(fastargs[2], + &derive_key_context_len); + if (derive_key_context_utf8 == NULL) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + usedforsecurity = fastargs[3] ? PyObject_IsTrue(fastargs[3]) : 1; /* fastargs[3] is NULL when an explicit key=None / derive_key_context=None left noptargs non-zero */ + if (usedforsecurity < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = py_blake3_new_impl(type, data, key_ptr, derive_key_context_utf8, + derive_key_context_len, usedforsecurity); + +exit: + /* Cleanup for key */ + if (key.obj) { + PyBuffer_Release(&key); + } + return return_value; +} + +PyDoc_STRVAR(_blake3_blake3_copy__doc__, +"copy($self, /)\n" +"--\n" +"\n" +"Return a copy of the hash object."); + +#define _BLAKE3_BLAKE3_COPY_METHODDEF \ + {"copy", (PyCFunction)_blake3_blake3_copy, METH_NOARGS, _blake3_blake3_copy__doc__}, + +static PyObject * +_blake3_blake3_copy_impl(BLAKE3Object *self); + +static PyObject * +_blake3_blake3_copy(BLAKE3Object *self, PyObject *Py_UNUSED(ignored)) +{ + return _blake3_blake3_copy_impl(self); +} + 
+PyDoc_STRVAR(_blake3_blake3_update__doc__, +"update($self, data, /)\n" +"--\n" +"\n" +"Update this hash object\'s state with the provided bytes-like object."); + +#define _BLAKE3_BLAKE3_UPDATE_METHODDEF \ + {"update", (PyCFunction)_blake3_blake3_update, METH_O, _blake3_blake3_update__doc__}, + +PyDoc_STRVAR(_blake3_blake3_digest__doc__, +"digest($self, /, length=32, seek=0)\n" +"--\n" +"\n" +"Return the digest value as a bytes object."); + +#define _BLAKE3_BLAKE3_DIGEST_METHODDEF \ + {"digest", (PyCFunction)(void(*)(void))_blake3_blake3_digest, METH_VARARGS | METH_KEYWORDS, _blake3_blake3_digest__doc__}, + +static PyObject * +_blake3_blake3_digest_impl(BLAKE3Object *self, Py_ssize_t length, unsigned long long seek); + +static PyObject * +_blake3_blake3_digest(BLAKE3Object *self, PyObject *args, PyObject *kwargs) +{ + static char* keywords[] = { "length", "seek", NULL }; + Py_ssize_t length = BLAKE3_OUT_LEN; + unsigned long long seek = 0; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|nK:digest", keywords, + &length, &seek)) + { + return NULL; + } + return _blake3_blake3_digest_impl(self, length, seek); +} + +PyDoc_STRVAR(_blake3_blake3_hexdigest__doc__, +"hexdigest($self, /, length=32, seek=0)\n" +"--\n" +"\n" +"Return the digest value as a string of hexadecimal digits."); + +#define _BLAKE3_BLAKE3_HEXDIGEST_METHODDEF \ + {"hexdigest", (PyCFunction)(void(*)(void))_blake3_blake3_hexdigest, METH_VARARGS | METH_KEYWORDS, _blake3_blake3_hexdigest__doc__}, + +static PyObject * +_blake3_blake3_hexdigest_impl(BLAKE3Object *self, Py_ssize_t length, unsigned long long seek); + +static PyObject * +_blake3_blake3_hexdigest(BLAKE3Object *self, PyObject *args, PyObject *kwargs) +{ + static char* keywords[] = { "length", "seek", NULL }; + Py_ssize_t length = BLAKE3_OUT_LEN; + unsigned long long seek = 0; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|nK:hexdigest", keywords, + &length, &seek)) + { + return NULL; + } + return _blake3_blake3_hexdigest_impl(self, length, 
seek); +} diff --git a/Modules/_blake3/blake3module.c b/Modules/_blake3/blake3module.c new file mode 100644 index 00000000000000..e73742e3cf4c56 --- /dev/null +++ b/Modules/_blake3/blake3module.c @@ -0,0 +1,106 @@ +/* + * Written in 2013 by Dmitry Chestnykh + * Modified for CPython by Christian Heimes + * + * To the extent possible under law, the author have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. http://creativecommons.org/publicdomain/zero/1.0/ + */ + +#include "Python.h" + +extern PyType_Spec blake3_type_spec; + +PyDoc_STRVAR(blake3mod__doc__, +"_blake3 provides BLAKE3 for hashlib\n" +); + +typedef struct { + PyTypeObject* blake3_type; +} Blake3State; + +static inline Blake3State* +blake3_get_state(PyObject *module) +{ + void *state = PyModule_GetState(module); + assert(state != NULL); + return (Blake3State *)state; +} + +static struct PyMethodDef blake3mod_functions[] = { + {NULL, NULL} +}; + +static int +_blake3_traverse(PyObject *module, visitproc visit, void *arg) +{ + Blake3State *state = blake3_get_state(module); + Py_VISIT(state->blake3_type); + return 0; +} + +static int +_blake3_clear(PyObject *module) +{ + Blake3State *state = blake3_get_state(module); + Py_CLEAR(state->blake3_type); + return 0; +} + +static void +_blake3_free(void *module) +{ + _blake3_clear((PyObject *)module); +} + +#define ADD_INT(d, name, value) do { \ + PyObject *x = PyLong_FromLong(value); \ + if (!x) \ + return -1; \ + if (PyDict_SetItemString(d, name, x) < 0) { \ + Py_DECREF(x); \ + return -1; \ + } \ + Py_DECREF(x); \ +} while(0) + +static int +blake3_exec(PyObject *m) +{ + Blake3State* st = blake3_get_state(m); + + st->blake3_type = (PyTypeObject *)PyType_FromModuleAndSpec( + m, &blake3_type_spec, NULL); + + if (NULL == st->blake3_type) + return -1; + if (PyModule_AddType(m, st->blake3_type) < 0) { + return -1; + } + + return 0; +} + +static 
PyModuleDef_Slot _blake3_slots[] = { + {Py_mod_exec, blake3_exec}, + {0, NULL} +}; + +static struct PyModuleDef blake3_module = { + PyModuleDef_HEAD_INIT, + "_blake3", + .m_doc = blake3mod__doc__, + .m_size = sizeof(Blake3State), + .m_methods = blake3mod_functions, + .m_slots = _blake3_slots, + .m_traverse = _blake3_traverse, + .m_clear = _blake3_clear, + .m_free = _blake3_free, +}; + +PyMODINIT_FUNC +PyInit__blake3(void) +{ + return PyModuleDef_Init(&blake3_module); +} diff --git a/PC/config.c b/PC/config.c index 87cd76d37bede8..8b5cda24590a36 100644 --- a/PC/config.c +++ b/PC/config.c @@ -102,6 +102,7 @@ struct _inittab _PyImport_Inittab[] = { {"_sha512", PyInit__sha512}, {"_sha3", PyInit__sha3}, {"_blake2", PyInit__blake2}, + {"_blake3", PyInit__blake3}, {"time", PyInit_time}, {"_thread", PyInit__thread}, {"_statistics", PyInit__statistics}, diff --git a/configure b/configure index ad0367fe0e20bb..85890992ed7597 100755 --- a/configure +++ b/configure @@ -1606,9 +1606,9 @@ Optional Packages: leave OpenSSL's defaults untouched, STRING: use a custom string, PROTOCOL_SSLv2 ignores the setting, see Doc/library/ssl.rst - --with-builtin-hashlib-hashes=md5,sha1,sha256,sha512,sha3,blake2 + --with-builtin-hashlib-hashes=md5,sha1,sha256,sha512,sha3,blake2,blake3 builtin hash modules, md5, sha1, sha256, sha512, - sha3 (with shake), blake2 + sha3 (with shake), blake2, blake3 --with-experimental-isolated-subinterpreters better isolate subinterpreters, experimental build mode (default is no) @@ -17779,7 +17779,7 @@ fi # builtin hash modules -default_hashlib_hashes="md5,sha1,sha256,sha512,sha3,blake2" +default_hashlib_hashes="md5,sha1,sha256,sha512,sha3,blake2,blake3" $as_echo "#define PY_BUILTIN_HASHLIB_HASHES /**/" >>confdefs.h diff --git a/setup.py b/setup.py index af384409553eb8..367f8d2d011993 100644 --- a/setup.py +++ b/setup.py @@ -2479,7 +2479,7 @@ def detect_hash_builtins(self): # (issue #14693). 
It's harmless and the object code is tiny # (40-50 KiB per module, only loaded when actually used). Modules can # be disabled via the --with-builtin-hashlib-hashes configure flag. - supported = {"md5", "sha1", "sha256", "sha512", "sha3", "blake2"} + supported = {"md5", "sha1", "sha256", "sha512", "sha3", "blake2", "blake3"} configured = sysconfig.get_config_var("PY_BUILTIN_HASHLIB_HASHES") configured = configured.strip('"').lower() @@ -2532,6 +2532,29 @@ def detect_hash_builtins(self): depends=blake2_deps )) + if "blake3" in configured: + blake3_deps = glob( + os.path.join(escape(self.srcdir), 'Modules/_blake3/impl/*') + ) + blake3_deps.append('hashlib.h') + self.add(Extension( + '_blake3', + [ + '_blake3/blake3module.c', + '_blake3/blake3_impl.c', + '_blake3/impl/blake3.c', + '_blake3/impl/blake3_dispatch.c', + '_blake3/impl/blake3_portable.c', + ], + extra_objects = [ + 'Modules/_blake3/impl/blake3_sse2_x86-64_unix.S', + 'Modules/_blake3/impl/blake3_sse41_x86-64_unix.S', + 'Modules/_blake3/impl/blake3_avx2_x86-64_unix.S', + 'Modules/_blake3/impl/blake3_avx512_x86-64_unix.S', + ], + depends=blake3_deps + )) + if "sha3" in configured: sha3_deps = glob( os.path.join(escape(self.srcdir), 'Modules/_sha3/kcp/*')