diff options
author | Michael Droettboom <mdboom@gmail.com> | 2025-03-27 09:57:06 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-03-27 09:57:06 -0400 |
commit | 8614f86b7163b1c39798b481902dbb511292a537 (patch) | |
tree | 2bf6a46b432df3d6bf01a4176f3a614a01c48566 /Objects/tupleobject.c | |
parent | cf5e438c0297954c4411c1c3ae4ba67a48b134ea (diff) | |
download | cpython-8614f86b7163b1c39798b481902dbb511292a537.tar.gz cpython-8614f86b7163b1c39798b481902dbb511292a537.zip |
gh-131525: Cache the result of tuple_hash (#131529)
* gh-131525: Cache the result of tuple_hash
* Fix debug builds
* Add blurb
* Fix formatting
* Pre-compute empty tuple singleton
* Mostly set the cache within tuple_alloc
* Fixes for TSAN
* Pre-compute empty tuple singleton
* Fix for 32-bit platforms
* Assert that op != NULL in _PyTuple_RESET_HASH_CACHE
* Use FT_ATOMIC_STORE_SSIZE_RELAXED macro
* Update Include/internal/pycore_tuple.h
Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
* Fix alignment
* atomic load
* Update Objects/tupleobject.c
Co-authored-by: Chris Eibl <138194463+chris-eibl@users.noreply.github.com>
---------
Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
Co-authored-by: Chris Eibl <138194463+chris-eibl@users.noreply.github.com>
Diffstat (limited to 'Objects/tupleobject.c')
-rw-r--r-- | Objects/tupleobject.c | 50 |
1 files changed, 24 insertions, 26 deletions
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 78ee3fa1fc3..737c4e6d977 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -45,6 +45,7 @@ tuple_alloc(Py_ssize_t size) if (index < PyTuple_MAXSAVESIZE) { PyTupleObject *op = _Py_FREELIST_POP(PyTupleObject, tuples[index]); if (op != NULL) { + _PyTuple_RESET_HASH_CACHE(op); return op; } } @@ -53,7 +54,11 @@ tuple_alloc(Py_ssize_t size) sizeof(PyObject *))) / sizeof(PyObject *)) { return (PyTupleObject *)PyErr_NoMemory(); } - return PyObject_GC_NewVar(PyTupleObject, &PyTuple_Type, size); + PyTupleObject *result = PyObject_GC_NewVar(PyTupleObject, &PyTuple_Type, size); + if (result != NULL) { + _PyTuple_RESET_HASH_CACHE(result); + } + return result; } // The empty tuple singleton is not tracked by the GC. @@ -296,50 +301,41 @@ error: For the xxHash specification, see https://github.com/Cyan4973/xxHash/blob/master/doc/xxhash_spec.md - Below are the official constants from the xxHash specification. Optimizing - compilers should emit a single "rotate" instruction for the - _PyHASH_XXROTATE() expansion. If that doesn't happen for some important - platform, the macro could be changed to expand to a platform-specific rotate - spelling instead. + The constants for the hash function are defined in pycore_tuple.h. */ -#if SIZEOF_PY_UHASH_T > 4 -#define _PyHASH_XXPRIME_1 ((Py_uhash_t)11400714785074694791ULL) -#define _PyHASH_XXPRIME_2 ((Py_uhash_t)14029467366897019727ULL) -#define _PyHASH_XXPRIME_5 ((Py_uhash_t)2870177450012600261ULL) -#define _PyHASH_XXROTATE(x) ((x << 31) | (x >> 33)) /* Rotate left 31 bits */ -#else -#define _PyHASH_XXPRIME_1 ((Py_uhash_t)2654435761UL) -#define _PyHASH_XXPRIME_2 ((Py_uhash_t)2246822519UL) -#define _PyHASH_XXPRIME_5 ((Py_uhash_t)374761393UL) -#define _PyHASH_XXROTATE(x) ((x << 13) | (x >> 19)) /* Rotate left 13 bits */ -#endif -/* Tests have shown that it's not worth to cache the hash value, see - https://bugs.python.org/issue9685 */ static Py_hash_t tuple_hash(PyObject *op) { PyTupleObject *v = _PyTuple_CAST(op); + + Py_uhash_t acc = FT_ATOMIC_LOAD_SSIZE_RELAXED(v->ob_hash); + if (acc != (Py_uhash_t)-1) { + return acc; + } + Py_ssize_t len = Py_SIZE(v); PyObject **item = v->ob_item; - - Py_uhash_t acc = _PyHASH_XXPRIME_5; + acc = _PyTuple_HASH_XXPRIME_5; for (Py_ssize_t i = 0; i < len; i++) { Py_uhash_t lane = PyObject_Hash(item[i]); if (lane == (Py_uhash_t)-1) { return -1; } - acc += lane * _PyHASH_XXPRIME_2; - acc = _PyHASH_XXROTATE(acc); - acc *= _PyHASH_XXPRIME_1; + acc += lane * _PyTuple_HASH_XXPRIME_2; + acc = _PyTuple_HASH_XXROTATE(acc); + acc *= _PyTuple_HASH_XXPRIME_1; } /* Add input length, mangled to keep the historical value of hash(()). */ - acc += len ^ (_PyHASH_XXPRIME_5 ^ 3527539UL); + acc += len ^ (_PyTuple_HASH_XXPRIME_5 ^ 3527539UL); if (acc == (Py_uhash_t)-1) { - return 1546275796; + acc = 1546275796; } + + FT_ATOMIC_STORE_SSIZE_RELAXED(v->ob_hash, acc); + return acc; } @@ -764,6 +760,8 @@ tuple_subtype_new(PyTypeObject *type, PyObject *iterable) } Py_DECREF(tmp); + _PyTuple_RESET_HASH_CACHE(newobj); + // Don't track if a subclass tp_alloc is PyType_GenericAlloc() if (!_PyObject_GC_IS_TRACKED(newobj)) { _PyObject_GC_TRACK(newobj); |