bpo-40602: Rename hashtable.h to pycore_hashtable.h (GH-20044)

* Move Modules/hashtable.h to Include/internal/pycore_hashtable.h * Move Modules/hashtable.c to Python/hashtable.c * Python is now linked to hashtable.c. _tracemalloc is no longer linked to hashtable.c. Previously, marshal.c got hashtable.c via _tracemalloc.c which is built as a builtin module.
author: Victor Stinner <vstinner@python.org> 2020-05-12 02:42:19 +0200
committer: GitHub <noreply@github.com> 2020-05-12 02:42:19 +0200
commit: b617993b7c0b0f6f679ef7003a62d0318b6d6af9 (patch)
tree: c0527f03dd007c40d39de754aee44af36abefda2 /Python/hashtable.c
parent: 21cdb711e3b1975398c54141e519ead02670610e (diff)
download: cpython-b617993b7c0b0f6f679ef7003a62d0318b6d6af9.tar.gz
cpython-b617993b7c0b0f6f679ef7003a62d0318b6d6af9.zip
1 files changed, 524 insertions, 0 deletions
diff --git a/Python/hashtable.c b/Python/hashtable.c
new file mode 100644
index 00000000000..22b84590105
--- /dev/null
+++ b/Python/hashtable.c
@@ -0,0 +1,524 @@
+/* The implementation of the hash table (_Py_hashtable_t) is based on the
+   cfuhash project:
+   http://sourceforge.net/projects/libcfu/
+
+   Copyright of cfuhash:
+   ----------------------------------
+   Creation date: 2005-06-24 21:22:40
+   Authors: Don
+   Change log:
+
+   Copyright (c) 2005 Don Owens
+   All rights reserved.
+
+   This code is released under the BSD license:
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+     * Redistributions of source code must retain the above copyright
+       notice, this list of conditions and the following disclaimer.
+
+     * Redistributions in binary form must reproduce the above
+       copyright notice, this list of conditions and the following
+       disclaimer in the documentation and/or other materials provided
+       with the distribution.
+
+     * Neither the name of the author nor the names of its
+       contributors may be used to endorse or promote products derived
+       from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+   FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+   COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+   INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+   OF THE POSSIBILITY OF SUCH DAMAGE.
+   ----------------------------------
+*/
+
+#include "Python.h"
+#include "pycore_hashtable.h"
+
+#define HASHTABLE_MIN_SIZE 16
+#define HASHTABLE_HIGH 0.50
+#define HASHTABLE_LOW 0.10
+#define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH)
+
+#define BUCKETS_HEAD(SLIST) \
+        ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST)))
+#define TABLE_HEAD(HT, BUCKET) \
+        ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET]))
+#define ENTRY_NEXT(ENTRY) \
+        ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
+#define HASHTABLE_ITEM_SIZE(HT) \
+        (sizeof(_Py_hashtable_entry_t) + (HT)->key_size + (HT)->data_size)
+
+#define ENTRY_READ_PDATA(TABLE, ENTRY, DATA_SIZE, PDATA) \
+    do { \
+        assert((DATA_SIZE) == (TABLE)->data_size); \
+        memcpy((PDATA), _Py_HASHTABLE_ENTRY_PDATA(TABLE, (ENTRY)), \
+                  (DATA_SIZE)); \
+    } while (0)
+
+#define ENTRY_WRITE_PDATA(TABLE, ENTRY, DATA_SIZE, PDATA) \
+    do { \
+        assert((DATA_SIZE) == (TABLE)->data_size); \
+        memcpy((void *)_Py_HASHTABLE_ENTRY_PDATA((TABLE), (ENTRY)), \
+                  (PDATA), (DATA_SIZE)); \
+    } while (0)
+
+/* Forward declaration */
+static void hashtable_rehash(_Py_hashtable_t *ht);
+
+static void
+_Py_slist_init(_Py_slist_t *list)
+{
+    list->head = NULL;
+}
+
+
+static void
+_Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
+{
+    item->next = list->head;
+    list->head = item;
+}
+
+
+static void
+_Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
+                 _Py_slist_item_t *item)
+{
+    if (previous != NULL)
+        previous->next = item->next;
+    else
+        list->head = item->next;
+}
+
+
+Py_uhash_t
+_Py_hashtable_hash_ptr(struct _Py_hashtable_t *ht, const void *pkey)
+{
+    void *key;
+
+    _Py_HASHTABLE_READ_KEY(ht, pkey, key);
+    return (Py_uhash_t)_Py_HashPointer(key);
+}
+
+
+int
+_Py_hashtable_compare_direct(_Py_hashtable_t *ht, const void *pkey,
+                             const _Py_hashtable_entry_t *entry)
+{
+    const void *pkey2 = _Py_HASHTABLE_ENTRY_PKEY(entry);
+    return (memcmp(pkey, pkey2, ht->key_size) == 0);
+}
+
+
+/* makes sure the real size of the buckets array is a power of 2 */
+static size_t
+round_size(size_t s)
+{
+    size_t i;
+    if (s < HASHTABLE_MIN_SIZE)
+        return HASHTABLE_MIN_SIZE;
+    i = 1;
+    while (i < s)
+        i <<= 1;
+    return i;
+}
+
+
+_Py_hashtable_t *
+_Py_hashtable_new_full(size_t key_size, size_t data_size,
+                       size_t init_size,
+                       _Py_hashtable_hash_func hash_func,
+                       _Py_hashtable_compare_func compare_func,
+                       _Py_hashtable_allocator_t *allocator)
+{
+    _Py_hashtable_t *ht;
+    size_t buckets_size;
+    _Py_hashtable_allocator_t alloc;
+
+    if (allocator == NULL) {
+        alloc.malloc = PyMem_RawMalloc;
+        alloc.free = PyMem_RawFree;
+    }
+    else
+        alloc = *allocator;
+
+    ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
+    if (ht == NULL)
+        return ht;
+
+    ht->num_buckets = round_size(init_size);
+    ht->entries = 0;
+    ht->key_size = key_size;
+    ht->data_size = data_size;
+
+    buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
+    ht->buckets = alloc.malloc(buckets_size);
+    if (ht->buckets == NULL) {
+        alloc.free(ht);
+        return NULL;
+    }
+    memset(ht->buckets, 0, buckets_size);
+
+    ht->hash_func = hash_func;
+    ht->compare_func = compare_func;
+    ht->alloc = alloc;
+    return ht;
+}
+
+
+_Py_hashtable_t *
+_Py_hashtable_new(size_t key_size, size_t data_size,
+                  _Py_hashtable_hash_func hash_func,
+                  _Py_hashtable_compare_func compare_func)
+{
+    return _Py_hashtable_new_full(key_size, data_size,
+                                  HASHTABLE_MIN_SIZE,
+                                  hash_func, compare_func,
+                                  NULL);
+}
+
+
+size_t
+_Py_hashtable_size(_Py_hashtable_t *ht)
+{
+    size_t size;
+
+    size = sizeof(_Py_hashtable_t);
+
+    /* buckets */
+    size += ht->num_buckets * sizeof(_Py_hashtable_entry_t *);
+
+    /* entries */
+    size += ht->entries * HASHTABLE_ITEM_SIZE(ht);
+
+    return size;
+}
+
+
+#ifdef Py_DEBUG
+void
+_Py_hashtable_print_stats(_Py_hashtable_t *ht)
+{
+    size_t size;
+    size_t chain_len, max_chain_len, total_chain_len, nchains;
+    _Py_hashtable_entry_t *entry;
+    size_t hv;
+    double load;
+
+    size = _Py_hashtable_size(ht);
+
+    load = (double)ht->entries / ht->num_buckets;
+
+    max_chain_len = 0;
+    total_chain_len = 0;
+    nchains = 0;
+    for (hv = 0; hv < ht->num_buckets; hv++) {
+        entry = TABLE_HEAD(ht, hv);
+        if (entry != NULL) {
+            chain_len = 0;
+            for (; entry; entry = ENTRY_NEXT(entry)) {
+                chain_len++;
+            }
+            if (chain_len > max_chain_len)
+                max_chain_len = chain_len;
+            total_chain_len += chain_len;
+            nchains++;
+        }
+    }
+    printf("hash table %p: entries=%"
+           PY_FORMAT_SIZE_T "u/%" PY_FORMAT_SIZE_T "u (%.0f%%), ",
+           (void *)ht, ht->entries, ht->num_buckets, load * 100.0);
+    if (nchains)
+        printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains);
+    printf("max_chain_len=%" PY_FORMAT_SIZE_T "u, %" PY_FORMAT_SIZE_T "u KiB\n",
+           max_chain_len, size / 1024);
+}
+#endif
+
+
+_Py_hashtable_entry_t *
+_Py_hashtable_get_entry(_Py_hashtable_t *ht,
+                        size_t key_size, const void *pkey)
+{
+    Py_uhash_t key_hash;
+    size_t index;
+    _Py_hashtable_entry_t *entry;
+
+    assert(key_size == ht->key_size);
+
+    key_hash = ht->hash_func(ht, pkey);
+    index = key_hash & (ht->num_buckets - 1);
+
+    for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
+        if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry))
+            break;
+    }
+
+    return entry;
+}
+
+
+static int
+_Py_hashtable_pop_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
+                        void *data, size_t data_size)
+{
+    Py_uhash_t key_hash;
+    size_t index;
+    _Py_hashtable_entry_t *entry, *previous;
+
+    assert(key_size == ht->key_size);
+
+    key_hash = ht->hash_func(ht, pkey);
+    index = key_hash & (ht->num_buckets - 1);
+
+    previous = NULL;
+    for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
+        if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry))
+            break;
+        previous = entry;
+    }
+
+    if (entry == NULL)
+        return 0;
+
+    _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
+                     (_Py_slist_item_t *)entry);
+    ht->entries--;
+
+    if (data != NULL)
+        ENTRY_READ_PDATA(ht, entry, data_size, data);
+    ht->alloc.free(entry);
+
+    if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW)
+        hashtable_rehash(ht);
+    return 1;
+}
+
+
+int
+_Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
+                  size_t data_size, const void *data)
+{
+    Py_uhash_t key_hash;
+    size_t index;
+    _Py_hashtable_entry_t *entry;
+
+    assert(key_size == ht->key_size);
+
+    assert(data != NULL || data_size == 0);
+#ifndef NDEBUG
+    /* Don't write the assertion on a single line because it is interesting
+       to know the duplicated entry if the assertion failed. The entry can
+       be read using a debugger. */
+    entry = _Py_hashtable_get_entry(ht, key_size, pkey);
+    assert(entry == NULL);
+#endif
+
+    key_hash = ht->hash_func(ht, pkey);
+    index = key_hash & (ht->num_buckets - 1);
+
+    entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht));
+    if (entry == NULL) {
+        /* memory allocation failed */
+        return -1;
+    }
+
+    entry->key_hash = key_hash;
+    memcpy((void *)_Py_HASHTABLE_ENTRY_PKEY(entry), pkey, ht->key_size);
+    if (data)
+        ENTRY_WRITE_PDATA(ht, entry, data_size, data);
+
+    _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
+    ht->entries++;
+
+    if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH)
+        hashtable_rehash(ht);
+    return 0;
+}
+
+
+int
+_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey,
+                  size_t data_size, void *data)
+{
+    _Py_hashtable_entry_t *entry;
+
+    assert(data != NULL);
+
+    entry = _Py_hashtable_get_entry(ht, key_size, pkey);
+    if (entry == NULL)
+        return 0;
+    ENTRY_READ_PDATA(ht, entry, data_size, data);
+    return 1;
+}
+
+
+int
+_Py_hashtable_pop(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
+                  size_t data_size, void *data)
+{
+    assert(data != NULL);
+    return _Py_hashtable_pop_entry(ht, key_size, pkey, data, data_size);
+}
+
+
+/* Code commented since the function is not needed in Python */
+#if 0
+void
+_Py_hashtable_delete(_Py_hashtable_t *ht, size_t key_size, const void *pkey)
+{
+#ifndef NDEBUG
+    int found = _Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0);
+    assert(found);
+#else
+    (void)_Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0);
+#endif
+}
+#endif
+
+
+int
+_Py_hashtable_foreach(_Py_hashtable_t *ht,
+                      _Py_hashtable_foreach_func func,
+                      void *arg)
+{
+    _Py_hashtable_entry_t *entry;
+    size_t hv;
+
+    for (hv = 0; hv < ht->num_buckets; hv++) {
+        for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
+            int res = func(ht, entry, arg);
+            if (res)
+                return res;
+        }
+    }
+    return 0;
+}
+
+
+static void
+hashtable_rehash(_Py_hashtable_t *ht)
+{
+    size_t buckets_size, new_size, bucket;
+    _Py_slist_t *old_buckets = NULL;
+    size_t old_num_buckets;
+
+    new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR));
+    if (new_size == ht->num_buckets)
+        return;
+
+    old_num_buckets = ht->num_buckets;
+
+    buckets_size = new_size * sizeof(ht->buckets[0]);
+    old_buckets = ht->buckets;
+    ht->buckets = ht->alloc.malloc(buckets_size);
+    if (ht->buckets == NULL) {
+        /* cancel rehash on memory allocation failure */
+        ht->buckets = old_buckets ;
+        /* memory allocation failed */
+        return;
+    }
+    memset(ht->buckets, 0, buckets_size);
+
+    ht->num_buckets = new_size;
+
+    for (bucket = 0; bucket < old_num_buckets; bucket++) {
+        _Py_hashtable_entry_t *entry, *next;
+        for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) {
+            size_t entry_index;
+
+
+            assert(ht->hash_func(ht, _Py_HASHTABLE_ENTRY_PKEY(entry)) == entry->key_hash);
+            next = ENTRY_NEXT(entry);
+            entry_index = entry->key_hash & (new_size - 1);
+
+            _Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry);
+        }
+    }
+
+    ht->alloc.free(old_buckets);
+}
+
+
+void
+_Py_hashtable_clear(_Py_hashtable_t *ht)
+{
+    _Py_hashtable_entry_t *entry, *next;
+    size_t i;
+
+    for (i=0; i < ht->num_buckets; i++) {
+        for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) {
+            next = ENTRY_NEXT(entry);
+            ht->alloc.free(entry);
+        }
+        _Py_slist_init(&ht->buckets[i]);
+    }
+    ht->entries = 0;
+    hashtable_rehash(ht);
+}
+
+
+void
+_Py_hashtable_destroy(_Py_hashtable_t *ht)
+{
+    size_t i;
+
+    for (i = 0; i < ht->num_buckets; i++) {
+        _Py_slist_item_t *entry = ht->buckets[i].head;
+        while (entry) {
+            _Py_slist_item_t *entry_next = entry->next;
+            ht->alloc.free(entry);
+            entry = entry_next;
+        }
+    }
+
+    ht->alloc.free(ht->buckets);
+    ht->alloc.free(ht);
+}
+
+
+_Py_hashtable_t *
+_Py_hashtable_copy(_Py_hashtable_t *src)
+{
+    const size_t key_size = src->key_size;
+    const size_t data_size = src->data_size;
+    _Py_hashtable_t *dst;
+    _Py_hashtable_entry_t *entry;
+    size_t bucket;
+    int err;
+
+    dst = _Py_hashtable_new_full(key_size, data_size,
+                                 src->num_buckets,
+                                 src->hash_func,
+                                 src->compare_func,
+                                 &src->alloc);
+    if (dst == NULL)
+        return NULL;
+
+    for (bucket=0; bucket < src->num_buckets; bucket++) {
+        entry = TABLE_HEAD(src, bucket);
+        for (; entry; entry = ENTRY_NEXT(entry)) {
+            const void *pkey = _Py_HASHTABLE_ENTRY_PKEY(entry);
+            const void *pdata = _Py_HASHTABLE_ENTRY_PDATA(src, entry);
+            err = _Py_hashtable_set(dst, key_size, pkey, data_size, pdata);
+            if (err) {
+                _Py_hashtable_destroy(dst);
+                return NULL;
+            }
+        }
+    }
+    return dst;
+}
author	Victor Stinner <vstinner@python.org>	2020-05-12 02:42:19 +0200
committer	GitHub <noreply@github.com>	2020-05-12 02:42:19 +0200
commit	b617993b7c0b0f6f679ef7003a62d0318b6d6af9 (patch)
tree	c0527f03dd007c40d39de754aee44af36abefda2 /Python/hashtable.c
parent	21cdb711e3b1975398c54141e519ead02670610e (diff)
download	cpython-b617993b7c0b0f6f679ef7003a62d0318b6d6af9.tar.gz cpython-b617993b7c0b0f6f679ef7003a62d0318b6d6af9.zip