Skip to content

Commit

Permalink
fixing sizeof for StringHashTable
Browse files Browse the repository at this point in the history
  • Loading branch information
realead committed Nov 24, 2020
1 parent aab7beb commit d3de0d8
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 4 deletions.
9 changes: 5 additions & 4 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -671,10 +671,11 @@ cdef class StringHashTable(HashTable):
self.table = NULL

def sizeof(self, deep=False):
# NOTE(review): this span is a rendered diff whose +/- markers and
# indentation were stripped. Going by the "5 additions & 4 deletions"
# summary, the docstring and the first `return` below appear to be the
# removed (pre-commit) lines, and everything from `overhead` onward the
# added ones -- confirm against the commit itself.
""" return the size of my table in bytes """
# Earlier formula: a key pointer, a value and a whole uint32_t of flags for
# every bucket, with no fixed per-table overhead.
return self.table.n_buckets * (sizeof(char *) + # keys
sizeof(Py_ssize_t) + # vals
sizeof(uint32_t)) # flags
# Replacement formula, split into three parts.
# Fixed part: four uint32_t-sized fields plus three pointers (presumably the
# khash table struct's counters and its flags/keys/vals pointers -- TODO
# confirm against khash.h).
overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*)
# Flags: one uint32_t per 32 buckets (n_buckets >> 5), but never fewer
# than one.
for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t)
# Payload: one char* key and one Py_ssize_t value per bucket.
for_pairs = self.table.n_buckets * (sizeof(char *) + # keys
sizeof(Py_ssize_t)) # vals
return overhead + for_flags + for_pairs

cpdef get_item(self, str val):
cdef:
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/libs/test_hashtable.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,19 @@ def test_tracemalloc_works(self, table_type, dtype):
assert get_allocated_khash_memory() == 0


def test_tracemalloc_works_for_StringHashTable():
    """Check that StringHashTable.sizeof() agrees with the traced khash memory.

    Fills a table with N distinct string keys while tracemalloc is active,
    asserts that ``table.sizeof()`` equals the value reported by
    ``get_allocated_khash_memory()``, and then asserts that the reported
    value drops back to zero once the table is deleted.
    """
    N = 1000
    # Use the builtins ``str``/``object`` instead of ``np.unicode``/``np.object``:
    # those were plain aliases of the builtins, deprecated in NumPy 1.20 and
    # removed in 1.24, so the builtins give the same array on every version.
    keys = np.arange(N).astype(str).astype(object)
    with activated_tracemalloc():
        table = ht.StringHashTable()
        table.map_locations(keys)
        used = get_allocated_khash_memory()
        my_size = table.sizeof()
        assert used == my_size
        # Dropping the only reference should release the table's memory.
        del table
        assert get_allocated_khash_memory() == 0


@pytest.mark.parametrize(
"table_type, dtype",
[
Expand Down

0 comments on commit d3de0d8

Please sign in to comment.