Skip to content

Commit db1ee6a

Browse files
authored
Merge pull request #245 from SwayamInSync/hash-impl
FEAT: Implementing hash support in QuadDtype
2 parents 5f04fe1 + 90db2cd commit db1ee6a

7 files changed

Lines changed: 266 additions & 1 deletion

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,4 +141,5 @@ compile_commands.json
141141
# quaddtype
142142
/quaddtype/subprojects/qblas/
143143
/quaddtype/subprojects/sleef/
144+
/quaddtype/subprojects/pythoncapi-compat/
144145
.wraplock

quaddtype/meson.build

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,10 @@ incdir_numpy = run_command(py,
7979
check : true
8080
).stdout().strip()
8181

82+
# pythoncapi-compat for portable C API usage across Python versions
83+
pythoncapi_compat_subproj = subproject('pythoncapi-compat')
84+
pythoncapi_compat_inc = pythoncapi_compat_subproj.get_variable('incdir')
85+
8286
# print numpy version used
8387
numpy_version = run_command(py,
8488
['-c', 'import numpy; print(numpy.__version__)'],
@@ -154,6 +158,7 @@ includes = include_directories(
154158
'numpy_quaddtype/src',
155159
]
156160
)
161+
pythoncapi_includes = pythoncapi_compat_inc
157162

158163
srcs = [
159164
'numpy_quaddtype/src/quad_common.h',
@@ -208,5 +213,5 @@ py.extension_module('_quaddtype_main',
208213
dependencies: dependencies,
209214
install: true,
210215
subdir: 'numpy_quaddtype',
211-
include_directories: [includes, build_includes],
216+
include_directories: [includes, build_includes, pythoncapi_includes],
212217
)

quaddtype/numpy_quaddtype/src/scalar.c

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
#include "dtype.h"
1818
#include "lock.h"
1919
#include "utilities.h"
20+
#include "constants.hpp"
21+
#include "pythoncapi_compat.h"
2022

2123

2224
QuadPrecisionObject *
@@ -624,6 +626,112 @@ static PyGetSetDef QuadPrecision_getset[] = {
624626
{NULL} /* Sentinel */
625627
};
626628

629+
/*
630+
* Hash function for QuadPrecision scalars.
631+
*
632+
* This implements the same algorithm as CPython's _Py_HashDouble, adapted for
633+
* 128-bit floating point. The algorithm computes a hash based
634+
* on the reduction of the value modulo the prime P = 2**PYHASH_BITS - 1.
635+
* https://github.com/python/cpython/blob/20b69aac0d19a5e5358362410d9710887762f0e7/Python/pyhash.c#L87
636+
*
637+
* Key invariant: hash(x) == hash(y) whenever x and y are numerically equal,
638+
* even if x and y have different types. This ensures that:
639+
* hash(QuadPrecision(1.0)) == hash(1.0) == hash(1)
640+
*
641+
* The algorithm:
642+
* 1. Handle special cases: inf returns PyHASH_INF, nan uses pointer hash
643+
* 2. Extract mantissa m in [0.5, 1.0) and exponent e via frexp(v) = m * 2^e
644+
* 3. Process mantissa 28 bits at a time, accumulating into hash value x
645+
* 4. Adjust for exponent using bit rotation (since 2^PyHASH_BITS ≡ 1 mod P)
646+
* 5. Apply sign and handle the special case of -1 -> -2
647+
*/
648+
649+
static Py_hash_t
650+
QuadPrecision_hash(QuadPrecisionObject *self)
651+
{
652+
Sleef_quad value;
653+
int sign = 1;
654+
655+
if (self->backend == BACKEND_SLEEF) {
656+
value = self->value.sleef_value;
657+
}
658+
else {
659+
value = Sleef_cast_from_doubleq1((double)self->value.longdouble_value);
660+
}
661+
662+
// Check for NaN - use pointer hash (each NaN instance gets unique hash)
663+
// This prevents hash table catastrophic pileups from NaN instances
664+
if (Sleef_iunordq1(value, value)) {
665+
return Py_HashPointer((void *)self);
666+
}
667+
668+
if (Sleef_icmpeqq1(value, QUAD_PRECISION_INF)) {
669+
return PyHASH_INF;
670+
}
671+
if (Sleef_icmpeqq1(value, QUAD_PRECISION_NINF)) {
672+
return -PyHASH_INF;
673+
}
674+
675+
// Handle sign
676+
Sleef_quad zero = Sleef_cast_from_int64q1(0);
677+
if (Sleef_icmpltq1(value, zero)) {
678+
sign = -1;
679+
value = Sleef_negq1(value);
680+
}
681+
682+
// Get mantissa and exponent: value = m * 2^e, where 0.5 <= m < 1.0
683+
int exponent;
684+
Sleef_quad mantissa = Sleef_frexpq1(value, &exponent);
685+
686+
// Process 28 bits at a time (same as CPython's _Py_HashDouble)
687+
// This works well for both binary and hexadecimal floating point
688+
Py_uhash_t x = 0;
689+
// 2^28 = 268435456 - exactly representable in double, so cast is safe
690+
Sleef_quad multiplier = Sleef_cast_from_int64q1(1LL << 28);
691+
692+
// Continue until mantissa becomes zero (all bits processed)
693+
while (Sleef_icmpneq1(mantissa, zero)) {
694+
// Rotate x left by 28 bits within PyHASH_MODULUS
695+
x = ((x << 28) & PyHASH_MODULUS) | (x >> (PyHASH_BITS - 28));
696+
697+
// Scale mantissa by 2^28
698+
mantissa = Sleef_mulq1_u05(mantissa, multiplier);
699+
exponent -= 28;
700+
701+
// Extract integer part
702+
Sleef_quad int_part = Sleef_truncq1(mantissa);
703+
Py_uhash_t y = (Py_uhash_t)Sleef_cast_to_int64q1(int_part);
704+
705+
// Remove integer part from mantissa (keep fractional part)
706+
mantissa = Sleef_subq1_u05(mantissa, int_part);
707+
708+
// Accumulate
709+
x += y;
710+
if (x >= PyHASH_MODULUS) {
711+
x -= PyHASH_MODULUS;
712+
}
713+
}
714+
715+
// Adjust for exponent: reduce e modulo PyHASH_BITS
716+
// For negative exponents: PyHASH_BITS - 1 - ((-1 - e) % PyHASH_BITS)
717+
int e = exponent >= 0
718+
? exponent % PyHASH_BITS
719+
: PyHASH_BITS - 1 - ((-1 - exponent) % PyHASH_BITS);
720+
721+
// Rotate x left by e bits
722+
x = ((x << e) & PyHASH_MODULUS) | (x >> (PyHASH_BITS - e));
723+
724+
// Apply sign
725+
x = x * sign;
726+
727+
// -1 is reserved for errors, so use -2 instead
728+
if (x == (Py_uhash_t)-1) {
729+
x = (Py_uhash_t)-2;
730+
}
731+
732+
return (Py_hash_t)x;
733+
}
734+
627735
PyTypeObject QuadPrecision_Type = {
628736
PyVarObject_HEAD_INIT(NULL, 0).tp_name = "numpy_quaddtype.QuadPrecision",
629737
.tp_basicsize = sizeof(QuadPrecisionObject),
@@ -632,6 +740,7 @@ PyTypeObject QuadPrecision_Type = {
632740
.tp_dealloc = (destructor)QuadPrecision_dealloc,
633741
.tp_repr = (reprfunc)QuadPrecision_repr_dragon4,
634742
.tp_str = (reprfunc)QuadPrecision_str_dragon4,
743+
.tp_hash = (hashfunc)QuadPrecision_hash,
635744
.tp_as_number = &quad_as_scalar,
636745
.tp_as_buffer = &QuadPrecision_as_buffer,
637746
.tp_richcompare = (richcmpfunc)quad_richcompare,

quaddtype/pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,7 @@ strict_equality_for_none = true
5555
exclude = ["build", "numpy_quaddtype/src", "subprojects", "tests"]
5656
enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"]
5757
warn_unreachable = false
58+
59+
[tool.pytest.ini_options]
60+
testpaths = ["tests"]
61+
norecursedirs = ["subprojects", "build", ".mesonpy*"]

quaddtype/reinstall.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ rm -rf build/
55
rm -rf dist/
66
rm -rf subprojects/qblas
77
rm -rf subprojects/sleef
8+
rm -rf subprojects/pythoncapi-compat
89
rm -rf .mesonpy-*
910

1011
python -m pip uninstall -y numpy_quaddtype
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
[wrap-git]
2+
directory=pythoncapi-compat
3+
url=https://github.com/python/pythoncapi-compat.git
4+
revision=main
5+
[provide]
6+
pythoncapi_compat = pythoncapi_compat_dep

quaddtype/tests/test_quaddtype.py

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5229,3 +5229,142 @@ def test_add_regression_zero_plus_small(self):
52295229

52305230
assert result_yx == result_xy, f"0 + x = {result_yx}, but x + 0 = {result_xy}"
52315231
assert result_yx == x, f"0 + x = {result_yx}, expected {x}"
5232+
5233+
5234+
class TestQuadPrecisionHash:
    """Test suite for QuadPrecision hash function.

    The hash implementation follows CPython's _Py_HashDouble algorithm to ensure
    the invariant: hash(x) == hash(y) when x and y are numerically equal,
    even across different types.
    """

    @pytest.mark.parametrize("value", [
        # Values that are exactly representable in binary floating point
        "0.0", "1.0", "-1.0", "2.0", "-2.0",
        "0.5", "0.25", "1.5", "-0.5",
        "100.0", "-100.0",
        # Powers of 2 are exactly representable
        "0.125", "0.0625", "4.0", "8.0",
    ])
    def test_hash_matches_float(self, value):
        """Test that hash(QuadPrecision) == hash(float) for exactly representable values.

        Note: Only values that are exactly representable in both float64 and float128
        should match. Values like 0.1, 0.3 will have different hashes because they
        have different binary representations at different precisions.
        """
        quad_val = QuadPrecision(value)
        float_val = float(value)
        assert hash(quad_val) == hash(float_val)

    @pytest.mark.parametrize("value", [0.1, 0.3, 0.7, 1.1, 2.3, 1e300, 1e-300])
    def test_hash_matches_float_from_float(self, value):
        """Test that QuadPrecision created from float has same hash as that float.

        When creating QuadPrecision from a Python float, the value is converted
        from the float's double precision representation, so they should be
        numerically equal and have the same hash.
        """
        quad_val = QuadPrecision(value)  # Created from float, not string
        assert hash(quad_val) == hash(value)

    @pytest.mark.parametrize("value", [0, 1, -1, 2, -2, 100, -100, 1000, -1000])
    def test_hash_matches_int(self, value):
        """Test that hash(QuadPrecision) == hash(int) for integer values."""
        quad_val = QuadPrecision(value)
        assert hash(quad_val) == hash(value)

    def test_hash_matches_large_int(self):
        """Test that hash(QuadPrecision) == hash(int) for large integers."""
        big_int = 10**20
        quad_val = QuadPrecision(str(big_int))
        assert hash(quad_val) == hash(big_int)

    def test_hash_infinity(self):
        """Test that infinity hash matches Python's float infinity hash."""
        assert hash(QuadPrecision("inf")) == hash(float("inf"))
        assert hash(QuadPrecision("-inf")) == hash(float("-inf"))
        # Standard PyHASH_INF values
        assert hash(QuadPrecision("inf")) == 314159
        assert hash(QuadPrecision("-inf")) == -314159

    def test_hash_nan_unique(self):
        """Test that each NaN instance gets a unique hash (pointer-based)."""
        nan1 = QuadPrecision("nan")
        nan2 = QuadPrecision("nan")
        # NaN instances should have different hashes (based on object identity)
        assert hash(nan1) != hash(nan2)

    def test_hash_nan_same_instance(self):
        """Test that the same NaN instance has consistent hash."""
        nan = QuadPrecision("nan")
        assert hash(nan) == hash(nan)

    def test_hash_negative_one(self):
        """Test that hash(-1) returns -2 (Python's hash convention)."""
        # In Python, hash(-1) returns -2 because -1 is reserved for errors
        assert hash(QuadPrecision(-1.0)) == -2
        assert hash(QuadPrecision("-1.0")) == -2

    def test_hash_set_membership(self):
        """Test that QuadPrecision values work correctly in sets."""
        vals = [QuadPrecision(1.0), QuadPrecision(2.0), QuadPrecision(1.0)]
        unique_set = set(vals)
        assert len(unique_set) == 2

    def test_hash_set_cross_type(self):
        """Test that QuadPrecision and float with same value are in same set bucket."""
        s = {QuadPrecision(1.0)}
        s.add(1.0)
        assert len(s) == 1

    def test_hash_dict_key(self):
        """Test that QuadPrecision values work as dict keys."""
        d = {QuadPrecision(1.0): "one", QuadPrecision(2.0): "two"}
        assert d[QuadPrecision(1.0)] == "one"
        assert d[QuadPrecision(2.0)] == "two"

    def test_hash_dict_cross_type_lookup(self):
        """Test that dict lookup works with float keys when hash matches."""
        d = {QuadPrecision(1.0): "one"}
        # Float lookup should work if hash and eq both work
        assert d.get(1.0) == "one"

    @pytest.mark.parametrize("value", [
        # Powers of 2 outside double range but within quad range
        # Double max exponent is ~1024, quad max is ~16384
        2**1100, 2**2000, 2**5000, 2**10000,
        -(2**1100), -(2**2000),
    ])
    def test_hash_extreme_integers_outside_double_range(self, value):
        """Test hash matches Python int for values outside double range.

        We use powers of 2 which are exactly representable in quad precision.
        Since these integers are exact, hash(QuadPrecision(x)) must equal hash(x).
        """
        quad_val = QuadPrecision(value)
        assert hash(quad_val) == hash(value)

    @pytest.mark.parametrize("exponent", [1100, 2000])
    def test_hash_tiny_powers_of_two_outside_double_range(self, exponent):
        """Test hash matches the exact rational for 2**-k below the double range.

        A literal such as ``2**(-1100)`` is evaluated by Python as a float and
        underflows to 0.0, so it cannot be used as the test input. The quad
        value is built in quad arithmetic instead (dividing by a power of two
        is exact) and compared against the hash of the exact Fraction, which
        follows the same numeric-hash invariant.
        """
        from fractions import Fraction

        quad_val = QuadPrecision(1) / QuadPrecision(2**exponent)
        assert quad_val != 0  # guard against silently testing zero
        assert hash(quad_val) == hash(Fraction(1, 2**exponent))

    @pytest.mark.parametrize("value", [
        "1e500", "-1e500", "1e1000", "-1e1000", "1e-500", "-1e-500",
        "1.23456789e500", "-9.87654321e-600",
    ])
    def test_hash_matches_mpmath(self, value):
        """Test hash matches mpmath at quad precision (113 bits).

        mpmath with 113-bit precision represents the same value as QuadPrecision,
        so their hashes must match.
        """
        quad_val = QuadPrecision(value)
        # Scope the precision change so it does not leak into other tests.
        with mp.workprec(113):
            mpf_val = mp.mpf(value)
        assert hash(quad_val) == hash(mpf_val)

    @pytest.mark.parametrize("backend", ["sleef", "longdouble"])
    def test_hash_backends(self, backend):
        """Test hash works for both backends."""
        quad_val = QuadPrecision(1.5, backend=backend)
        assert hash(quad_val) == hash(1.5)

0 commit comments

Comments
 (0)