Skip to content

Commit d5bfbc9

Browse files
authored
Merge pull request #43 from bbayles/issue-39-zero-hash
Handle "zero" hash values in Reader.gets
2 parents 5aef1d8 + 64b2740 commit d5bfbc9

3 files changed

Lines changed: 61 additions & 22 deletions

File tree

.github/workflows/python-app.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ jobs:
88
runs-on: ubuntu-latest
99
strategy:
1010
matrix:
11-
python-version: [3.6, 3.7, 3.8, 3.9, 3.10.0-rc.1, pypy3]
11+
python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "pypy3"]
1212

1313
steps:
1414
- uses: actions/checkout@v2

cdblib/cdblib.py

Lines changed: 49 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -182,30 +182,58 @@ def __len__(self):
182182

183183
def gets(self, key):
184184
'''Yield values for key in insertion order.'''
185-
key, h = self.hash_key(key)
186-
start, nslots = self.index[h & 0xff]
185+
# Algorithm from the spec: https://cr.yp.to/cdb/cdb.txt
186+
# "Compute the hash value of the key in the record."
187+
key, hashed_key = self.hash_key(key)
188+
189+
# "The hash value modulo 256 is the number of a hash table."
190+
slot_number, table_number = divmod(hashed_key, 256)
191+
table_pos, table_len = self.index[table_number]
192+
193+
# If the length of that table is 0, then there's no matching record.
194+
if not table_len:
195+
return
196+
197+
# Otherwise, compute the position of the end of the table - we'll
198+
# need it to be able to search the whole table.
199+
table_end = table_pos + (self.pair_size * table_len)
200+
201+
# "The hash value divided by 256, modulo the length of that table, is a
202+
# slot number."
203+
slot_number %= table_len
204+
205+
# "Probe that slot, the next higher slot, and so on, until you find the
206+
# record or run into an empty slot."
207+
slot_pos = table_pos + (self.pair_size * slot_number)
208+
while True:
209+
hash_value, byte_pos = self.read_pair(
210+
self.data[slot_pos:slot_pos + self.pair_size]
211+
)
212+
slot_pos += self.pair_size
213+
214+
# We ran into an empty slot: the search is finished.
215+
if not byte_pos:
216+
break
217+
218+
# Potential hit - we might have found a matching record.
219+
if hash_value == hashed_key:
220+
key_size, value_size = self.read_pair(
221+
self.data[byte_pos:byte_pos + self.pair_size]
222+
)
223+
byte_pos += self.pair_size
187224

188-
if nslots:
189-
end = start + (nslots * self.pair_size)
190-
slot_off = start + (((h >> 8) % nslots) * self.pair_size)
225+
candidate_key = self.data[byte_pos:byte_pos+key_size]
226+
byte_pos += key_size
191227

192-
for pos in chain(range(slot_off, end, self.pair_size),
193-
range(start, slot_off, self.pair_size)):
194-
rec_h, rec_pos = self.read_pair(
195-
self.data[pos:pos+self.pair_size]
196-
)
228+
candidate_value = self.data[byte_pos:byte_pos+value_size]
229+
byte_pos += value_size
230+
if candidate_key == key:
231+
yield candidate_value
197232

198-
if not rec_h:
199-
break
200-
elif rec_h == h:
201-
klen, dlen = self.read_pair(
202-
self.data[rec_pos:rec_pos+self.pair_size]
203-
)
204-
rec_pos += self.pair_size
205-
206-
if self.data[rec_pos:rec_pos+klen] == key:
207-
rec_pos += klen
208-
yield self.data[rec_pos:rec_pos+dlen]
233+
# If we've not run into an empty slot yet, we're not finished.
234+
# To go to the "next higher slot," we jump to the table's start.
235+
if slot_pos == table_end:
236+
slot_pos = table_pos
209237

210238
def get(self, key, default=None):
211239
'''Get the first value for key, returning default if missing.'''

tests/test_cdblib.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -480,6 +480,17 @@ def test_putstrings_fail(self):
480480
]:
481481
with self.assertRaises(exc_type):
482482
self.writer.putstrings(key, value)
483+
484+
def test_zero_hash(self):
485+
# Adapted from https://github.com/bbayles/python-pure-cdb/issues/39
486+
key1 = b'xyz'
487+
key2 = bytes([*[13, 168, 240, 240, 64, 64, 128, 128, 128, 0, 128, 128, 0, 0, 0, 128, 128], *([0] * 692791), 128]) # noqa
488+
value = b'abc'
489+
self.writer.put(key1, value)
490+
self.writer.put(key2, value)
491+
reader = self.get_reader()
492+
self.assertEqual(reader.get(key1), value)
493+
self.assertEqual(reader.get(key2), value)
483494

484495

485496
class WriterNativeInterfaceDjbHashTestCase(WriterNativeInterfaceTestBase,

0 commit comments

Comments
 (0)