Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 150 additions & 0 deletions Lib/test/test_uuid.py
Original file line number Diff line number Diff line change
Expand Up @@ -877,6 +877,22 @@ def test_uuid6_test_vectors(self):
equal((u.int >> 80) & 0xffff, 0x232a)
equal((u.int >> 96) & 0xffff_ffff, 0x1ec9_414c)

def check_uuid7(
    self,
    u,
    time_ms=None,
    counter_hi=None, counter_lo=None,
    tail=None
):
    # Assert that selected fields of the UUIDv7 object *u* have the
    # expected values. Every expected value is optional: a field whose
    # expected value is None is not inspected at all.
    #
    #   time_ms     compared against u.time
    #   counter_hi  compared against the 12 bits of u.int starting at bit 64
    #   counter_lo  compared against the 30 bits of u.int starting at bit 32
    #   tail        compared against the low 32 bits of u.int
    #
    # The failure messages name the field and show the observed value in
    # hexadecimal, since the default "a != b" output only prints two
    # decimal integers.
    if time_ms is not None:
        self.assertEqual(u.time, time_ms, "unexpected UUIDv7 'time' field")
    if counter_hi is not None:
        actual_hi = (u.int >> 64) & 0xfff
        self.assertEqual(
            actual_hi, counter_hi,
            f"unexpected UUIDv7 counter_hi field (got {actual_hi:#x})",
        )
    if counter_lo is not None:
        actual_lo = (u.int >> 32) & 0x3fff_ffff
        self.assertEqual(
            actual_lo, counter_lo,
            f"unexpected UUIDv7 counter_lo field (got {actual_lo:#x})",
        )
    if tail is not None:
        actual_tail = u.int & 0xffff_ffff
        self.assertEqual(
            actual_tail, tail,
            f"unexpected UUIDv7 tail field (got {actual_tail:#x})",
        )

def test_uuid7(self):
equal = self.assertEqual
u = self.uuid.uuid7()
Expand Down Expand Up @@ -950,6 +966,7 @@ def test_uuid7_monotonicity(self):
self.uuid,
_last_timestamp_v7=0,
_last_counter_v7=0,
_last_counter_v7_overflow=False,
):
# 1 Jan 2023 12:34:56.123_456_789
timestamp_ns = 1672533296_123_456_789 # ns precision
Expand Down Expand Up @@ -1024,6 +1041,7 @@ def test_uuid7_timestamp_backwards(self):
self.uuid,
_last_timestamp_v7=fake_last_timestamp_v7,
_last_counter_v7=counter,
_last_counter_v7_overflow=False,
),
mock.patch('time.time_ns', return_value=timestamp_ns),
mock.patch('os.urandom', return_value=tail_bytes) as urand
Expand All @@ -1049,9 +1067,13 @@ def test_uuid7_overflow_counter(self):
timestamp_ns = 1672533296_123_456_789 # ns precision
timestamp_ms, _ = divmod(timestamp_ns, 1_000_000)

# By design, counters have their MSB set to 0 so they
# will not be able to doubly overflow (they are still
# 42-bit integers).
new_counter_hi = random.getrandbits(11)
new_counter_lo = random.getrandbits(30)
new_counter = (new_counter_hi << 30) | new_counter_lo
new_counter &= 0x1ff_ffff_ffff

tail = random.getrandbits(32)
random_bits = (new_counter << 32) | tail
Expand All @@ -1063,11 +1085,14 @@ def test_uuid7_overflow_counter(self):
_last_timestamp_v7=timestamp_ms,
# same timestamp, but force an overflow on the counter
_last_counter_v7=0x3ff_ffff_ffff,
_last_counter_v7_overflow=False,
),
mock.patch('time.time_ns', return_value=timestamp_ns),
mock.patch('os.urandom', return_value=random_data) as urand
):
self.assertFalse(self.uuid._last_counter_v7_overflow)
u = self.uuid.uuid7()
self.assertTrue(self.uuid._last_counter_v7_overflow)
urand.assert_called_with(10)
equal(u.variant, self.uuid.RFC_4122)
equal(u.version, 7)
Expand All @@ -1082,6 +1107,131 @@ def test_uuid7_overflow_counter(self):
equal((u.int >> 32) & 0x3fff_ffff, new_counter_lo)
equal(u.int & 0xffff_ffff, tail)

# Check that the timestamp of future UUIDs created within
# the same logical millisecond does not advance after the
# counter overflowed. In addition, even if the counter could
# be incremented, we are still in an "overflow" state as the
# timestamp should not be modified unless we re-overflow.
#
# See https://github.com/python/cpython/issues/138862.
v = self.uuid.uuid7()
equal(v.time, unix_ts_ms)
self.assertTrue(self.uuid._last_counter_v7_overflow)

def test_uuid7_multiple_counter_overflows(self):
    # Tests when counter overflows multiple times within the same frame.
    # See https://github.com/python/cpython/issues/138862.
    equal = self.assertEqual

    def split_counter(counter):
        # Split a 42-bit counter into the (high 12 bits, low 30 bits)
        # pair that check_uuid7() compares against.
        return (counter >> 30) & 0x0fff, counter & 0x3fff_ffff

    t0_ms = 1 + random.getrandbits(24)

    counter_max_value = 0x3ff_ffff_ffff
    counter_max_value_hi, counter_max_value_lo = split_counter(
        counter_max_value
    )

    # Tail obtained when os.urandom() is wrapped by patch_os_urandom():
    # four b'\x11' bytes.
    random_tail = int.from_bytes(b'\x11' * 4)
    tail1 = tail3a = tail3b = random_tail
    # Tails handed out by the patched _uuid7_get_counter_and_tail().
    tail2a = 1 + random.getrandbits(16)
    tail2b = 2 * tail2a

    counter1 = counter2a = counter_max_value
    counter1_hi = counter2a_hi = counter_max_value_hi
    counter1_lo = counter2a_lo = counter_max_value_lo

    counter2b = random.getrandbits(40)
    counter2b_hi, counter2b_lo = split_counter(counter2b)
    self.assertLess(counter2b, counter_max_value - 3)

    # Expected counters for the two increments that follow u2b. The
    # high/low pieces are derived from the whole incremented counter
    # rather than by adding to counter2b_lo: if the low 30 bits of the
    # random counter2b are (almost) all ones, the increment carries
    # into the high bits and "counter2b_lo + k" would be wrong.
    counter3a = counter2b + 1
    counter3a_hi, counter3a_lo = split_counter(counter3a)
    counter3b = counter2b + 2
    counter3b_hi, counter3b_lo = split_counter(counter3b)

    def patch_os_urandom(wraps=True):
        if wraps:
            return mock.patch('os.urandom', wraps=lambda n: b'\x11' * n)
        return mock.patch('os.urandom')

    def patch_get_counter_and_tail(c, t):
        return mock.patch.object(
            self.uuid,
            "_uuid7_get_counter_and_tail",
            return_value=(c, t),
        )

    def check_invariants(t, c, *, overflow):
        equal(self.uuid._last_timestamp_v7, t)
        equal(self.uuid._last_counter_v7, c)
        self.assertIs(self.uuid._last_counter_v7_overflow, overflow)

    with (
        mock.patch.multiple(
            self.uuid,
            _last_timestamp_v7=t0_ms,
            _last_counter_v7=counter_max_value - 1,
            _last_counter_v7_overflow=False,
        ),
        mock.patch('time.time_ns', return_value=1_000_000 * t0_ms),
    ):
        # All the calls in this block to uuid7() are always assumed
        # to be within the same logical millisecond but the timestamp
        # that is used for the UUIDv7 objects will be altered (and
        # considered in the future).

        # u1's counter is now the maximal value it can have.
        # For the next call, we will need to jump 1ms in the
        # future and pick a new counter (in our case, it will
        # be an overflowing one).
        with patch_os_urandom() as urand:
            u1 = self.uuid.uuid7()
            urand.assert_called_once_with(4)
        self.check_uuid7(u1, t0_ms, counter1_hi, counter1_lo, tail1)
        # For now, we are not yet in an overflow (but all subsequent
        # calls will be in an overflow state even if we normally
        # increment the counters). The overflow state is only cleared
        # when the physical millisecond catches up to the logical one.
        check_invariants(u1.time, counter1, overflow=False)
        del u1

        # u1's counter is maximal, so we enter the overflow state
        # and jump 1ms in the future; the randomized counter is
        # still one that would cause an overflow at the next call.
        with (
            patch_os_urandom(wraps=False) as urand,
            patch_get_counter_and_tail(counter_max_value, tail2a),
        ):
            u2a = self.uuid.uuid7()
            urand.assert_not_called()
        self.check_uuid7(u2a, t0_ms + 1, counter2a_hi, counter2a_lo, tail2a)
        check_invariants(u2a.time, counter2a, overflow=True)
        del u2a

        # u2a's counter was the maximal value so we need to update
        # the timestamp and pick a new counter again (this time,
        # it will be a small value that we can increment later).
        with (
            patch_os_urandom(wraps=False) as urand,
            patch_get_counter_and_tail(counter2b, tail2b),
        ):
            u2b = self.uuid.uuid7()
            urand.assert_not_called()
        self.check_uuid7(u2b, t0_ms + 2, counter2b_hi, counter2b_lo, tail2b)
        check_invariants(u2b.time, counter2b, overflow=True)
        del u2b

        # u2b's counter was small enough that we can increment it;
        # we are still in the future but we don't need to advance
        # the timestamp again.
        with patch_os_urandom() as urand:
            u3a = self.uuid.uuid7()
            urand.assert_called_once_with(4)
        self.check_uuid7(u3a, t0_ms + 2, counter3a_hi, counter3a_lo, tail3a)
        check_invariants(u3a.time, counter3a, overflow=True)
        del u3a

        with patch_os_urandom() as urand:
            u3b = self.uuid.uuid7()
            urand.assert_called_once_with(4)
        self.check_uuid7(u3b, t0_ms + 2, counter3b_hi, counter3b_lo, tail3b)
        check_invariants(u3b.time, counter3b, overflow=True)
        del u3b

def test_uuid8(self):
equal = self.assertEqual
u = self.uuid.uuid8()
Expand Down
25 changes: 24 additions & 1 deletion Lib/uuid.py
Original file line number Diff line number Diff line change
Expand Up @@ -832,6 +832,18 @@ def uuid6(node=None, clock_seq=None):

# State shared between consecutive uuid7() calls.
#
# uuid7() treats a _last_timestamp_v7 of None as "no previous timestamp"
# and draws a fresh counter in that case.
_last_timestamp_v7 = None
_last_counter_v7 = 0 # 42-bit counter
# Indicate whether one or more counter overflow(s) happened in the same frame.
#
# Since the timestamp is incremented after a counter overflow by design,
# we must prevent incrementing the timestamp again in consecutive calls
# for which the logical timestamp millisecond remains the same.
#
# If the resampled counter hits an overflow again within the same time
# frame, we want to advance the timestamp again and resample the counter.
#
# See https://github.com/python/cpython/issues/138862.
_last_counter_v7_overflow = False


def _uuid7_get_counter_and_tail():
rand = int.from_bytes(os.urandom(10))
Expand Down Expand Up @@ -862,18 +874,29 @@ def uuid7():

global _last_timestamp_v7
global _last_counter_v7
global _last_counter_v7_overflow

nanoseconds = time.time_ns()
timestamp_ms = nanoseconds // 1_000_000

if _last_timestamp_v7 is None or timestamp_ms > _last_timestamp_v7:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Side note: if we initialize _last_timestamp_v7 to a large negative value (e.g. -2**64), then we can remove the `_last_timestamp_v7 is None` check.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or just -1 and it's ok. You won't have a negative timestamp I think.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could (according to Claude, when the system time is set to before 1970). But since the timestamp is generated via an int64_t, the value -2**64 should be safe.

counter, tail = _uuid7_get_counter_and_tail()
# Clear the overflow state every new millisecond.
_last_counter_v7_overflow = False
Comment thread
picnixz marked this conversation as resolved.
else:
if timestamp_ms < _last_timestamp_v7:
timestamp_ms = _last_timestamp_v7 + 1
# We are within the same timestamp after a counter overflow, or
# the clock went backwards. In the former case, we re-use the
# already advanced timestamp (it was updated when we detected the
# overflow). In the latter case, we follow the RFC.
if _last_counter_v7_overflow:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The first case from the if-else is the "latter" case in the comments above. Maybe swap the order in the comments.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh right :') I changed so much this block that I forgot to update the comment. Thanks!

timestamp_ms = _last_timestamp_v7
else:
timestamp_ms = _last_timestamp_v7 + 1
# advance the 42-bit counter
counter = _last_counter_v7 + 1
if counter > 0x3ff_ffff_ffff:
_last_counter_v7_overflow = True
Comment thread
picnixz marked this conversation as resolved.
# advance the 48-bit timestamp
timestamp_ms += 1
counter, tail = _uuid7_get_counter_and_tail()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
:mod:`uuid`: the timestamp of UUIDv7 objects generated within the same
millisecond after encountering a counter overflow is only incremented once
for the entire batch of UUIDv7 objects instead of at each object creation.
Patch by Bénédikt Tran.
Loading