From 852d309e755c34b2941e1bb6211a203ca28ca95d Mon Sep 17 00:00:00 2001 From: Jason Volk Date: Wed, 6 May 2026 18:57:15 +0000 Subject: [PATCH 1/4] `TestJumpToDateEndpoint`: Guard the parallel boundary subtests against millisecond collisions. Both subtests sample `time.Now()` and immediately probe a homeserver that stamps events using its own millisecond clock; when the two readings land in the same millisecond, `/timestamp_to_event`'s inclusive boundary semantics return an event the subtest does not want. `should_find_event_after_given_timestamp` races the trailing createRoom state event ahead of the message that a forward search is expected to find; `should_find_nothing_before_the_earliest_timestamp` races `m.room.create` against a backward search expected to find nothing. Pause around the relevant samples so the next homeserver stamp lands in a strictly later millisecond. Both flakes have been observed across multiple homeserver implementations on otherwise green runs. Closes #868. Signed-off-by: Jason Volk --- tests/room_timestamp_to_event_test.go | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/room_timestamp_to_event_test.go b/tests/room_timestamp_to_event_test.go index 125f6606..6afba207 100644 --- a/tests/room_timestamp_to_event_test.go +++ b/tests/room_timestamp_to_event_test.go @@ -56,6 +56,14 @@ func TestJumpToDateEndpoint(t *testing.T) { t.Run("should find nothing before the earliest timestamp", func(t *testing.T) { t.Parallel() timeBeforeRoomCreation := time.Now() + // /timestamp_to_event is millisecond-granular by spec, and the next + // call (createTestRoom) writes an m.room.create whose + // origin_server_ts can land in the same millisecond as the sample + // above. A backward search at that millisecond inclusively returns + // the create event when the test wants 404. Pause long enough that + // every event the homeserver subsequently stamps is in a strictly + // later millisecond. See matrix-org/complement#868. + time.Sleep(tsBoundaryGuard) roomID, _, _ := createTestRoom(t, alice) mustCheckEventisReturnedForTime(t, alice, roomID, timeBeforeRoomCreation, "b", "") }) @@ -331,6 +339,17 @@ type eventTime struct { AfterTimestamp time.Time } +// tsBoundaryGuard is the pause inserted around each time.Now() sample in this +// file's helpers and subtests so that no homeserver-stamped origin_server_ts +// can collide with the sample at millisecond granularity. /timestamp_to_event +// returns the boundary event inclusively (forward picks the earliest event +// with ts >= query, backward picks the latest with ts <= query), so a shared +// millisecond between sample and event lets the wrong event win the boundary. +// 2ms is well above the per-millisecond clock resolution every supported +// platform exposes and short enough that the cost is invisible compared to a +// homeserver round-trip. See matrix-org/complement#868. +const tsBoundaryGuard = 2 * time.Millisecond + func createTestRoom(t *testing.T, c *client.CSAPI) (roomID string, eventA, eventB *eventTime) { t.Helper() @@ -338,6 +357,14 @@ func createTestRoom(t *testing.T, c *client.CSAPI) (roomID string, eventA, event "preset": "public_chat", }) + // MustCreateRoom returns once the homeserver has stamped the full + // createRoom state batch (m.room.create through m.room.guest_access); + // the trailing state event can share a millisecond with the time.Now() + // below. Pausing here pushes the sample past that boundary so a forward + // search anchored on timeBeforeEventA cannot land back on the createRoom + // state. See matrix-org/complement#868. + time.Sleep(tsBoundaryGuard) + timeBeforeEventA := time.Now() eventAID := c.SendEventSynced(t, roomID, b.Event{ Type: "m.room.message", From f445822cf779a6ce53687fca8159d4cf82a5764f Mon Sep 17 00:00:00 2001 From: Jason Volk Date: Mon, 1 Jun 2026 23:41:26 +0000 Subject: [PATCH 2/4] `TestJumpToDateEndpoint`: Reduce the boundary guard to 1ms and cover the remaining samples A whole-millisecond pause always carries a timestamp into the next millisecond bucket, so `tsBoundaryGuard` only needs to be 1ms. Extend the guard to the two same-timestamp topological subtests and to `createTestRoom`'s `eventB` sample for consistency, even though no case exercises a forward search from `eventB` yet. Signed-off-by: Jason Volk --- tests/room_timestamp_to_event_test.go | 30 ++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/tests/room_timestamp_to_event_test.go b/tests/room_timestamp_to_event_test.go index 6afba207..ad01a737 100644 --- a/tests/room_timestamp_to_event_test.go +++ b/tests/room_timestamp_to_event_test.go @@ -62,7 +62,7 @@ func TestJumpToDateEndpoint(t *testing.T) { // above. A backward search at that millisecond inclusively returns // the create event when the test wants 404. Pause long enough that // every event the homeserver subsequently stamps is in a strictly - // later millisecond. See matrix-org/complement#868. + // later millisecond. time.Sleep(tsBoundaryGuard) roomID, _, _ := createTestRoom(t, alice) mustCheckEventisReturnedForTime(t, alice, roomID, timeBeforeRoomCreation, "b", "") @@ -84,6 +84,11 @@ func TestJumpToDateEndpoint(t *testing.T) { deployment.GetFullyQualifiedHomeserverName(t, "hs1"), }) + // Keep the join in an earlier millisecond than the messages below so + // it cannot win the boundary search; the topological tie-break is + // what this subtest actually exercises. + time.Sleep(tsBoundaryGuard) + // Send a couple messages with the same timestamp after the other test // messages in the room. timeBeforeMessageCreation := time.Now() @@ -106,6 +111,11 @@ func TestJumpToDateEndpoint(t *testing.T) { deployment.GetFullyQualifiedHomeserverName(t, "hs1"), }) + // Keep the join in an earlier millisecond than the messages below so + // it cannot win the boundary search; the topological tie-break is + // what this subtest actually exercises. + time.Sleep(tsBoundaryGuard) + // Send a couple messages with the same timestamp after the other test // messages in the room. timeBeforeMessageCreation := time.Now() @@ -345,10 +355,10 @@ type eventTime struct { // returns the boundary event inclusively (forward picks the earliest event // with ts >= query, backward picks the latest with ts <= query), so a shared // millisecond between sample and event lets the wrong event win the boundary. -// 2ms is well above the per-millisecond clock resolution every supported -// platform exposes and short enough that the cost is invisible compared to a -// homeserver round-trip. See matrix-org/complement#868. -const tsBoundaryGuard = 2 * time.Millisecond +// time.Sleep pauses for at least its argument, and adding a whole millisecond +// to a timestamp always carries it into the next millisecond bucket, so 1ms is +// enough to separate the sample from every event stamped after the pause. +const tsBoundaryGuard = 1 * time.Millisecond func createTestRoom(t *testing.T, c *client.CSAPI) (roomID string, eventA, eventB *eventTime) { t.Helper() @@ -362,7 +372,7 @@ func createTestRoom(t *testing.T, c *client.CSAPI) (roomID string, eventA, event // the trailing state event can share a millisecond with the time.Now() // below. Pausing here pushes the sample past that boundary so a forward // search anchored on timeBeforeEventA cannot land back on the createRoom - // state. See matrix-org/complement#868. + // state. time.Sleep(tsBoundaryGuard) timeBeforeEventA := time.Now() @@ -375,6 +385,12 @@ func createTestRoom(t *testing.T, c *client.CSAPI) (roomID string, eventA, event }) timeAfterEventA := time.Now() + // eventB.BeforeTimestamp has to sit in a strictly later millisecond than + // eventA, otherwise a forward search anchored there could land back on + // eventA at a shared boundary millisecond. Nothing exercises this yet, but + // keep the helper honest for callers that eventually will. + time.Sleep(tsBoundaryGuard) + timeBeforeEventB := time.Now() eventBID := c.SendEventSynced(t, roomID, b.Event{ Type: "m.room.message", Content: map[string]interface{}{ @@ -385,7 +401,7 @@ func createTestRoom(t *testing.T, c *client.CSAPI) (roomID string, eventA, event timeAfterEventB := time.Now() eventA = &eventTime{EventID: eventAID, BeforeTimestamp: timeBeforeEventA, AfterTimestamp: timeAfterEventA} - eventB = &eventTime{EventID: eventBID, BeforeTimestamp: timeAfterEventA, AfterTimestamp: timeAfterEventB} + eventB = &eventTime{EventID: eventBID, BeforeTimestamp: timeBeforeEventB, AfterTimestamp: timeAfterEventB} return roomID, eventA, eventB } From 2070acf508ebd1ec8a0c177125d7ed574fc88333 Mon Sep 17 00:00:00 2001 From: Jason Volk Date: Tue, 2 Jun 2026 21:19:57 +0000 Subject: [PATCH 3/4] `TestJumpToDateEndpoint`: Pair every timestamp sample with a guard sleep Take each `time.Now()` reading and pair `tsBoundaryGuard` with it so the homeserver always stamps the surrounding events in a strictly later millisecond, and apply the same pause to the `timeBeforeRoomCreation` samples the earlier commit left unguarded. `createTestRoom` no longer needs `timeAfterEventA`; `eventA`'s `AfterTimestamp` is now the `timeBeforeEventB` sample, which already sits a guard past `eventA`. Trim the `tsBoundaryGuard` doc comment to match. Signed-off-by: Jason Volk --- tests/room_timestamp_to_event_test.go | 39 +++++++++++---------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/tests/room_timestamp_to_event_test.go b/tests/room_timestamp_to_event_test.go index ad01a737..e1ad162f 100644 --- a/tests/room_timestamp_to_event_test.go +++ b/tests/room_timestamp_to_event_test.go @@ -131,6 +131,7 @@ func TestJumpToDateEndpoint(t *testing.T) { t.Run("should not be able to query a private room you are not a member of", func(t *testing.T) { t.Parallel() timeBeforeRoomCreation := time.Now() + time.Sleep(tsBoundaryGuard) // Alice will create the private room roomID := alice.MustCreateRoom(t, map[string]interface{}{ @@ -159,6 +160,7 @@ func TestJumpToDateEndpoint(t *testing.T) { t.Run("should not be able to query a public room you are not a member of", func(t *testing.T) { t.Parallel() timeBeforeRoomCreation := time.Now() + time.Sleep(tsBoundaryGuard) // Alice will create the public room roomID := alice.MustCreateRoom(t, map[string]interface{}{ @@ -205,6 +207,7 @@ func TestJumpToDateEndpoint(t *testing.T) { t.Run("when looking backwards before the room was created, should be able to find event that was imported", func(t *testing.T) { t.Parallel() timeBeforeRoomCreation := time.Now() + time.Sleep(tsBoundaryGuard) roomID, _, _ := createTestRoom(t, alice) // Join from the application service bridge user so we can use it to send @@ -349,15 +352,14 @@ type eventTime struct { AfterTimestamp time.Time } -// tsBoundaryGuard is the pause inserted around each time.Now() sample in this -// file's helpers and subtests so that no homeserver-stamped origin_server_ts -// can collide with the sample at millisecond granularity. /timestamp_to_event -// returns the boundary event inclusively (forward picks the earliest event -// with ts >= query, backward picks the latest with ts <= query), so a shared -// millisecond between sample and event lets the wrong event win the boundary. -// time.Sleep pauses for at least its argument, and adding a whole millisecond -// to a timestamp always carries it into the next millisecond bucket, so 1ms is -// enough to separate the sample from every event stamped after the pause. +// tsBoundaryGuard is a pause inserted around (before and after) where we create events +// so that `time.Now()` samples and subsequent event `origin_server_ts` don't collide at +// the same millisecond granularity. /timestamp_to_event returns the boundary event +// inclusively (forward picks the earliest event with ts >= query, backward picks the +// latest with ts <= query), so a shared millisecond between events means the wrong +// event can be picked. Adding one whole millisecond to a timestamp always carries it +// into the next millisecond bucket, so 1ms is enough to separate the sample from every +// event stamped after the pause. const tsBoundaryGuard = 1 * time.Millisecond func createTestRoom(t *testing.T, c *client.CSAPI) (roomID string, eventA, eventB *eventTime) { @@ -367,15 +369,8 @@ func createTestRoom(t *testing.T, c *client.CSAPI) (roomID string, eventA, event "preset": "public_chat", }) - // MustCreateRoom returns once the homeserver has stamped the full - // createRoom state batch (m.room.create through m.room.guest_access); - // the trailing state event can share a millisecond with the time.Now() - // below. Pausing here pushes the sample past that boundary so a forward - // search anchored on timeBeforeEventA cannot land back on the createRoom - // state. - time.Sleep(tsBoundaryGuard) - timeBeforeEventA := time.Now() + time.Sleep(tsBoundaryGuard) eventAID := c.SendEventSynced(t, roomID, b.Event{ Type: "m.room.message", Content: map[string]interface{}{ @@ -383,14 +378,10 @@ func createTestRoom(t *testing.T, c *client.CSAPI) (roomID string, eventA, event "body": "Message A", }, }) - timeAfterEventA := time.Now() - // eventB.BeforeTimestamp has to sit in a strictly later millisecond than - // eventA, otherwise a forward search anchored there could land back on - // eventA at a shared boundary millisecond. Nothing exercises this yet, but - // keep the helper honest for callers that eventually will. time.Sleep(tsBoundaryGuard) timeBeforeEventB := time.Now() + time.Sleep(tsBoundaryGuard) eventBID := c.SendEventSynced(t, roomID, b.Event{ Type: "m.room.message", Content: map[string]interface{}{ @@ -398,9 +389,11 @@ func createTestRoom(t *testing.T, c *client.CSAPI) (roomID string, eventA, event "body": "Message B", }, }) + + time.Sleep(tsBoundaryGuard) timeAfterEventB := time.Now() - eventA = &eventTime{EventID: eventAID, BeforeTimestamp: timeBeforeEventA, AfterTimestamp: timeAfterEventA} + eventA = &eventTime{EventID: eventAID, BeforeTimestamp: timeBeforeEventA, AfterTimestamp: timeBeforeEventB} eventB = &eventTime{EventID: eventBID, BeforeTimestamp: timeBeforeEventB, AfterTimestamp: timeAfterEventB} return roomID, eventA, eventB From 549b1814c9f5138689fdb6f1547f37a21d43149c Mon Sep 17 00:00:00 2001 From: Jason Volk Date: Tue, 2 Jun 2026 21:40:07 +0000 Subject: [PATCH 4/4] `TestJumpToDateEndpoint`: Rewrite the boundary-guard comments for brevity and clarity Signed-off-by: Jason Volk --- tests/room_timestamp_to_event_test.go | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/tests/room_timestamp_to_event_test.go b/tests/room_timestamp_to_event_test.go index e1ad162f..fbc83e22 100644 --- a/tests/room_timestamp_to_event_test.go +++ b/tests/room_timestamp_to_event_test.go @@ -56,13 +56,8 @@ func TestJumpToDateEndpoint(t *testing.T) { t.Run("should find nothing before the earliest timestamp", func(t *testing.T) { t.Parallel() timeBeforeRoomCreation := time.Now() - // /timestamp_to_event is millisecond-granular by spec, and the next - // call (createTestRoom) writes an m.room.create whose - // origin_server_ts can land in the same millisecond as the sample - // above. A backward search at that millisecond inclusively returns - // the create event when the test wants 404. Pause long enough that - // every event the homeserver subsequently stamps is in a strictly - // later millisecond. + // Guard so createRoom cannot share this sample's millisecond; a + // backward search there would return m.room.create instead of nothing. time.Sleep(tsBoundaryGuard) roomID, _, _ := createTestRoom(t, alice) mustCheckEventisReturnedForTime(t, alice, roomID, timeBeforeRoomCreation, "b", "") @@ -84,9 +79,7 @@ func TestJumpToDateEndpoint(t *testing.T) { deployment.GetFullyQualifiedHomeserverName(t, "hs1"), }) - // Keep the join in an earlier millisecond than the messages below so - // it cannot win the boundary search; the topological tie-break is - // what this subtest actually exercises. + // Guard so the join cannot share a millisecond with the messages below. time.Sleep(tsBoundaryGuard) // Send a couple messages with the same timestamp after the other test @@ -111,9 +104,7 @@ func TestJumpToDateEndpoint(t *testing.T) { deployment.GetFullyQualifiedHomeserverName(t, "hs1"), }) - // Keep the join in an earlier millisecond than the messages below so - // it cannot win the boundary search; the topological tie-break is - // what this subtest actually exercises. + // Guard so the join cannot share a millisecond with the messages below. time.Sleep(tsBoundaryGuard) // Send a couple messages with the same timestamp after the other test @@ -379,6 +370,8 @@ func createTestRoom(t *testing.T, c *client.CSAPI) (roomID string, eventA, event }, }) + // timeBeforeEventB doubles as eventA's after-timestamp, so guard it on + // both sides to keep it between the two events. time.Sleep(tsBoundaryGuard) timeBeforeEventB := time.Now() time.Sleep(tsBoundaryGuard)