Skip to content

Commit f85bfe3

Browse files
authored
Add Dockerfile for all SDK e2e tests, fix flaky e2e tests (#3846)
1 parent 1f09b00 commit f85bfe3

41 files changed

Lines changed: 710 additions & 409 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/e2e-image.yml

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# Builds the per-SDK e2e test images and pushes them to GHCR on every push to
# main that touches an input of one of those images.
name: e2e-image
on:
  push:
    branches:
      - main
    paths:
      - "sdks/go/**"
      - "sdks/python/**"
      - "sdks/typescript/**"
      - "sdks/ruby/**"
      # e2e-go.dockerfile also copies these shared Go sources into the image,
      # so a change to any of them must refresh the published image too.
      - "pkg/**"
      - "internal/**"
      - "api/**"
      - "go.mod"
      - "go.sum"
      # Dockerfiles and entrypoint scripts used by the jobs below.
      - "build/package/e2e-*"

# GITHUB_TOKEN needs package write access for `docker push` to ghcr.io;
# everything else is limited to read-only.
permissions:
  contents: read
  packages: write

jobs:
  build-push-e2e-go:
    name: hatchet-e2e-go
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - name: Login to GHCR
        run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin
      - name: Build and push
        run: |
          DOCKER_BUILDKIT=1 docker build -f ./build/package/e2e-go.dockerfile \
            -t ghcr.io/hatchet-dev/hatchet/hatchet-e2e-go:${{ github.sha }} \
            -t ghcr.io/hatchet-dev/hatchet/hatchet-e2e-go:main \
            --platform linux/amd64 \
            .
          docker push ghcr.io/hatchet-dev/hatchet/hatchet-e2e-go:${{ github.sha }}
          docker push ghcr.io/hatchet-dev/hatchet/hatchet-e2e-go:main

  build-push-e2e-python:
    name: hatchet-e2e-python
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - name: Login to GHCR
        run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin
      - name: Build and push
        run: |
          DOCKER_BUILDKIT=1 docker build -f ./build/package/e2e-python.dockerfile \
            -t ghcr.io/hatchet-dev/hatchet/hatchet-e2e-python:${{ github.sha }} \
            -t ghcr.io/hatchet-dev/hatchet/hatchet-e2e-python:main \
            --platform linux/amd64 \
            .
          docker push ghcr.io/hatchet-dev/hatchet/hatchet-e2e-python:${{ github.sha }}
          docker push ghcr.io/hatchet-dev/hatchet/hatchet-e2e-python:main

  build-push-e2e-typescript:
    name: hatchet-e2e-typescript
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - name: Login to GHCR
        run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin
      - name: Build and push
        run: |
          DOCKER_BUILDKIT=1 docker build -f ./build/package/e2e-typescript.dockerfile \
            -t ghcr.io/hatchet-dev/hatchet/hatchet-e2e-typescript:${{ github.sha }} \
            -t ghcr.io/hatchet-dev/hatchet/hatchet-e2e-typescript:main \
            --platform linux/amd64 \
            .
          docker push ghcr.io/hatchet-dev/hatchet/hatchet-e2e-typescript:${{ github.sha }}
          docker push ghcr.io/hatchet-dev/hatchet/hatchet-e2e-typescript:main

  build-push-e2e-ruby:
    name: hatchet-e2e-ruby
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - name: Login to GHCR
        run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin
      - name: Build and push
        run: |
          DOCKER_BUILDKIT=1 docker build -f ./build/package/e2e-ruby.dockerfile \
            -t ghcr.io/hatchet-dev/hatchet/hatchet-e2e-ruby:${{ github.sha }} \
            -t ghcr.io/hatchet-dev/hatchet/hatchet-e2e-ruby:main \
            --platform linux/amd64 \
            .
          docker push ghcr.io/hatchet-dev/hatchet/hatchet-e2e-ruby:${{ github.sha }}
          docker push ghcr.io/hatchet-dev/hatchet/hatchet-e2e-ruby:main

.github/workflows/sdk-python.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ jobs:
147147
- name: Run pytest
148148
run: |
149149
echo "Using HATCHET_CLIENT_NAMESPACE: $HATCHET_CLIENT_NAMESPACE"
150-
150+
export HATCHET_CLIENT_SERVER_URL=http://localhost:8080
151151
poetry run pytest -s -vvv --maxfail=5 --capture=no --retries 3 --retry-delay 2 -n 8
152152
153153
- name: Test with wheel

build/package/e2e-go.dockerfile

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Base Go environment
# -------------------
FROM golang:1.25-alpine AS base
WORKDIR /hatchet

# Copy only the module manifests first so the dependency download layer stays
# cached until go.mod/go.sum change.
COPY go.mod go.sum ./

RUN go mod download

COPY /pkg ./pkg
COPY /internal ./internal
COPY /api ./api
COPY /sdks/go ./sdks/go

# Go build environment
# --------------------
FROM base AS build-go

# Compile the e2e test package into a standalone test binary (-c); the tests
# themselves run when the container starts, not at build time.
RUN go test -c -tags e2e -v -o ./bin/e2e-test ./sdks/go/e2e/

# Deployment environment
# ----------------------
# Pinned so rebuilds do not silently pick up a new Alpine release; bump the tag
# deliberately when upgrading.
FROM alpine:3.22 AS deployment

WORKDIR /hatchet

# --no-cache already fetches a fresh package index, so no separate `apk update`.
RUN apk add --no-cache ca-certificates tzdata

COPY --from=build-go /hatchet/bin/e2e-test /hatchet/

CMD ["/hatchet/e2e-test", "-test.v", "-test.timeout=10m"]
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Python e2e test image
# ---------------------
# Single-stage image: installs Poetry, then the SDK with all extras, and runs
# the pytest suite when the container starts.
FROM python:3.13-slim AS deployment

WORKDIR /hatchet/sdks/python

# Poetry is pinned so the dependency resolver behaves the same on every build.
RUN pip install --no-cache-dir "poetry==2.3.0"

COPY sdks/python/ ./

RUN poetry install --no-interaction --all-extras

# -x stops at the first failing test; output capture is disabled so logs stream live.
CMD ["poetry", "run", "pytest", "-s", "-vvv", "-x", "--capture=no"]
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#!/bin/sh
# Entrypoint for the Ruby e2e image.
#
# 1. Runs the integration specs in sdks/ruby/src.
# 2. Starts the example worker in the background and waits for its health endpoint.
# 3. Runs the e2e specs in sdks/ruby/examples against that worker.
#
# Exit status: the e2e status if it is non-zero, otherwise the integration
# status (0 when both phases pass). Exits 1 if a working directory is missing
# or the worker never becomes healthy.

WORKER_PID=""
FINAL_EXIT=0

# Stop the background worker (if one was started) and reap it.
cleanup() {
  if [ -n "$WORKER_PID" ]; then
    kill -9 "$WORKER_PID" 2>/dev/null || true
    wait "$WORKER_PID" 2>/dev/null || true
  fi
}

# cleanup runs exactly once, on shell exit. The signal traps only call `exit`
# so that an interrupted run actually stops; a handler that merely returns
# would let the script resume with the next command.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

echo "=== Running integration tests ==="
cd /hatchet/sdks/ruby/src || exit 1
timeout 300 bundle exec rspec spec/integration/ --format documentation --tag integration
INTEGRATION_EXIT=$?
if [ "$INTEGRATION_EXIT" -ne 0 ]; then
  FINAL_EXIT=$INTEGRATION_EXIT
fi

echo "=== Starting example worker ==="
cd /hatchet/sdks/ruby/examples || exit 1
HATCHET_CLIENT_WORKER_HEALTHCHECK_ENABLED=true \
HATCHET_CLIENT_WORKER_HEALTHCHECK_PORT=8001 \
  bundle exec ruby worker.rb &
WORKER_PID=$!

# Poll the worker health endpoint once per second, for at most 60 attempts.
for i in $(seq 1 60); do
  if curl -sf http://localhost:8001/health > /dev/null 2>&1; then
    echo "Worker healthy after ${i}s"
    break
  fi
  # No point waiting out the full minute if the worker process is already gone.
  if ! kill -0 "$WORKER_PID" 2>/dev/null; then
    echo "Worker exited before becoming healthy"
    exit 1
  fi
  if [ "$i" -eq 60 ]; then
    echo "Worker failed to start within 60s"
    exit 1
  fi
  sleep 1
done

echo "=== Running e2e tests ==="
timeout 1200 bundle exec rspec -f d --fail-fast
E2E_EXIT=$?
if [ "$E2E_EXIT" -ne 0 ]; then
  FINAL_EXIT=$E2E_EXIT
fi

exit $FINAL_EXIT

build/package/e2e-ruby.dockerfile

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Base Ruby environment
# ---------------------
# Single-stage image holding the Ruby SDK sources, the example worker, and the
# entrypoint script that runs the integration and e2e specs.
FROM ruby:3.2 AS deployment

# curl is used by the entrypoint script to poll the worker health endpoint.
RUN apt-get update \
    && apt-get install -y --no-install-recommends curl \
    && rm -rf /var/lib/apt/lists/*

# SDK sources and their gems. WORKDIR replaces `RUN cd ... &&` so each step
# runs in an explicit directory.
WORKDIR /hatchet/sdks/ruby/src
COPY sdks/ruby/src/ ./
RUN bundle lock --add-platform x86_64-linux && bundle install

# Example worker and e2e specs, with their own bundle.
WORKDIR /hatchet/sdks/ruby/examples
COPY sdks/ruby/examples/ ./
RUN bundle lock --add-platform x86_64-linux && bundle install

# Restore the working directory the image previously ended with.
WORKDIR /hatchet/sdks/ruby

# --chmod sets the executable bit during the copy, avoiding an extra layer
# that would duplicate the file just to change its mode (requires BuildKit).
COPY --chmod=755 build/package/e2e-ruby-entrypoint.sh /entrypoint.sh

CMD ["/entrypoint.sh"]
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# TypeScript e2e test image
# -------------------------
# Single-stage image: activates a pinned pnpm through corepack, installs the
# SDK dependencies from the lockfile, and runs the e2e script at container start.
FROM node:20-alpine AS deployment

WORKDIR /hatchet/sdks/typescript

RUN corepack enable \
    && corepack prepare pnpm@10.16.1 --activate

COPY sdks/typescript/ ./

# --frozen-lockfile fails the build instead of rewriting the lockfile when it
# is out of date.
RUN pnpm install --frozen-lockfile

CMD ["pnpm", "run", "test:e2e"]

examples/python/durable_eviction/worker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ async def capacity_evictable_sleep(
149149
)
150150
async def non_evictable_sleep(input: EmptyModel, ctx: DurableContext) -> dict[str, Any]:
151151
"""Has eviction disabled -- should never be evicted."""
152-
await ctx.aio_sleep_for(timedelta(seconds=10))
152+
await ctx.aio_sleep_for(timedelta(seconds=30))
153153
return {"status": "completed"}
154154

155155

pkg/worker/context.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"github.com/hatchet-dev/hatchet/pkg/client"
2121
"github.com/hatchet-dev/hatchet/pkg/client/create"
2222
"github.com/hatchet-dev/hatchet/pkg/client/types"
23+
clientconfig "github.com/hatchet-dev/hatchet/pkg/config/client"
2324
"github.com/hatchet-dev/hatchet/pkg/worker/condition"
2425
)
2526

@@ -1061,6 +1062,8 @@ func (d *durableHatchetContext) SleepFor(duration time.Duration) (*SingleWaitRes
10611062

10621063
// WaitForEvent implements the DurableHatchetContext.WaitForEvent method.
10631064
func (d *durableHatchetContext) WaitForEvent(eventKey, expression string) (*SingleWaitResult, error) {
1065+
namespace := d.c.Namespace()
1066+
eventKey = clientconfig.ApplyNamespace(eventKey, &namespace)
10641067
wr, err := d.waitFor(condition.UserEventCondition(eventKey, expression), "wait_for_event", eventKey)
10651068

10661069
if err != nil {

sdks/go/e2e/durable_test.go

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@ func TestDurableWorkflow(t *testing.T) {
4444

4545
id := uniqueID()
4646

47-
time.Sleep(time.Duration(sleepTime+10) * time.Second)
47+
// Wait for the run to start, then let the internal SleepFor(sleepTime) finish before pushing the event.
48+
pollUntilRunStatus(t, ctx, sharedClient, ref.RunId, string(rest.V1TaskStatusRUNNING))
49+
time.Sleep(time.Duration(sleepTime+3) * time.Second)
4850

4951
err = sharedClient.Events().Push(ctx, eventKey, AwaitedEvent{ID: id})
5052
require.NoError(t, err)
@@ -68,15 +70,21 @@ func TestDurableSleepCancelReplay(t *testing.T) {
6870
ref, err := testWaitForSleepTwice.RunNoWait(ctx, EmptyInput{})
6971
require.NoError(t, err)
7072

71-
time.Sleep(time.Duration(sleepTime/2) * time.Second)
73+
pollUntilRunStatus(t, ctx, sharedClient, ref.RunId, string(rest.V1TaskStatusRUNNING))
7274

7375
_, err = sharedClient.Runs().Cancel(ctx, rest.V1CancelTaskRequest{
7476
ExternalIds: toUUIDs(ref.RunId),
7577
})
7678
require.NoError(t, err)
7779

78-
// Wait for cancellation
79-
time.Sleep(2 * time.Second)
80+
// Wait for cancellation to propagate before replaying.
81+
pollUntil(t, ctx, func() (bool, error) {
82+
status, err := sharedClient.Runs().GetStatus(ctx, ref.RunId)
83+
if err != nil {
84+
return false, err
85+
}
86+
return *status == rest.V1TaskStatusCANCELLED, nil
87+
})
8088

8189
replayStart := time.Now()
8290
_, err = sharedClient.Runs().Replay(ctx, rest.V1ReplayTaskRequest{
@@ -92,8 +100,8 @@ func TestDurableSleepCancelReplay(t *testing.T) {
92100
err = result.TaskOutput("wait-for-sleep-twice").Into(&output)
93101
require.NoError(t, err)
94102

95-
assert.Less(t, output["runtime"], float64(sleepTime))
96-
assert.LessOrEqual(t, replayElapsed, float64(sleepTime))
103+
assert.Less(t, output["runtime"], float64(sleepTime)+timingTolerance)
104+
assert.LessOrEqual(t, replayElapsed, float64(sleepTime)+timingTolerance)
97105
}
98106

99107
func TestDurableChildSpawn(t *testing.T) {
@@ -122,6 +130,7 @@ func TestDurableChildBulkSpawn(t *testing.T) {
122130
require.NoError(t, err)
123131
outputs, ok := m["child_outputs"].([]any)
124132
require.True(t, ok, "expected child_outputs to be an array")
133+
125134
assert.GreaterOrEqual(t, len(outputs), n-1)
126135
assert.LessOrEqual(t, len(outputs), n)
127136

@@ -145,7 +154,9 @@ func TestDurableSleepEventSpawnReplay(t *testing.T) {
145154
ref, err := testDurableSleepEventSpawn.RunNoWait(ctx, EmptyInput{})
146155
require.NoError(t, err)
147156

148-
time.Sleep(time.Duration(sleepTime+5) * time.Second)
157+
// Wait for the run to start, then let the internal SleepFor(sleepTime) finish before pushing the event.
158+
pollUntilRunStatus(t, ctx, sharedClient, ref.RunId, string(rest.V1TaskStatusRUNNING))
159+
time.Sleep(time.Duration(sleepTime+3) * time.Second)
149160
err = sharedClient.Events().Push(ctx, eventKey, map[string]string{"test": "test"})
150161
require.NoError(t, err)
151162

@@ -173,7 +184,7 @@ func TestDurableSleepEventSpawnReplay(t *testing.T) {
173184
replayChild, ok := rm["child_output"].(map[string]any)
174185
require.True(t, ok)
175186
assert.Equal(t, "hello from child 1", replayChild["message"])
176-
assert.Less(t, replayElapsed, float64(sleepTime))
187+
assert.Less(t, replayElapsed, float64(sleepTime)+timingTolerance)
177188
}
178189

179190
func TestDurableCompletedReplay(t *testing.T) {
@@ -207,8 +218,8 @@ func TestDurableCompletedReplay(t *testing.T) {
207218
var replayOutput map[string]float64
208219
err = replayResult.TaskOutput("wait-for-sleep-twice").Into(&replayOutput)
209220
require.NoError(t, err)
210-
assert.Less(t, replayOutput["runtime"], float64(sleepTime))
211-
assert.Less(t, elapsed, float64(sleepTime))
221+
assert.Less(t, replayOutput["runtime"], float64(sleepTime)+timingTolerance)
222+
assert.Less(t, elapsed, float64(sleepTime)+timingTolerance)
212223
}
213224

214225
func TestDurableSpawnDAG(t *testing.T) {
@@ -306,7 +317,7 @@ func TestDurableReplayReset(t *testing.T) {
306317
durations := []float64{resetOutput.Sleep1Duration, resetOutput.Sleep2Duration, resetOutput.Sleep3Duration}
307318
for i, d := range durations {
308319
if int64(i+1) < nodeID {
309-
assert.Less(t, d, float64(replayResetSleepTime))
320+
assert.Less(t, d, float64(replayResetSleepTime)+timingTolerance)
310321
} else {
311322
assert.GreaterOrEqual(t, d, float64(replayResetSleepTime))
312323
}
@@ -369,7 +380,7 @@ func TestDurableBranchingOffBranch(t *testing.T) {
369380
err = resetResult2.TaskOutput("durable-replay-reset").Into(&resetOutput2)
370381
require.NoError(t, err)
371382

372-
assert.Less(t, resetOutput2.Sleep1Duration, float64(replayResetSleepTime))
383+
assert.Less(t, resetOutput2.Sleep1Duration, float64(replayResetSleepTime)+timingTolerance)
373384
assert.GreaterOrEqual(t, resetOutput2.Sleep2Duration, float64(replayResetSleepTime))
374385
assert.GreaterOrEqual(t, resetOutput2.Sleep3Duration, float64(replayResetSleepTime))
375386
assert.GreaterOrEqual(t, resetElapsed2, float64(2*replayResetSleepTime))
@@ -407,7 +418,7 @@ func TestDurableMemoizationViaReplay(t *testing.T) {
407418
require.NoError(t, err)
408419

409420
assert.GreaterOrEqual(t, duration1, float64(sleepTime))
410-
assert.Less(t, duration2, 1.0)
421+
assert.Less(t, duration2, 1.1)
411422
assert.Equal(t, output1.Message, output2.Message)
412423
}
413424

0 commit comments

Comments
 (0)