Skip to content

Commit e520a3b

Browse files
authored
Merge pull request #70 from elementsinteractive/backoff
feat(#69): add retries and backoff to slack calls
2 parents 78abb55 + 482e415 commit e520a3b

3 files changed

Lines changed: 178 additions & 14 deletions

File tree

internal/slack/slack.go

Lines changed: 79 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package slack
33
import (
44
"errors"
55
"fmt"
6+
"time"
67

78
"github.com/elliotchance/pie/v2"
89
"github.com/rs/zerolog/log"
@@ -14,7 +15,14 @@ type IService interface {
1415
}
1516

1617
type service struct {
17-
client iclient
18+
client iclient
19+
maxAttempts int
20+
initialBackoff time.Duration
21+
}
22+
23+
type conversationsResult struct {
24+
Channels []slack.Channel
25+
NextCursor string
1826
}
1927

2028
// New creates a new Slack service
@@ -24,19 +32,26 @@ func New(token string, debug bool) (IService, error) {
2432
return nil, errors.New("failed to create slack client")
2533
}
2634

27-
s := service{&client{client: slackClient}}
35+
s := service{
36+
client: &client{client: slackClient},
37+
maxAttempts: 5,
38+
initialBackoff: 2 * time.Second,
39+
}
2840

2941
return &s, nil
3042
}
3143

3244
// PostMessage posts a message to the given slack channel
3345
func (s *service) PostMessage(channelName string, options ...slack.MsgOption) (ts string, err error) {
34-
channel, err := s.findSlackChannel(channelName)
46+
channel, err := runWithRetries(func() (*slack.Channel, error) { return s.findSlackChannel(channelName) }, s.maxAttempts, s.initialBackoff)
3547
if err != nil {
3648
return
3749
}
3850

39-
_, ts, err = s.client.PostMessage(channel.ID, options...)
51+
ts, err = runWithRetries(func() (string, error) {
52+
_, msgTs, err := s.client.PostMessage(channel.ID, options...)
53+
return msgTs, err
54+
}, s.maxAttempts, s.initialBackoff)
4055
if err != nil {
4156
return ts, errors.Join(errors.New("failed to post slack message"), err)
4257
}
@@ -54,15 +69,25 @@ func (s *service) findSlackChannel(channelName string) (channel *slack.Channel,
5469
var channelTypes = []string{"private_channel", "public_channel"}
5570

5671
for {
57-
if channels, nextCursor, err = s.client.GetConversations(&slack.GetConversationsParameters{
58-
ExcludeArchived: true,
59-
Cursor: nextCursor,
60-
Types: channelTypes,
61-
Limit: 1000,
62-
}); err != nil {
63-
return nil, errors.Join(errors.New("failed to get slack channel list"), err)
72+
result, opErr := runWithRetries(func() (conversationsResult, error) {
73+
convChannels, convCursor, convErr := s.client.GetConversations(&slack.GetConversationsParameters{
74+
ExcludeArchived: true,
75+
Cursor: nextCursor,
76+
Types: channelTypes,
77+
Limit: 1000,
78+
})
79+
if convErr != nil {
80+
return conversationsResult{}, convErr
81+
}
82+
return conversationsResult{Channels: convChannels, NextCursor: convCursor}, nil
83+
}, s.maxAttempts, s.initialBackoff)
84+
if opErr != nil {
85+
return nil, errors.Join(errors.New("failed to get slack channel list"), opErr)
6486
}
6587

88+
channels = result.Channels
89+
nextCursor = result.NextCursor
90+
6691
idx := pie.FindFirstUsing(channels, func(c slack.Channel) bool { return c.Name == channelName })
6792
if idx > -1 {
6893
log.Info().Str("channel", channelName).Msg("Found slack channel")
@@ -75,3 +100,46 @@ func (s *service) findSlackChannel(channelName string) (channel *slack.Channel,
75100
log.Debug().Str("channel", channelName).Str("nextPage", nextCursor).Msg("Channel not found in current page, fetching next page")
76101
}
77102
}
103+
104+
func runWithRetries[T any](operation func() (T, error), maxAttempts int, backoff time.Duration) (result T, err error) {
105+
if maxAttempts <= 0 {
106+
maxAttempts = 1
107+
}
108+
109+
for attempt := 1; attempt <= maxAttempts; attempt++ {
110+
result, err = operation()
111+
if err == nil {
112+
return result, nil
113+
}
114+
115+
if attempt == maxAttempts {
116+
break
117+
}
118+
119+
var sleepDuration time.Duration
120+
var rateLimitErr *slack.RateLimitedError
121+
122+
if errors.As(err, &rateLimitErr) {
123+
// Override the standard backoff with Slack's requested wait time
124+
if rateLimitErr.RetryAfter > 0 {
125+
sleepDuration = rateLimitErr.RetryAfter
126+
} else {
127+
// Use exponential backoff: backoff * 2^(attempt-1)
128+
sleepDuration = backoff * time.Duration(1<<(attempt-1))
129+
}
130+
131+
log.Warn().
132+
Err(err).
133+
Int("attempt", attempt).
134+
Dur("retry_after", sleepDuration).
135+
Msg("Hit Slack rate limit, backing off dynamically")
136+
} else {
137+
sleepDuration = backoff * time.Duration(1<<(attempt-1))
138+
log.Warn().Err(err).Int("attempt", attempt).Dur("backoff", sleepDuration).Msg("Operation failed, retrying with exponential backoff")
139+
}
140+
141+
time.Sleep(sleepDuration)
142+
}
143+
144+
return result, fmt.Errorf("operation failed after %d attempts: %w", maxAttempts, err)
145+
}

internal/slack/slack_test.go

Lines changed: 98 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
package slack
22

33
import (
4+
"errors"
45
"testing"
6+
"time"
57

68
"github.com/slack-go/slack"
79
"github.com/stretchr/testify/assert"
@@ -35,7 +37,11 @@ func TestPostMessage(t *testing.T) {
3537
)
3638
mockClient.On("PostMessage", channelID, mock.Anything).Return("", "", nil)
3739

38-
svc := service{&mockClient}
40+
svc := service{
41+
client: &mockClient,
42+
maxAttempts: 3,
43+
initialBackoff: 2 * time.Second,
44+
}
3945

4046
_, err := svc.PostMessage(channelName, message)
4147

@@ -66,7 +72,11 @@ func TestFindSlackChannel(t *testing.T) {
6672
nil,
6773
)
6874

69-
svc := service{&mockClient}
75+
svc := service{
76+
client: &mockClient,
77+
maxAttempts: 3,
78+
initialBackoff: 2 * time.Second,
79+
}
7080

7181
channel, err := svc.findSlackChannel(channelName)
7282

@@ -89,3 +99,89 @@ func (c *mockClient) GetConversations(params *slack.GetConversationsParameters)
8999
args := c.Called(params)
90100
return args.Get(0).([]slack.Channel), args.String(1), args.Error(2)
91101
}
102+
103+
// TestPostMessageWithRateLimitRetry verifies retry happens on rate limit errors
104+
func TestPostMessageWithRateLimitRetry(t *testing.T) {
105+
channelID := "test-channel"
106+
107+
mockClient := mockClient{}
108+
mockClient.On("GetConversations", mock.Anything).Return(
109+
[]slack.Channel{
110+
{
111+
GroupConversation: slack.GroupConversation{
112+
Conversation: slack.Conversation{ID: channelID},
113+
Name: "test-channel",
114+
},
115+
},
116+
},
117+
"",
118+
nil,
119+
)
120+
// First call fails with rate limit
121+
mockClient.On("PostMessage", channelID, mock.Anything).Return("", "", errors.New("error: rate limit")).Once()
122+
// Second call succeeds
123+
mockClient.On("PostMessage", channelID, mock.Anything).Return("", "ts123", nil).Once()
124+
125+
// Create service with minimal backoff for testing (1ms instead of 2s)
126+
svc := service{
127+
client: &mockClient,
128+
maxAttempts: 3,
129+
initialBackoff: 1 * time.Microsecond,
130+
}
131+
132+
start := time.Now()
133+
ts, err := svc.PostMessage(channelID, slack.MsgOptionText("test", false))
134+
135+
assert.Nil(t, err)
136+
assert.Equal(t, "ts123", ts)
137+
mockClient.AssertExpectations(t)
138+
139+
// Verify it waited (at least the backoff time, which is now 1ms)
140+
elapsed := time.Since(start)
141+
assert.GreaterOrEqual(t, elapsed, 1*time.Microsecond, "should have waited for backoff")
142+
}
143+
func TestPostMessageWithDynamicRateLimitRetry(t *testing.T) {
144+
channelID := "test-channel"
145+
mockClient := mockClient{}
146+
147+
// Setup mock channel resolution
148+
mockClient.On("GetConversations", mock.Anything).Return(
149+
[]slack.Channel{
150+
{
151+
GroupConversation: slack.GroupConversation{
152+
Conversation: slack.Conversation{ID: channelID},
153+
Name: "test-channel",
154+
},
155+
},
156+
},
157+
"",
158+
nil,
159+
)
160+
161+
expectedWait := 50 * time.Millisecond
162+
rateLimitErr := &slack.RateLimitedError{
163+
RetryAfter: expectedWait,
164+
}
165+
166+
mockClient.On("PostMessage", channelID, mock.Anything).
167+
Return("", "", rateLimitErr).Once()
168+
169+
mockClient.On("PostMessage", channelID, mock.Anything).
170+
Return("", "ts123", nil).Once()
171+
172+
svc := service{
173+
client: &mockClient,
174+
maxAttempts: 3,
175+
initialBackoff: 1 * time.Millisecond,
176+
}
177+
178+
start := time.Now()
179+
ts, err := svc.PostMessage(channelID, slack.MsgOptionText("test", false))
180+
181+
assert.Nil(t, err)
182+
assert.Equal(t, "ts123", ts)
183+
mockClient.AssertExpectations(t)
184+
185+
elapsed := time.Since(start)
186+
assert.GreaterOrEqual(t, elapsed, expectedWait, "should have used Slack's dynamic RetryAfter backoff")
187+
}

lgtm.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
technologies = ["Golang"]
22
categories = ["Correctness", "Quality", "Testing", "Security"]
33
exclude = ["go.mod", "go.sum"]
4-
model = "gemini-2.5-flash-preview-*"
4+
model = "gemini-2.5-pro"
55
silent = false
66
publish = true
77
ai_retries = 2

0 commit comments

Comments
 (0)