Skip to content

Commit 3263c78

Browse files
chrisli30 and will-dz authored
fix: silence expected user-workflow failures from Sentry error alerts (#526)
Co-authored-by: Will Zimmerman <will@avaprotocol.org>
1 parent d8a8cc2 commit 3263c78

8 files changed

Lines changed: 168 additions & 25 deletions

File tree

aggregator/rpc_server.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,11 @@ func (r *RpcServer) WithdrawFunds(ctx context.Context, payload *avsproto.Withdra
455455
)
456456

457457
if err != nil {
458-
r.config.Logger.Error("failed to send withdrawal UserOp",
458+
// See preset.LogBundlerError: Warn on on-chain revert (user's withdrawal
459+
// reverted — e.g. ERC20 transfer to blacklisted recipient, insufficient
460+
// token balance after race), Error on infra/AA (bundler down, AA21, etc.).
461+
preset.LogBundlerError(r.config.Logger, err,
462+
"failed to send withdrawal UserOp",
459463
"error", err,
460464
"user", user.Address.String(),
461465
"recipient", payload.RecipientAddress,

core/taskengine/engine.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3080,7 +3080,10 @@ func (n *Engine) SimulateTask(user *model.User, trigger *avsproto.TaskTrigger, n
30803080
cleanErrorMsg = stackTraceRegex.ReplaceAllString(cleanErrorMsg, "")
30813081
cleanErrorMsg = strings.TrimSpace(cleanErrorMsg)
30823082

3083-
n.logger.Error("workflow simulation completed with failures",
3083+
// User-workflow simulation failure: per-step errors are captured in the
3084+
// persisted execution steps. Log summary at Warn so it stays out of Sentry
3085+
// error alerts.
3086+
n.logger.Warn("workflow simulation completed with failures",
30843087
"error", cleanErrorMsg,
30853088
"task_id", task.Id,
30863089
"simulation_id", simulationID,

core/taskengine/executor.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -653,7 +653,10 @@ func (x *TaskExecutor) RunTask(task *model.Task, queueData *QueueExecutionData)
653653
case ExecutionSuccess:
654654
x.logger.Info("task execution completed successfully", "task_id", task.Id, "execution_id", queueData.ExecutionID, "total_steps", len(vm.ExecutionLogs))
655655
case ExecutionFailed:
656-
x.logger.Error("task execution completed with failures",
656+
// User-workflow failure: per-step errors are already logged at their sites
657+
// and the ExecutionStatus_EXECUTION_STATUS_FAILED is persisted below. Log
658+
// the summary at Warn so it stays out of Sentry error alerts.
659+
x.logger.Warn("task execution completed with failures",
657660
"error", executionError,
658661
"task_id", task.Id,
659662
"execution_id", queueData.ExecutionID,

core/taskengine/tenderly_client.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,7 +1049,10 @@ func (tc *TenderlyClient) SimulateContractWrite(ctx context.Context, contractAdd
10491049
if status, ok := sim["status"].(bool); ok && !status {
10501050
result.Success = false
10511051
if em, ok := sim["error_message"].(string); ok && em != "" {
1052-
tc.logger.Error("❌ Tenderly simulation failed: simulation.status=false",
1052+
// Simulation catching a future revert is the feature working
1053+
// as intended — user-workflow failure, not infra. Log at Warn
1054+
// so it stays out of Sentry error alerts.
1055+
tc.logger.Warn("tenderly simulation failed: simulation.status=false",
10531056
"contract", contractAddress,
10541057
"method", methodName,
10551058
"error_message", em,
@@ -1081,7 +1084,9 @@ func (tc *TenderlyClient) SimulateContractWrite(ctx context.Context, contractAdd
10811084
// Look for error in nested calls (like ERC20 transferFrom failures)
10821085
if errMsg, ok := callMap["error"].(string); ok && errMsg != "" {
10831086
errorMsg = errMsg
1084-
tc.logger.Error("❌ Tenderly simulation failed: transaction reverted",
1087+
// User-workflow revert caught by simulation — log at
1088+
// Warn to keep out of Sentry error alerts.
1089+
tc.logger.Warn("tenderly simulation failed: transaction reverted",
10851090
"contract", contractAddress,
10861091
"method", methodName,
10871092
"error_from_call_trace", errMsg,

core/taskengine/vm_runner_contract_write.go

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -814,7 +814,12 @@ func (r *ContractWriteProcessor) executeRealUserOpTransaction(ctx context.Contex
814814
}
815815
}
816816

817-
r.vm.logger.Error("🚫 BUNDLER FAILED - UserOp transaction failed, workflow execution FAILED",
817+
// preset.LogBundlerError picks Error vs Warn based on the error: on-chain
818+
// reverts (expected user-workflow outcomes) log at Warn so they don't page
819+
// Sentry; real infra/AA failures (bundler down, AA21/AA23/AA25, paymaster
820+
// revert) stay at Error.
821+
preset.LogBundlerError(r.vm.logger, err,
822+
"bundler: UserOp transaction failed, workflow execution FAILED",
818823
"bundler_error", err,
819824
"bundler_url", r.smartWalletConfig.BundlerURL,
820825
"method", methodName,
@@ -1209,43 +1214,35 @@ func (r *ContractWriteProcessor) convertTenderlyResultToFlexibleFormat(result *C
12091214

12101215
receipt, _ := structpb.NewValue(receiptMap)
12111216

1212-
// Extract return value from Tenderly response
1217+
// Extract return value from Tenderly response.
1218+
// ReturnData is nil when the provider did not return output data (e.g. simulation
1219+
// reverted — tenderly_client.go clears ReturnData in that case). That path leaves
1220+
// Value as nil, which is the expected behavior.
12131221
var returnValue *structpb.Value
12141222
if result.ReturnData != nil {
1215-
r.vm.logger.Info("🔍 CRITICAL DEBUG - ReturnData found",
1216-
"method", result.MethodName,
1217-
"returnData_name", result.ReturnData.Name,
1218-
"returnData_type", result.ReturnData.Type,
1219-
"returnData_value", result.ReturnData.Value)
1220-
12211223
// Parse the JSON value from ReturnData and convert to protobuf
12221224
var parsedValue interface{}
12231225
if err := json.Unmarshal([]byte(result.ReturnData.Value), &parsedValue); err == nil {
12241226
// Successfully parsed JSON, convert to protobuf
12251227
if valueProto, err := structpb.NewValue(parsedValue); err == nil {
12261228
returnValue = valueProto
1227-
r.vm.logger.Info("✅ CRITICAL DEBUG - Successfully created returnValue protobuf",
1228-
"method", result.MethodName,
1229-
"parsedValue", parsedValue)
12301229
} else {
1231-
r.vm.logger.Error("❌ CRITICAL DEBUG - Failed to create protobuf from parsedValue",
1230+
r.vm.logger.Debug("failed to create protobuf from parsed ReturnData",
12321231
"method", result.MethodName,
12331232
"error", err)
12341233
}
12351234
} else {
1236-
r.vm.logger.Error("❌ CRITICAL DEBUG - Failed to unmarshal JSON from ReturnData.Value",
1235+
// Non-JSON return types (bytes32, address, etc.) are expected; fall through
1236+
// to raw-string handling below.
1237+
r.vm.logger.Debug("ReturnData is not JSON, falling back to raw string",
12371238
"method", result.MethodName,
1238-
"error", err,
1239-
"raw_value", result.ReturnData.Value)
1239+
"error", err)
12401240

12411241
// Fallback: treat as raw string if JSON parsing fails
12421242
if valueProto, err := structpb.NewValue(result.ReturnData.Value); err == nil {
12431243
returnValue = valueProto
12441244
}
12451245
}
1246-
} else {
1247-
r.vm.logger.Error("❌ CRITICAL DEBUG - ReturnData is nil",
1248-
"method", result.MethodName)
12491246
}
12501247

12511248
// No fallback default value. If provider does not return output data, Value remains nil
@@ -1624,7 +1621,11 @@ func (r *ContractWriteProcessor) Execute(stepID string, node *avsproto.ContractW
16241621
}
16251622
}
16261623
} else {
1627-
r.vm.logger.Error("🚨 DEPLOYED WORKFLOW: Method execution failed",
1624+
// User-workflow failure: method returned success=false. The concrete
1625+
// cause is already logged by the upstream site (Tenderly simulation at
1626+
// tenderly_client.go, or bundler/AA at line ~817). Re-logging at Warn
1627+
// here keeps operator-visible context without paging Sentry.
1628+
r.vm.logger.Warn("deployed workflow: method execution failed",
16281629
"method_name", result.MethodName,
16291630
"error_message", result.Error,
16301631
"error_length", len(result.Error),

core/taskengine/vm_runner_eth_transfer.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,9 @@ func (p *ETHTransferProcessor) executeRealETHTransfer(stepID, destination, amoun
344344
)
345345

346346
if err != nil {
347-
p.vm.logger.Error("🚫 BUNDLER FAILED - ETH transfer UserOp transaction failed",
347+
// See preset.LogBundlerError: Warn on on-chain revert, Error on infra/AA.
348+
preset.LogBundlerError(p.vm.logger, err,
349+
"bundler: ETH transfer UserOp transaction failed",
348350
"bundler_error", err,
349351
"bundler_url", p.smartWalletConfig.BundlerURL,
350352
"destination", destination,
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
package preset
2+
3+
import (
4+
"strings"
5+
6+
"github.com/AvaProtocol/EigenLayer-AVS/pkg/logger"
7+
)
8+
9+
// userOpRevertMarker is the substring that identifies errors returned by
// SendUserOp when the UserOp was included on-chain but the target contract
// call reverted. The marker string is emitted from waitForUserOpConfirmation
// via fmt.Errorf.
const userOpRevertMarker = "success=false in UserOperationEvent"

// IsUserOpRevert reports whether err represents an on-chain revert of the user
// target contract (UserOp was mined but UserOperationEvent.success == false),
// as distinct from infra/AA failures such as bundler unreachable, AA21 prefund,
// AA23 reverted, AA25 invalid nonce, or paymaster revert.
//
// On-chain reverts are expected user-workflow outcomes and should not escalate
// to Sentry error alerts. Infra/AA failures should.
//
// Classification is a substring match of userOpRevertMarker against
// err.Error(), so a wrapped error still matches as long as the wrapper's
// message embeds the inner message (as fmt.Errorf with %w does). A nil err is
// never a revert.
func IsUserOpRevert(err error) bool {
	return err != nil && strings.Contains(err.Error(), userOpRevertMarker)
}
27+
28+
// LogBundlerError logs a bundler/UserOp failure at the severity appropriate
29+
// for its cause: Warn for on-chain reverts (see IsUserOpRevert) so they do not
30+
// page Sentry, Error for real infra/AA failures that operators must see.
31+
//
32+
// Callers pass the error both for classification (the first argument) and,
33+
// conventionally, as a tag value so the logged record includes the full error.
34+
func LogBundlerError(lgr logger.Logger, err error, msg string, tags ...any) {
35+
if lgr == nil {
36+
return
37+
}
38+
if IsUserOpRevert(err) {
39+
lgr.Warn(msg, tags...)
40+
return
41+
}
42+
lgr.Error(msg, tags...)
43+
}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
package preset
2+
3+
import (
4+
"errors"
5+
"fmt"
6+
"sync"
7+
"testing"
8+
9+
sdklogging "github.com/Layr-Labs/eigensdk-go/logging"
10+
)
11+
12+
func TestIsUserOpRevert(t *testing.T) {
13+
cases := []struct {
14+
name string
15+
err error
16+
want bool
17+
}{
18+
{"nil", nil, false},
19+
{"unrelated", errors.New("dial tcp: connection refused"), false},
20+
{"AA21 prefund", errors.New("AA21 didn't pay prefund"), false},
21+
{"AA25 nonce", errors.New("AA25 invalid account nonce"), false},
22+
{"direct marker", errors.New("UserOp execution failed (success=false in UserOperationEvent) - tx: 0xabc"), true},
23+
{"wrapped marker", fmt.Errorf("UserOp execution failed: %w", errors.New("UserOp execution failed (success=false in UserOperationEvent) - tx: 0xabc")), true},
24+
}
25+
for _, tc := range cases {
26+
t.Run(tc.name, func(t *testing.T) {
27+
if got := IsUserOpRevert(tc.err); got != tc.want {
28+
t.Errorf("IsUserOpRevert(%v) = %v, want %v", tc.err, got, tc.want)
29+
}
30+
})
31+
}
32+
}
33+
34+
// bundlerErrorSpyLogger captures which severity method was invoked for LogBundlerError.
35+
type bundlerErrorSpyLogger struct {
36+
mu sync.Mutex
37+
calls []string // method names in order
38+
}
39+
40+
func (s *bundlerErrorSpyLogger) record(method string) {
41+
s.mu.Lock()
42+
defer s.mu.Unlock()
43+
s.calls = append(s.calls, method)
44+
}
45+
46+
func (s *bundlerErrorSpyLogger) Debug(string, ...any) {}
47+
func (s *bundlerErrorSpyLogger) Debugf(string, ...any) {}
48+
func (s *bundlerErrorSpyLogger) Info(string, ...any) {}
49+
func (s *bundlerErrorSpyLogger) Infof(string, ...any) {}
50+
func (s *bundlerErrorSpyLogger) Warn(string, ...any) { s.record("Warn") }
51+
func (s *bundlerErrorSpyLogger) Warnf(string, ...any) { s.record("Warn") }
52+
func (s *bundlerErrorSpyLogger) Error(string, ...any) { s.record("Error") }
53+
func (s *bundlerErrorSpyLogger) Errorf(string, ...any) { s.record("Error") }
54+
func (s *bundlerErrorSpyLogger) Fatal(string, ...any) {}
55+
func (s *bundlerErrorSpyLogger) Fatalf(string, ...any) {}
56+
func (s *bundlerErrorSpyLogger) With(...any) sdklogging.Logger { return s }
57+
58+
func TestLogBundlerError(t *testing.T) {
59+
cases := []struct {
60+
name string
61+
err error
62+
want string
63+
}{
64+
{"on-chain revert → Warn", errors.New("UserOp execution failed (success=false in UserOperationEvent) - tx: 0xabc"), "Warn"},
65+
{"AA21 infra → Error", errors.New("AA21 didn't pay prefund"), "Error"},
66+
{"bundler down → Error", errors.New("dial tcp: connection refused"), "Error"},
67+
}
68+
for _, tc := range cases {
69+
t.Run(tc.name, func(t *testing.T) {
70+
spy := &bundlerErrorSpyLogger{}
71+
LogBundlerError(spy, tc.err, "bundler failed", "err", tc.err)
72+
if len(spy.calls) != 1 || spy.calls[0] != tc.want {
73+
t.Errorf("expected single %s call, got %v", tc.want, spy.calls)
74+
}
75+
})
76+
}
77+
}
78+
79+
func TestLogBundlerError_NilLogger(t *testing.T) {
80+
// Must not panic.
81+
LogBundlerError(nil, errors.New("anything"), "msg")
82+
}

0 commit comments

Comments
 (0)