Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
3a43fc7
fix(sandbox): sanitize @ in container names for WhatsApp LID chat IDs
kamushadenes Apr 24, 2026
a0fb07e
fix(cron): always reset session before cron runs (stateless bug)
kamushadenes Apr 24, 2026
b9b8f86
fix(exec): detect credentialed CLIs in shell operator chains
kamushadenes Apr 24, 2026
13f4cb2
feat(exec): add allow_chain_exec flag for credentialed CLI chain inje…
kamushadenes Apr 24, 2026
ce11fb4
fix: sanitize @ in sandbox names
kamushadenes Apr 24, 2026
90948f5
fix: always reset cron sessions
kamushadenes Apr 24, 2026
160a7ad
feat: chain exec + allow_chain_exec
kamushadenes Apr 24, 2026
b21495e
fix: bump RequiredSchemaVersion to 57 for allow_chain_exec migration
kamushadenes Apr 24, 2026
e25a317
fix: bump schema version to 57
kamushadenes Apr 24, 2026
ef710d2
fix: add missing $16 placeholder in secure_cli INSERT
kamushadenes Apr 25, 2026
0842150
fix: missing $16 placeholder in secure_cli INSERT
kamushadenes Apr 25, 2026
73d8432
fix: add missing 16th placeholder in SQLite secure_cli INSERT
kamushadenes Apr 25, 2026
9a2f847
fix: SQLite INSERT placeholder count
kamushadenes Apr 25, 2026
f89cb1e
feat(sandbox): mount data volume read-only for skills/config access
kamushadenes Apr 25, 2026
ffc464c
feat: mount data volume ro in sandbox
kamushadenes Apr 25, 2026
eac0806
perf(providers): cache conversation history on Anthropic requests
kamushadenes Apr 27, 2026
6615c77
feat(hooks): surface script reason in synthetic block messages
kamushadenes Apr 27, 2026
422e707
feat(sandbox): add AllowTmpExec opt-out for tmpfs noexec
kamushadenes Apr 28, 2026
8f4b581
fix(sandbox): use explicit exec flag in tmpfs to override Docker's de…
kamushadenes Apr 28, 2026
37ebe15
fix(whatsapp): scope whatsmeow device per channel instance
kamushadenes Apr 28, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ func runGateway() {
instanceLoader.RegisterFactory(channels.TypeFeishu, feishu.FactoryWithPendingStoreAndAudio(pgStores.PendingMessages, audioMgr))
instanceLoader.RegisterFactory(channels.TypeZaloOA, zalo.Factory)
instanceLoader.RegisterFactory(channels.TypeZaloPersonal, zalopersonal.FactoryWithPendingStore(pgStores.PendingMessages))
instanceLoader.RegisterFactory(channels.TypeWhatsApp, whatsapp.FactoryWithDBAudio(pgStores.DB, pgStores.PendingMessages, "pgx", audioMgr, pgStores.BuiltinTools))
instanceLoader.RegisterFactory(channels.TypeWhatsApp, whatsapp.FactoryWithDBAudio(pgStores.DB, pgStores.PendingMessages, "pgx", audioMgr, pgStores.BuiltinTools, pgStores.ChannelInstances))
instanceLoader.RegisterFactory(channels.TypeSlack, slackchannel.FactoryWithPendingStore(pgStores.PendingMessages))
instanceLoader.RegisterFactory(channels.TypeFacebook, facebook.Factory)
instanceLoader.RegisterFactory(channels.TypePancake, pancake.Factory)
Expand Down
6 changes: 5 additions & 1 deletion cmd/gateway_channels_setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,11 @@ func registerConfigChannels(cfg *config.Config, channelMgr *channels.Manager, ms
if strings.Contains(fmt.Sprintf("%T", pgStores.DB.Driver()), "sqlite") {
waDialect = "sqlite3"
}
wa, err := whatsapp.New(cfg.Channels.WhatsApp, msgBus, pgStores.Pairing, pgStores.DB, pgStores.PendingMessages, waDialect, audioMgr, pgStores.BuiltinTools)
// Config-only WhatsApp (single instance, no DB-backed channel_instances row);
// no instance store, no configJID — falls back to GetFirstDevice via NewDevice
// adoption when the legacy single-device store already exists.
wa, err := whatsapp.New(cfg.Channels.WhatsApp, msgBus, pgStores.Pairing, pgStores.DB,
pgStores.PendingMessages, waDialect, audioMgr, pgStores.BuiltinTools, nil, "")
if err != nil {
channelMgr.RecordFailure(channels.TypeWhatsApp, "", err)
slog.Error("failed to initialize whatsapp channel", "error", err)
Expand Down
7 changes: 5 additions & 2 deletions cmd/gateway_cron.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,11 @@ func makeCronJobHandler(sched *scheduler.Scheduler, msgBus *bus.MessageBus, cfg
// Reset session before each cron run to prevent tool errors from previous
// runs from polluting the context and blocking future executions (#294).
// Save() persists the empty session to DB so stale data won't reload after restart.
// Stateless jobs skip this — they intentionally carry no session history.
if !job.Stateless {
// Always reset cron sessions to prevent message accumulation across runs.
// Stateless jobs especially need this — the agent loop persists messages
// to the session regardless of the stateless flag, so without a reset
// the session grows indefinitely.
{
sessionMgr.Reset(cronCtx, sessionKey)
sessionMgr.Save(cronCtx, sessionKey)
}
Expand Down
6 changes: 6 additions & 0 deletions internal/channels/instance_loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,12 @@ func (l *InstanceLoader) loadInstance(ctx context.Context, inst store.ChannelIns
if base, ok := ch.(interface{ SetTenantID(uuid.UUID) }); ok {
base.SetTenantID(inst.TenantID)
}
// Propagate instance_id so channels that maintain per-instance external state
// (e.g. WhatsApp's whatsmeow_device row scoped to this channel) can persist it
// back to channel_instances.config.
if base, ok := ch.(interface{ SetInstanceID(uuid.UUID) }); ok {
base.SetInstanceID(inst.ID)
}
// Propagate tenant_id to pending history for compaction/sweep DB operations.
// Factory creates PendingHistory before SetTenantID is called, so tenantID is uuid.Nil at construction.
if ph, ok := ch.(interface{ SetPendingHistoryTenantID(uuid.UUID) }); ok {
Expand Down
13 changes: 7 additions & 6 deletions internal/channels/whatsapp/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ func (c *Channel) StartQRFlow(ctx context.Context) (<-chan whatsmeow.QRChannelIt
if c.ctx == nil {
c.ctx, c.cancel = context.WithCancel(context.Background())
}
deviceStore, err := c.container.GetFirstDevice(ctx)
deviceStore, err := c.resolveDevice(ctx)
if err != nil {
c.mu.Unlock()
return nil, fmt.Errorf("whatsapp get device: %w", err)
Expand Down Expand Up @@ -90,11 +90,12 @@ func (c *Channel) Reauth() error {
}
c.ctx, c.cancel = context.WithCancel(parent)

// Re-create client with fresh device store.
deviceStore, err := c.container.GetFirstDevice(context.Background())
if err != nil {
return fmt.Errorf("whatsapp: get fresh device: %w", err)
}
// Re-create client with a fresh device. Reauth always forces a new pairing,
// so we bypass resolveDevice (which would try to adopt an existing device).
// configJID is also cleared so the next persistJID on PairSuccess writes the
// new JID into channel_instances.config without short-circuiting on equality.
c.configJID = ""
deviceStore := c.container.NewDevice()
c.client = whatsmeow.NewClient(deviceStore, nil)
c.client.AddEventHandler(c.handleEvent)

Expand Down
16 changes: 12 additions & 4 deletions internal/channels/whatsapp/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,25 @@ type whatsappInstanceConfig struct {
HistoryLimit int `json:"history_limit,omitempty"`
AllowFrom []string `json:"allow_from,omitempty"`
BlockReply *bool `json:"block_reply,omitempty"`
// JID is the whatsmeow device JID this instance was paired with on a prior boot.
// Set automatically on PairSuccess and on adoption of an existing single-device
// store. Empty for fresh instances; the channel will NewDevice + go through QR.
JID string `json:"jid,omitempty"`
}

// FactoryWithDB returns a ChannelFactory with DB access for whatsmeow auth state.
// dialect must be "pgx" (PostgreSQL) or "sqlite3" (SQLite/desktop).
func FactoryWithDB(db *sql.DB, pendingStore store.PendingMessageStore, dialect string) channels.ChannelFactory {
return FactoryWithDBAudio(db, pendingStore, dialect, nil, nil)
return FactoryWithDBAudio(db, pendingStore, dialect, nil, nil, nil)
}

// FactoryWithDBAudio returns a ChannelFactory with DB access, STT support, and builtin-tools store
// for reading stt.whatsapp_enabled opt-in setting per message.
// for reading stt.whatsapp_enabled opt-in setting per message. instanceStore is optional but
// required for multi-instance device scoping (passed from cmd/gateway.go); nil falls back to
// legacy single-instance GetFirstDevice behavior.
func FactoryWithDBAudio(db *sql.DB, pendingStore store.PendingMessageStore, dialect string,
audioMgr *audio.Manager, builtinToolStore store.BuiltinToolStore) channels.ChannelFactory {
audioMgr *audio.Manager, builtinToolStore store.BuiltinToolStore,
instanceStore store.ChannelInstanceStore) channels.ChannelFactory {
return func(name string, creds json.RawMessage, cfg json.RawMessage,
msgBus *bus.MessageBus, pairingSvc store.PairingStore) (channels.Channel, error) {

Expand Down Expand Up @@ -72,7 +79,8 @@ func FactoryWithDBAudio(db *sql.DB, pendingStore store.PendingMessageStore, dial
waCfg.GroupPolicy = "pairing"
}

ch, err := New(waCfg, msgBus, pairingSvc, db, pendingStore, dialect, audioMgr, builtinToolStore)
ch, err := New(waCfg, msgBus, pairingSvc, db, pendingStore, dialect, audioMgr, builtinToolStore,
instanceStore, ic.JID)
if err != nil {
return nil, err
}
Expand Down
188 changes: 185 additions & 3 deletions internal/channels/whatsapp/whatsapp.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@ package whatsapp
import (
"context"
"database/sql"
"encoding/json"
"fmt"
"log/slog"
"sync"
"time"

"github.com/google/uuid"
"go.mau.fi/whatsmeow"
wastore "go.mau.fi/whatsmeow/store"
"go.mau.fi/whatsmeow/store/sqlstore"
Expand Down Expand Up @@ -58,6 +60,16 @@ type Channel struct {
// reauthMu serializes Reauth() and StartQRFlow() to prevent race when user clicks reauth rapidly.
reauthMu sync.Mutex
// pairingService, pairingDebounce, approvedGroups, groupHistory are inherited from channels.BaseChannel.

// instanceID + instanceStore scope this channel to a specific channel_instances row,
// so multiple WhatsApp channels in one deploy each bind to their own whatsmeow_device row.
// Without this, every channel reused the first device returned by GetFirstDevice and ended
// up logged in as the same WhatsApp account regardless of name.
instanceID uuid.UUID
instanceStore store.ChannelInstanceStore
// configJID is the device JID this channel adopted on a previous run, mirrored from the
// instance's config jsonb (key "jid"). Empty when the channel has never paired.
configJID string
}

// GetLastQRB64 returns the most recent QR PNG (base64).
Expand Down Expand Up @@ -85,10 +97,15 @@ func (c *Channel) cacheQR(pngB64 string) {
// dialect must be "pgx" (PostgreSQL) or "sqlite3" (SQLite/desktop).
// audioMgr is optional (nil = STT disabled).
// builtinToolStore is optional (nil = STT permanently opt-out regardless of admin toggle).
// instanceStore is optional but required for multi-instance device scoping; without it,
// the channel falls back to GetFirstDevice (legacy single-instance behavior).
// configJID is the JID adopted on a prior run (from instance config "jid"); empty for
// fresh instances that should NewDevice + QR.
func New(cfg config.WhatsAppConfig, msgBus *bus.MessageBus,
pairingSvc store.PairingStore, db *sql.DB,
pendingStore store.PendingMessageStore, dialect string, audioMgr *audio.Manager,
builtinToolStore store.BuiltinToolStore) (*Channel, error) {
builtinToolStore store.BuiltinToolStore,
instanceStore store.ChannelInstanceStore, configJID string) (*Channel, error) {

base := channels.NewBaseChannel(channels.TypeWhatsApp, msgBus, cfg.AllowFrom)
base.ValidatePolicy(cfg.DMPolicy, cfg.GroupPolicy)
Expand All @@ -104,12 +121,169 @@ func New(cfg config.WhatsAppConfig, msgBus *bus.MessageBus,
container: container,
audioMgr: audioMgr,
builtinToolStore: builtinToolStore,
instanceStore: instanceStore,
configJID: configJID,
}
ch.SetPairingService(pairingSvc)
ch.SetGroupHistory(channels.MakeHistory("whatsapp", pendingStore, base.TenantID()))
return ch, nil
}

// SetInstanceID records the ID of the channel_instances row that backs this
// channel. The ID is what persistJID and adoptOrphanDevice use to locate the
// row; while it is uuid.Nil both of them are no-ops.
func (c *Channel) SetInstanceID(id uuid.UUID) {
	c.instanceID = id
}

// resolveDevice returns the whatsmeow device this channel should bind to.
// Candidates are tried in order:
//  1. configJID is set, parses, and the device exists in the store → reuse it.
//  2. No instance store is wired (config-only / legacy single-instance mode) →
//     GetFirstDevice, which is the behavior the channel had before devices
//     were scoped per instance and the fallback the constructor documents.
//  3. adoptOrphanDevice succeeds → adopt the single legacy device and persist
//     its JID so later boots take path 1.
//  4. Otherwise → NewDevice(), a fresh device that goes through QR pairing.
//
// A malformed or unknown configJID is logged and treated as if it were empty.
// An error is returned only when a device store lookup itself fails.
func (c *Channel) resolveDevice(ctx context.Context) (*wastore.Device, error) {
	if c.configJID != "" {
		jid, parseErr := types.ParseJID(c.configJID)
		if parseErr != nil {
			slog.Warn("whatsapp: stored JID is malformed, falling back to fresh pairing",
				"channel", c.Name(), "jid", c.configJID, "error", parseErr)
		} else {
			dev, err := c.container.GetDevice(ctx, jid)
			if err != nil {
				return nil, fmt.Errorf("whatsapp get device by jid %s: %w", jid, err)
			}
			if dev != nil {
				return dev, nil
			}
			slog.Warn("whatsapp: stored JID not found in device store, falling back to fresh pairing",
				"channel", c.Name(), "jid", c.configJID)
		}
	}

	// Legacy mode: without an instance store there is no channel_instances row
	// to scope against and nowhere to persist a JID, so skipping this branch
	// would hand an already-paired deployment a blank device on every boot.
	// (whatsmeow documents GetFirstDevice as creating a new device when the
	// store is empty, so fresh installs still reach QR pairing.)
	if c.instanceStore == nil {
		dev, err := c.container.GetFirstDevice(ctx)
		if err != nil {
			return nil, fmt.Errorf("get first device: %w", err)
		}
		return dev, nil
	}

	if dev, ok := c.adoptOrphanDevice(ctx); ok {
		slog.Info("whatsapp: adopted existing device for instance",
			"channel", c.Name(), "jid", dev.ID)
		// Persist the adopted JID so subsequent boots take the configJID path
		// directly and don't risk re-adopting a device already claimed by
		// another channel that just happened to start later.
		if dev.ID != nil {
			c.persistJID(ctx, *dev.ID)
		}
		return dev, nil
	}
	return c.container.NewDevice(), nil
}

// adoptOrphanDevice handles the upgrade case where a deploy that already has a
// single whatsmeow_device row starts binding devices to channel_instances.
// To avoid handing the legacy device to the wrong instance, adoption happens
// only when ALL of the following hold:
//   - this channel has both an instance store and a non-nil instance ID, AND
//   - exactly one device exists in the store and it has an ID, AND
//   - exactly one WhatsApp channel_instance exists, and it is this channel, AND
//   - that instance's config does not already claim a different JID.
//
// In every other configuration (multi-instance deploys, fresh installs, lookup
// failures) it returns (nil, false) and the caller moves on to fresh pairing.
// Lookup failures are logged rather than returned because adoption is an
// optimisation, not a requirement for boot.
func (c *Channel) adoptOrphanDevice(ctx context.Context) (*wastore.Device, bool) {
	if c.instanceStore == nil || c.instanceID == uuid.Nil {
		return nil, false
	}

	devs, err := c.container.GetAllDevices(ctx)
	if err != nil {
		slog.Warn("whatsapp: list devices for adoption failed", "error", err)
		return nil, false
	}
	if len(devs) != 1 {
		return nil, false
	}
	dev := devs[0]
	if dev == nil || dev.ID == nil {
		return nil, false
	}

	// Instances are listed across tenants: the device store is shared, so a
	// WhatsApp instance in any tenant makes ownership of the device ambiguous.
	instances, err := c.instanceStore.ListAllInstances(store.WithCrossTenant(ctx))
	if err != nil {
		slog.Warn("whatsapp: list instances for adoption failed", "error", err)
		return nil, false
	}

	var (
		whatsappCount int
		soleID        uuid.UUID
		soleJID       string
	)
	for _, inst := range instances {
		if inst.ChannelType != channels.TypeWhatsApp {
			continue
		}
		whatsappCount++
		if whatsappCount > 1 {
			return nil, false
		}
		soleID = inst.ID

		var ic struct {
			JID string `json:"jid"`
		}
		if len(inst.Config) > 0 {
			// An unreadable config is treated as "no JID claimed"; the
			// failure is surfaced in the log instead of blocking adoption.
			if err := json.Unmarshal(inst.Config, &ic); err != nil {
				slog.Warn("whatsapp: instance config unreadable during adoption",
					"instance_id", inst.ID, "error", err)
			}
		}
		soleJID = ic.JID
	}

	if whatsappCount != 1 || soleID != c.instanceID {
		return nil, false
	}
	if soleJID != "" && soleJID != dev.ID.String() {
		// Sole instance already claims a different JID — refuse to adopt.
		return nil, false
	}
	return dev, true
}

// persistJID writes the device JID into the "jid" key of this channel's
// channel_instances.config so the next start binds to the same device without
// going through QR. All other keys of the stored config are preserved.
//
// It is a no-op when the channel has no instance store or instance ID, or when
// jid already equals the cached configJID. Best-effort: every failure is
// logged and swallowed, because the channel is already connected by the time
// this runs. On success the cached configJID is updated to match.
func (c *Channel) persistJID(ctx context.Context, jid types.JID) {
	if c.instanceStore == nil || c.instanceID == uuid.Nil {
		return
	}
	jidStr := jid.String()
	if jidStr == c.configJID {
		return
	}

	// Scope the store calls to the channel's tenant when one is set; otherwise
	// use a cross-tenant context.
	var scopeCtx context.Context
	if tenantID := c.TenantID(); tenantID != uuid.Nil {
		scopeCtx = store.WithTenantID(ctx, tenantID)
	} else {
		scopeCtx = store.WithCrossTenant(ctx)
	}

	inst, err := c.instanceStore.Get(scopeCtx, c.instanceID)
	if err != nil {
		slog.Warn("whatsapp: persist JID — instance lookup failed",
			"channel", c.Name(), "instance_id", c.instanceID, "error", err)
		return
	}

	var cfgMap map[string]any
	if len(inst.Config) > 0 {
		if err := json.Unmarshal(inst.Config, &cfgMap); err != nil {
			slog.Warn("whatsapp: persist JID — config unmarshal failed",
				"channel", c.Name(), "error", err)
			// Discard anything a failed decode may have partially filled in.
			cfgMap = nil
		}
	}
	// A stored config of JSON null decodes without error but leaves the map
	// nil, and writing to a nil map panics — always start from a real map.
	if cfgMap == nil {
		cfgMap = map[string]any{}
	}
	cfgMap["jid"] = jidStr

	cfgBytes, err := json.Marshal(cfgMap)
	if err != nil {
		slog.Warn("whatsapp: persist JID — config marshal failed", "error", err)
		return
	}
	if err := c.instanceStore.Update(scopeCtx, c.instanceID,
		map[string]any{"config": cfgBytes}); err != nil {
		slog.Warn("whatsapp: persist JID — update failed",
			"channel", c.Name(), "instance_id", c.instanceID, "error", err)
		return
	}

	c.configJID = jidStr
	slog.Info("whatsapp: persisted device JID to channel instance",
		"channel", c.Name(), "jid", jidStr)
}

// Start initializes the whatsmeow client and connects to WhatsApp.
func (c *Channel) Start(ctx context.Context) error {
slog.Info("starting whatsapp channel (whatsmeow)")
Expand All @@ -118,7 +292,7 @@ func (c *Channel) Start(ctx context.Context) error {
c.parentCtx = ctx
c.ctx, c.cancel = context.WithCancel(ctx)

deviceStore, err := c.container.GetFirstDevice(ctx)
deviceStore, err := c.resolveDevice(ctx)
if err != nil {
return fmt.Errorf("whatsapp get device: %w", err)
}
Expand Down Expand Up @@ -183,7 +357,15 @@ func (c *Channel) handleEvent(evt any) {
case *events.LoggedOut:
c.handleLoggedOut(v)
case *events.PairSuccess:
slog.Info("whatsapp: pair success", "channel", c.Name())
slog.Info("whatsapp: pair success", "channel", c.Name(), "jid", v.ID.String())
// Bind this freshly-paired device to our channel_instances row so the next
// boot reuses the same device instead of going back through QR (or worse,
// adopting a sibling channel's device).
if c.parentCtx != nil {
c.persistJID(c.parentCtx, v.ID)
} else {
c.persistJID(context.Background(), v.ID)
}
}
}

Expand Down
2 changes: 2 additions & 0 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ type SandboxConfig struct {
// Enhanced security
User string `json:"user,omitempty"` // container user (e.g. "1000:1000", "nobody")
TmpfsSizeMB int `json:"tmpfs_size_mb,omitempty"` // default tmpfs size in MB (0 = Docker default)
AllowTmpExec bool `json:"allow_tmp_exec,omitempty"` // drop `noexec` from tmpfs mounts (still keeps nosuid+nodev). Required by some bundled-binary CLIs that extract+exec from /tmp at runtime.
MaxOutputBytes int `json:"max_output_bytes,omitempty"` // limit exec output capture (default 1MB)

// Pruning (matching TS SandboxPruneSettings)
Expand Down Expand Up @@ -319,6 +320,7 @@ func (sc *SandboxConfig) ToSandboxConfig() sandbox.Config {
if sc.TmpfsSizeMB > 0 {
cfg.TmpfsSizeMB = sc.TmpfsSizeMB
}
cfg.AllowTmpExec = sc.AllowTmpExec
if sc.MaxOutputBytes > 0 {
cfg.MaxOutputBytes = sc.MaxOutputBytes
}
Expand Down
4 changes: 4 additions & 0 deletions internal/config/config_load.go
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,10 @@ func (c *Config) applyEnvOverrides() {
ensureSandbox()
c.Agents.Defaults.Sandbox.NetworkEnabled = v == "true" || v == "1"
}
if v := os.Getenv("GOCLAW_SANDBOX_TMP_EXEC"); v != "" {
ensureSandbox()
c.Agents.Defaults.Sandbox.AllowTmpExec = v == "true" || v == "1"
}

// Browser (for Docker-compose browser sidecar overlay)
envStr("GOCLAW_BROWSER_REMOTE_URL", &c.Tools.Browser.RemoteURL)
Expand Down
5 changes: 4 additions & 1 deletion internal/hooks/dispatcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,10 @@ func (d *stdDispatcher) runSync(ctx context.Context, ev Event, chain []HookConfi
switch dec {
case DecisionBlock:
d.cb.record(ctx, cfg.ID, d.now(), d.store)
return FireResult{Decision: DecisionBlock}, nil
// Forward the script reason so callers can surface a self-
// documenting message to the agent. Reason stays empty for
// non-script handlers and for scripts that did not set one.
return FireResult{Decision: DecisionBlock, Reason: scriptRes.Reason}, nil
case DecisionTimeout:
d.cb.record(ctx, cfg.ID, d.now(), d.store)
if cfg.OnTimeout == DecisionBlock {
Expand Down
Loading