Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 129 additions & 3 deletions shortcuts/slides/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,74 @@
package slides

import (
"context"
"encoding/xml"
"errors"
"fmt"
"io"
"net/url"
"regexp"
"strings"
"time"

"github.com/larksuite/cli/errs"
"github.com/larksuite/cli/shortcuts/common"
)

const (
// slidesRateLimitMaxRetries is the number of automatic retries (beyond the
// initial request) when the API answers 99991400 "request trigger frequency
// limit". The slides batch paths (+create slide loop, placeholder image
// uploads) fire consecutive POSTs and are the dominant 99991400 producers
// in telemetry; a short backoff absorbs a transient burst without masking a
// genuinely saturated tenant.
slidesRateLimitMaxRetries = 2
)

// slidesRateLimitBaseDelay is the initial backoff delay; subsequent retries
// double it (1s, 2s). Mirrors the wiki +node-create lock-contention pattern
// but with a larger base because a frequency window takes longer to clear than
// a sub-second lock race. var (not const) only so tests can shrink it.
var slidesRateLimitBaseDelay = 1 * time.Second

// isRateLimitedErr reports whether err is a typed retryable rate-limit error
// (e.g. 99991400), as classified by errclass.BuildAPIError.
func isRateLimitedErr(err error) bool {
p, ok := errs.ProblemOf(err)
return ok && p.Subtype == errs.SubtypeRateLimit && p.Retryable
}

// retryOnRateLimit runs fn, retrying with exponential backoff (1s, 2s) when it
// returns a retryable rate-limit error. Any other outcome — success or a
// different error — is returned immediately. Progress is announced on errOut
// so a user watching a batch upload understands the pause.
func retryOnRateLimit(ctx context.Context, errOut io.Writer, fn func() error) error {
var lastErr error
for attempt := 0; attempt <= slidesRateLimitMaxRetries; attempt++ {
if attempt > 0 {
delay := slidesRateLimitBaseDelay << uint(attempt-1)
// Report the actual code from the error: the retry predicate matches
// any retryable SubtypeRateLimit, not just 99991400.
code := 0
if p, ok := errs.ProblemOf(lastErr); ok {
code = p.Code
}
fmt.Fprintf(errOut, "Rate limited by the API (%d), retrying (attempt %d/%d) in %v...\n",
code, attempt, slidesRateLimitMaxRetries, delay)
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(delay):
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}
}
lastErr = fn()
if lastErr == nil || !isRateLimitedErr(lastErr) {
return lastErr
}
}
return lastErr
}

// presentationRef holds a parsed --presentation input.
//
// Slides shortcuts accept three input shapes:
Expand Down Expand Up @@ -125,8 +184,30 @@
// around `=`); without it we'd silently leave such placeholders unrewritten.
var imgSrcPlaceholderRegex = regexp.MustCompile(`(?s)<img\b[^>]*?\bsrc\s*=\s*(["'])@([^"']+)(["'])`)

// xmlEntityUnescaper reverses the five XML built-in entities in attribute
// values captured from raw slide XML. strings.Replacer scans left-to-right in
// a single pass, so "&amp;lt;" correctly yields "&lt;" (the leading "&amp;"
// is consumed first), matching XML unescape semantics.
var xmlEntityUnescaper = strings.NewReplacer(
"&lt;", "<",
"&gt;", ">",
"&quot;", `"`,
"&apos;", "'",
"&amp;", "&",
)

// placeholderFilePath converts a raw <img src="@..."> capture into the local
// filesystem path it refers to. The capture comes from well-formed XML where
// a literal & must be written &amp; (the precheck enforces this), so the
// entities are decoded before the path touches Stat/upload. Filesystem paths
// containing & are therefore written as e.g. src="@./Q1&amp;Q2.png".
func placeholderFilePath(raw string) string {
return xmlEntityUnescaper.Replace(strings.TrimSpace(raw))
}

// extractImagePlaceholderPaths returns the de-duplicated list of local paths
// referenced via <img src="@path"> in the given slide XML strings.
// referenced via <img src="@path"> in the given slide XML strings, with XML
// built-in entities decoded (see placeholderFilePath).
//
// Order is preserved (first occurrence wins) so dry-run / progress messages are
// stable across runs.
Expand All @@ -141,7 +222,7 @@
// so we filter it here. Treat as malformed XML and skip.
continue
}
path := strings.TrimSpace(m[2])
path := placeholderFilePath(m[2])
if path == "" || seen[path] {
continue
}
Expand Down Expand Up @@ -280,6 +361,48 @@
return xmlFragment[:m[1]] + "<content/>" + afterOpen
}

// checkXMLWellFormed verifies that fragment parses as well-formed XML, using
// the same parser family as the backend (Go encoding/xml). Syntax only —
// element names and attributes are NOT checked against the SML schema, so
// anything passing here can still be rejected server-side for semantic
// reasons; conversely nothing rejected here could ever have succeeded, which
// keeps the false-positive risk at zero.
//
// The backend reports these failures as an opaque 3350001/4001000
// "invalid param" with no position info; catching them locally turns the
// dominant real-world causes (bare & in text, unclosed tags, attribute
// quoting) into actionable messages with a line number.
//
// An <?xml ?> declaration is rejected explicitly: the rendering backend does
// not accept processing instructions on slide fragments (rejects with
// "?xml not provide the implement"). encoding/xml surfaces it as a regular
// ProcInst token, so it needs its own check.
//
// Multiple top-level elements are deliberately allowed — insertion fragments
// may legitimately carry sibling elements.
func checkXMLWellFormed(fragment string) error {
dec := xml.NewDecoder(strings.NewReader(fragment))
for {
tok, err := dec.Token()
if errors.Is(err, io.EOF) {
return nil
}
if err != nil {
var syn *xml.SyntaxError
if errors.As(err, &syn) {
return errs.NewValidationError(errs.SubtypeInvalidArgument,
"XML not well-formed at line %d: %s (escape literal & as &amp; and < as &lt; in text)",
syn.Line, syn.Msg)
}
return errs.NewValidationError(errs.SubtypeInvalidArgument, "XML not well-formed: %v", err)

Check warning on line 397 in shortcuts/slides/helpers.go

View check run for this annotation

Codecov / codecov/patch

shortcuts/slides/helpers.go#L397

Added line #L397 was not covered by tests
}
if pi, ok := tok.(xml.ProcInst); ok && strings.EqualFold(pi.Target, "xml") {
return errs.NewValidationError(errs.SubtypeInvalidArgument,
"XML must not contain an <?xml ?> declaration (the slides backend rejects it); remove it and start at the root element")
}
}
}

// replaceImagePlaceholders rewrites <img src="@path"> occurrences in the input
// XML by looking up each path in tokens. Paths missing from the map are left
// untouched (callers should ensure the map is complete).
Expand All @@ -294,7 +417,10 @@
// Mismatched quotes — see extractImagePlaceholderPaths.
return match
}
token, ok := tokens[strings.TrimSpace(path)]
// tokens is keyed by the decoded filesystem path (see
// extractImagePlaceholderPaths), while oldQuoted below must use the
// raw capture so the literal XML text is what gets replaced.
token, ok := tokens[placeholderFilePath(path)]
if !ok {
return match
}
Expand Down
176 changes: 174 additions & 2 deletions shortcuts/slides/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,15 @@
package slides

import (
"bytes"
"context"
"errors"
"reflect"
"strings"
"testing"
"time"

"github.com/larksuite/cli/errs"
)

func TestParsePresentationRef(t *testing.T) {
Expand Down Expand Up @@ -216,6 +222,15 @@ func TestExtractImagePlaceholderPaths(t *testing.T) {
in: []string{`<img src = "@./spaced.png" />`},
want: []string{"./spaced.png"},
},
{
// Regression: the well-formedness precheck forces a literal & in a
// filename to be written &amp; in the XML; the captured path must
// be entity-decoded before it reaches Stat/upload so the file is
// actually found on disk.
name: "decodes XML entities in path",
in: []string{`<img src="@./Q1&amp;Q2.png"/>`},
want: []string{"./Q1&Q2.png"},
},
}

for _, tt := range tests {
Expand All @@ -233,8 +248,9 @@ func TestReplaceImagePlaceholders(t *testing.T) {
t.Parallel()

tokens := map[string]string{
"./pic.png": "tok_abc",
"./b.png": "tok_b",
"./pic.png": "tok_abc",
"./b.png": "tok_b",
"./Q1&Q2.png": "tok_amp", // keyed by decoded filesystem path
}

tests := []struct {
Expand Down Expand Up @@ -280,6 +296,13 @@ func TestReplaceImagePlaceholders(t *testing.T) {
in: `<img src = "@./pic.png" topLeftX="10"/>`,
want: `<img src = "tok_abc" topLeftX="10"/>`,
},
{
// Regression: tokens are keyed by the decoded filesystem path, but
// the literal XML text (with &amp;) is what must be rewritten.
name: "decodes XML entities when looking up token",
in: `<img src="@./Q1&amp;Q2.png" topLeftX="10"/>`,
want: `<img src="tok_amp" topLeftX="10"/>`,
},
}

for _, tt := range tests {
Expand Down Expand Up @@ -413,3 +436,152 @@ func TestEnsureXMLRootID(t *testing.T) {
})
}
}

func TestCheckXMLWellFormed(t *testing.T) {
t.Parallel()

tests := []struct {
name string
in string
wantErr string
}{
{name: "simple element", in: `<shape type="rect"><content/></shape>`},
{name: "nested with attributes", in: `<slide><shape type="text"><content><p>hi</p></content></shape></slide>`},
// Insertion fragments may carry sibling top-level elements; the decoder
// must not enforce a single document element.
{name: "multiple top-level elements", in: `<p>a</p><p>b</p>`},
{name: "escaped entities", in: `<p>A &amp; B &lt;tag&gt; &quot;q&quot;</p>`},
{name: "CDATA with raw ampersand", in: `<p><![CDATA[a & b < c]]></p>`},
{name: "comment", in: `<!-- note --><shape/>`},
{name: "img placeholder attr", in: `<img src="@./local.png" width="100"/>`},
{name: "unicode text", in: `<p>项目汇报 🎯</p>`},

// Top CLI-path failure cause in engine logs: bare & in text.
{name: "bare ampersand", in: `<p>Q & A</p>`, wantErr: "line 1"},
{name: "bare ampersand multiline", in: "<slide>\n<p>R&D</p>\n</slide>", wantErr: "line 2"},
{name: "unclosed tag", in: `<shape><content></shape>`, wantErr: "not well-formed"},
{name: "unquoted attribute", in: `<shape type=rect/>`, wantErr: "not well-formed"},
{name: "stray closing tag", in: `<p>hi</p></div>`, wantErr: "not well-formed"},
{name: "undefined entity", in: `<p>a&nbsp;b</p>`, wantErr: "not well-formed"},

// nodeserver rejects processing instructions ("?xml not provide the
// implement"); reject the declaration locally regardless of position.
{name: "xml declaration", in: `<?xml version="1.0"?><shape/>`, wantErr: "declaration"},
{name: "xml declaration with encoding", in: `<?xml version="1.0" encoding="UTF-8"?><slide/>`, wantErr: "declaration"},
{name: "uppercase xml declaration", in: `<?XML version="1.0"?><shape/>`, wantErr: "declaration"},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
err := checkXMLWellFormed(tt.in)
if tt.wantErr == "" {
if err != nil {
t.Fatalf("unexpected err: %v", err)
}
return
}
if err == nil {
t.Fatalf("want error containing %q, got nil", tt.wantErr)
}
var ve *errs.ValidationError
if !errors.As(err, &ve) {
t.Fatalf("want *errs.ValidationError, got %T: %v", err, err)
}
if ve.Subtype != errs.SubtypeInvalidArgument {
t.Fatalf("want SubtypeInvalidArgument, got %v", ve.Subtype)
}
if !strings.Contains(err.Error(), tt.wantErr) {
t.Fatalf("want error containing %q, got %q", tt.wantErr, err.Error())
}
})
}
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

// TestRetryOnRateLimit verifies the 99991400 backoff helper: retryable
// rate-limit errors are retried with backoff, anything else returns
// immediately, and exhaustion surfaces the last rate-limit error.
//
// Not parallel: shrinks the package-level slidesRateLimitBaseDelay.
func TestRetryOnRateLimit(t *testing.T) {
restore := slidesRateLimitBaseDelay
slidesRateLimitBaseDelay = time.Millisecond
t.Cleanup(func() { slidesRateLimitBaseDelay = restore })

rateLimitErr := func() error {
return errs.NewAPIError(errs.SubtypeRateLimit, "request trigger frequency limit").WithRetryable()
}

t.Run("success without retry", func(t *testing.T) {
var errOut bytes.Buffer
calls := 0
err := retryOnRateLimit(context.Background(), &errOut, func() error {
calls++
return nil
})
if err != nil || calls != 1 {
t.Fatalf("err=%v calls=%d, want nil/1", err, calls)
}
if errOut.Len() != 0 {
t.Fatalf("no retry message expected, got: %s", errOut.String())
}
})

t.Run("succeeds after transient rate limit", func(t *testing.T) {
var errOut bytes.Buffer
calls := 0
err := retryOnRateLimit(context.Background(), &errOut, func() error {
calls++
if calls <= 2 {
return rateLimitErr()
}
return nil
})
if err != nil || calls != 3 {
t.Fatalf("err=%v calls=%d, want nil/3", err, calls)
}
if !strings.Contains(errOut.String(), "retrying") {
t.Fatalf("expected retry announcement, got: %s", errOut.String())
}
})

t.Run("exhaustion returns last rate-limit error", func(t *testing.T) {
var errOut bytes.Buffer
calls := 0
err := retryOnRateLimit(context.Background(), &errOut, func() error {
calls++
return rateLimitErr()
})
if err == nil || !isRateLimitedErr(err) {
t.Fatalf("want rate-limit error after exhaustion, got: %v", err)
}
if calls != slidesRateLimitMaxRetries+1 {
t.Fatalf("calls=%d, want %d", calls, slidesRateLimitMaxRetries+1)
}
})

t.Run("non-rate-limit error returns immediately", func(t *testing.T) {
var errOut bytes.Buffer
calls := 0
boom := errs.NewAPIError(errs.SubtypeNotFound, "not found")
err := retryOnRateLimit(context.Background(), &errOut, func() error {
calls++
return boom
})
if !errors.Is(err, boom) || calls != 1 {
t.Fatalf("err=%v calls=%d, want boom/1", err, calls)
}
})

t.Run("cancelled context aborts the backoff wait", func(t *testing.T) {
var errOut bytes.Buffer
ctx, cancel := context.WithCancel(context.Background())
cancel()
err := retryOnRateLimit(ctx, &errOut, func() error {
return rateLimitErr()
})
if !errors.Is(err, context.Canceled) {
t.Fatalf("want context.Canceled, got: %v", err)
}
})
}
Loading
Loading