Skip to content

Commit 7e65f93

Browse files
authored
refactor(go-api): parse python page_tokens (#5538)
The underlying datastore cursor uses the same proto in Python's ndb and Go's datastore libraries, so we can pass the Python cursor straight to the Go implementation, since both run identical queries. I have done a small test to verify that this can work. Even if it's not 100% guaranteed, it's probably still better than nothing. This is only really needed for the Python-to-Go cutover, to avoid throwing a bunch of errors to users when we deploy. We can delete this code once the Go API is deployed and stable.
1 parent b99fb49 commit 7e65f93

2 files changed

Lines changed: 135 additions & 9 deletions

File tree

go/internal/database/datastore/vulnerability_matching.go

Lines changed: 59 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,16 @@ func (s *VulnerabilityStore) MatchCommits(ctx context.Context, commit []byte, cu
5555
if cursor != "" {
5656
parsedCursor, err := parseMatchCommitsCursor(cursor)
5757
if err != nil {
58-
// TODO: attempt to recover from Python cursor
59-
yield(models.MatchResult{}, fmt.Errorf("%w: %w", models.ErrInvalidCursor, err))
60-
return
58+
// Attempt to recover from Python cursor
59+
pyCursor, _, pyErr := parsePythonCursor(cursor)
60+
if pyErr != nil {
61+
yield(models.MatchResult{}, fmt.Errorf("%w: %w", models.ErrInvalidCursor, err))
62+
return
63+
}
64+
q = q.Start(pyCursor)
65+
} else {
66+
q = q.Start(parsedCursor.cursor)
6167
}
62-
q = q.Start(parsedCursor.cursor)
6368
}
6469

6570
it := s.client.Run(ctx, q)
@@ -189,12 +194,18 @@ func (s *VulnerabilityStore) MatchPackages(ctx context.Context, ecosystem, name,
189194
if cursor != "" {
190195
parsed, err := parseMatchPackagesCursor(cursor)
191196
if err != nil {
192-
// TODO: attempt to recover from Python cursor
193-
yield(models.MatchResult{}, fmt.Errorf("%w: %w", models.ErrInvalidCursor, err))
194-
return
197+
// Attempt to recover from Python cursor
198+
pyCursor, lastID, pyErr := parsePythonCursor(cursor)
199+
if pyErr != nil {
200+
yield(models.MatchResult{}, fmt.Errorf("%w: %w", models.ErrInvalidCursor, err))
201+
return
202+
}
203+
lastReturnedID = lastID
204+
q = q.Start(pyCursor)
205+
} else {
206+
lastReturnedID = parsed.lastID
207+
q = q.Start(parsed.cursor)
195208
}
196-
lastReturnedID = parsed.lastID
197-
q = q.Start(parsed.cursor)
198209
}
199210

200211
it := s.client.Run(ctx, q)
@@ -573,3 +584,42 @@ func (s *VulnerabilityStore) MatchCommitsBatch(ctx context.Context, queries []mo
573584

574585
return results, nil
575586
}
587+
588+
// parsePythonCursor parses a Python NDB-style page token/cursor.
589+
// Python cursor format: query_number:ndb_cursor_urlsafe:base64(metadata_1):...
590+
func parsePythonCursor(token string) (datastore.Cursor, string, error) {
591+
parts := strings.Split(token, ":")
592+
if len(parts) < 2 {
593+
return datastore.Cursor{}, "", errors.New("invalid cursor format")
594+
}
595+
596+
cursorPart := strings.TrimRight(parts[1], "=")
597+
var cursor datastore.Cursor
598+
if cursorPart != "" && cursorPart != "RklSU1RfUEFHRV9UT0tFTg" {
599+
var err error
600+
cursor, err = datastore.DecodeCursor(cursorPart)
601+
if err != nil {
602+
return datastore.Cursor{}, "", fmt.Errorf("failed to decode datastore cursor: %w", err)
603+
}
604+
}
605+
606+
lastID := ""
607+
// Parse metadata fields
608+
for _, encMeta := range parts[2:] {
609+
// Use standard base64 URL encoding (with or without padding)
610+
metaBytes, err := base64.RawURLEncoding.DecodeString(encMeta)
611+
if err != nil {
612+
// Try with padding if raw decoding fails
613+
metaBytes, err = base64.URLEncoding.DecodeString(encMeta)
614+
if err != nil {
615+
continue
616+
}
617+
}
618+
k, v, found := strings.Cut(string(metaBytes), "=")
619+
if found && k == "last_id" {
620+
lastID = v
621+
}
622+
}
623+
624+
return cursor, lastID, nil
625+
}

go/internal/database/datastore/vulnerability_matching_test.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,3 +675,79 @@ func TestVulnerabilityStore_MatchPackages_Pagination_DuplicateIDAcrossCursor(t *
675675
t.Errorf("Second page mismatch (-want +got):\n%s", diff)
676676
}
677677
}
678+
679+
func TestParsePythonCursor(t *testing.T) {
680+
tests := []struct {
681+
name string
682+
token string
683+
wantCursor string // unpadded base64 representation of the cursor
684+
wantLastID string
685+
wantErr bool
686+
}{
687+
{
688+
name: "Package query real-world token with last_id metadata",
689+
token: "1:ClcKIAoHdnVsbl9pZBIVGhNHSFNBLTIzaG0tN3c0Ny14dzcyEi9qDm1-b3NzLXZkYi10ZXN0ch0LEhBBZmZlY3RlZFZlcnNpb25zGICAgKyCqLcJDBgAIAA=:bGFzdF9pZD1HSFNBLTIzaG0tN3c0Ny14dzcy",
690+
wantCursor: "ClcKIAoHdnVsbl9pZBIVGhNHSFNBLTIzaG0tN3c0Ny14dzcyEi9qDm1-b3NzLXZkYi10ZXN0ch0LEhBBZmZlY3RlZFZlcnNpb25zGICAgKyCqLcJDBgAIAA", // unpadded
691+
wantLastID: "GHSA-23hm-7w47-xw72",
692+
wantErr: false,
693+
},
694+
{
695+
name: "Commit query real-world token without metadata",
696+
token: "1:CkESO2oObX5vc3MtdmRiLXRlc3RyKQsSD0FmZmVjdGVkQ29tbWl0cyIUQ1VSTC1DVkUtMjAxMC0wNzM0LTAMGAAgAA==",
697+
wantCursor: "CkESO2oObX5vc3MtdmRiLXRlc3RyKQsSD0FmZmVjdGVkQ29tbWl0cyIUQ1VSTC1DVkUtMjAxMC0wNzM0LTAMGAAgAA", // correct unpadded
698+
wantLastID: "",
699+
wantErr: false,
700+
},
701+
{
702+
name: "First page token special case",
703+
token: "1:RklSU1RfUEFHRV9UT0tFTg==",
704+
wantCursor: "",
705+
wantLastID: "",
706+
wantErr: false,
707+
},
708+
{
709+
name: "First page token special case without padding",
710+
token: "1:RklSU1RfUEFHRV9UT0tFTg",
711+
wantCursor: "",
712+
wantLastID: "",
713+
wantErr: false,
714+
},
715+
{
716+
name: "Malformed token (insufficient parts)",
717+
token: "1",
718+
wantErr: true,
719+
},
720+
{
721+
name: "Malformed token (invalid base64 cursor)",
722+
token: "1:not-base64-!!!:bGFzdF9pZD1HSFNBLTIzaG0tN3c0Ny14dzcy",
723+
wantErr: true,
724+
},
725+
{
726+
name: "Malformed metadata (ignored, but cursor parsed successfully)",
727+
token: "1:ClcKIAoHdnVsbl9pZBIVGhNHSFNBLTIzaG0tN3c0Ny14dzcyEi9qDm1-b3NzLXZkYi10ZXN0ch0LEhBBZmZlY3RlZFZlcnNpb25zGICAgKyCqLcJDBgAIAA=:not-base64-!!!",
728+
wantCursor: "ClcKIAoHdnVsbl9pZBIVGhNHSFNBLTIzaG0tN3c0Ny14dzcyEi9qDm1-b3NzLXZkYi10ZXN0ch0LEhBBZmZlY3RlZFZlcnNpb25zGICAgKyCqLcJDBgAIAA",
729+
wantLastID: "",
730+
wantErr: false,
731+
},
732+
}
733+
734+
for _, tt := range tests {
735+
t.Run(tt.name, func(t *testing.T) {
736+
gotCursor, gotLastID, err := parsePythonCursor(tt.token)
737+
if (err != nil) != tt.wantErr {
738+
t.Fatalf("parsePythonCursor() error = %v, wantErr = %v", err, tt.wantErr)
739+
}
740+
if tt.wantErr {
741+
return
742+
}
743+
744+
if gotLastID != tt.wantLastID {
745+
t.Errorf("parsePythonCursor() gotLastID = %q, want %q", gotLastID, tt.wantLastID)
746+
}
747+
748+
if gotCursor.String() != tt.wantCursor {
749+
t.Errorf("parsePythonCursor() gotCursor.String() = %q, want %q", gotCursor.String(), tt.wantCursor)
750+
}
751+
})
752+
}
753+
}

0 commit comments

Comments
 (0)