Skip to content

Commit 09d3e48

Browse files
fix(tracks): match ISRC ignoring dashes and case, add functional index (#832)
## Summary ISRC lookup via `GET /v1/tracks?isrc=…` failed when the stored value and the query disagreed on dash placement. Partner report (Thomas): Fixed purely on the search/query side (no normalization at write time): - Normalize the incoming `isrc` query param in Go: uppercase + strip non-alphanumeric characters before passing to the DB. - In the SQL, compare against `regexp_replace(upper(isrc), '[^A-Z0-9]', '', 'g')` so the stored value is normalized at compare time. Net result: any combination of dashes/spaces/casing in either the request or the stored row matches. `US-ANG-21-03742`, `USANG2103742`, `usang2103742`, and `US-ANG2103742` all resolve to the same row. ## Index Added `tracks_isrc_normalized_idx`, a **partial functional index** matching the comparison expression so the new query can index-scan instead of seq-scanning all of `tracks`: ```sql create index concurrently if not exists tracks_isrc_normalized_idx on public.tracks ((regexp_replace(upper(isrc), '[^A-Z0-9]'::text, ''::text, 'g'))) where isrc is not null; ``` Verified planner picks it on a 50k-row scratch table: ``` Bitmap Heap Scan on scratch_tracks (cost=16.43..248.18 rows=500 width=4) Recheck Cond: (regexp_replace(upper(isrc), '[^A-Z0-9]'::text, ''::text, 'g'::text) = ANY (...)) -> Bitmap Index Scan on scratch_tracks_isrc_norm_idx (cost=0.00..16.30 rows=500 width=0) Index Cond: (regexp_replace(upper(isrc), '[^A-Z0-9]'::text, ''::text, 'g'::text) = ANY (...)) ``` Index DDL uses `CREATE INDEX CONCURRENTLY` (no `BEGIN/COMMIT` wrapper) so the build does not take an `ACCESS EXCLUSIVE` lock on `tracks`. `IF NOT EXISTS` keeps the migration idempotent. Partial on `isrc IS NOT NULL` since most rows have no ISRC. Heads-up given #830: that PR moved `0201` away from `CONCURRENTLY` because the legacy Python Celery task on `challenge_disbursements` was keeping ~3-minute transactions open continuously, blocking the build's `virtualxid` wait indefinitely. `tracks` is written by the Go indexer and isn't subject to that long-transaction pattern, so `CONCURRENTLY` should complete normally here. If a stuck build needs to be aborted, drop the invalid index with `DROP INDEX IF EXISTS tracks_isrc_normalized_idx;` and re-run. ## ISWC There is no `iswc` query-param lookup today (column exists and is exposed in API responses, but no handler or sqlc query reads it). Nothing to mirror — left as-is. ## Test plan - [x] `TestGetTracksByISRC` covers: stored-with-dashes queried undashed, queried with same dashes, queried lowercased; stored-without-dashes queried as-is and queried with inserted dashes. - [x] Existing tracks tests (`TestTracksEndpoint`, `TestGetTracksByPermalink`, `TestGetTracksExcludesAccessAuthorities`, `Test200UnAuthed`) still pass. - [x] Migration applies cleanly and is idempotent (re-running prints `relation \"tracks_isrc_normalized_idx\" already exists, skipping`). - [x] `EXPLAIN` on a seeded scratch table shows `Bitmap Index Scan on tracks_isrc_normalized_idx`. - [ ] After deploy: verify `SELECT indexrelid::regclass, indisvalid FROM pg_index WHERE indexrelid::regclass::text = 'tracks_isrc_normalized_idx';` returns `indisvalid = true`. 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 6190f64 commit 09d3e48

5 files changed

Lines changed: 84 additions & 3 deletions

File tree

api/dbv1/get_track_ids_by_isrc.sql.go

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
-- name: GetTrackIdsByISRC :many
2+
-- Match ISRC ignoring dashes/whitespace/case so a stored value like
3+
-- "US-ANG-21-03742" matches a query of "USANG2103742" and vice versa.
4+
-- Inputs in @isrcs must already be normalized (uppercased, non-alphanumerics stripped).
25
SELECT track_id
36
FROM tracks
4-
WHERE isrc = ANY(@isrcs::text[])
7+
WHERE regexp_replace(upper(isrc), '[^A-Z0-9]', '', 'g') = ANY(@isrcs::text[])
58
AND access_authorities IS NULL;

api/v1_tracks.go

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,22 @@
11
package api
22

33
import (
4+
"regexp"
45
"strings"
56

67
"api.audius.co/api/dbv1"
78
"api.audius.co/trashid"
89
"github.com/gofiber/fiber/v2"
910
)
1011

12+
// ISRCs may be stored with or without dashes (e.g. "US-ANG-21-03742" vs
13+
// "USANG2103742"); normalize so either form matches the same row.
14+
var isrcNonAlphanum = regexp.MustCompile(`[^A-Z0-9]`)
15+
16+
func normalizeISRC(s string) string {
17+
return isrcNonAlphanum.ReplaceAllString(strings.ToUpper(s), "")
18+
}
19+
1120
func (app *ApiServer) v1Tracks(c *fiber.Ctx) error {
1221
myId, _ := trashid.DecodeHashId(c.Query("user_id"))
1322
ids := decodeIdList(c)
@@ -41,7 +50,11 @@ func (app *ApiServer) v1Tracks(c *fiber.Ctx) error {
4150
// Add ISRC ID mappings
4251
isrcs := queryMulti(c, "isrc")
4352
if len(isrcs) > 0 {
44-
newIds, err := app.queries.GetTrackIdsByISRC(c.Context(), isrcs)
53+
normalized := make([]string, len(isrcs))
54+
for i, isrc := range isrcs {
55+
normalized[i] = normalizeISRC(isrc)
56+
}
57+
newIds, err := app.queries.GetTrackIdsByISRC(c.Context(), normalized)
4558
if err != nil {
4659
return err
4760
}

api/v1_tracks_test.go

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"testing"
66

77
"api.audius.co/api/dbv1"
8+
"api.audius.co/trashid"
89
"github.com/stretchr/testify/assert"
910
"github.com/stretchr/testify/require"
1011
)
@@ -36,6 +37,47 @@ func TestGetTracksByPermalink(t *testing.T) {
3637
})
3738
}
3839

40+
func TestGetTracksByISRC(t *testing.T) {
41+
app := testAppWithFixtures(t)
42+
ctx := context.Background()
43+
require.NotNil(t, app.writePool, "test requires write pool")
44+
45+
// Track 100 ("T1") stored with dashes; track 101 ("T2") stored without.
46+
_, err := app.writePool.Exec(ctx,
47+
`UPDATE tracks SET isrc = 'US-ANG-21-03742' WHERE track_id = 100 AND is_current = true`)
48+
require.NoError(t, err)
49+
_, err = app.writePool.Exec(ctx,
50+
`UPDATE tracks SET isrc = 'QMEU31610080' WHERE track_id = 101 AND is_current = true`)
51+
require.NoError(t, err)
52+
53+
track100Id := trashid.MustEncodeHashID(100)
54+
track101Id := trashid.MustEncodeHashID(101)
55+
56+
cases := []struct {
57+
name string
58+
query string
59+
wantId string
60+
}{
61+
{"stored-with-dashes, queried without", "USANG2103742", track100Id},
62+
{"stored-with-dashes, queried with same dashes", "US-ANG-21-03742", track100Id},
63+
{"stored-with-dashes, queried lowercased and undashed", "usang2103742", track100Id},
64+
{"stored-without-dashes, queried as-is", "QMEU31610080", track101Id},
65+
{"stored-without-dashes, queried with inserted dashes", "QM-EU3-16-10080", track101Id},
66+
}
67+
68+
for _, tc := range cases {
69+
t.Run(tc.name, func(t *testing.T) {
70+
var resp struct {
71+
Data []dbv1.Track
72+
}
73+
status, body := testGet(t, app, "/v1/tracks?isrc="+tc.query, &resp)
74+
require.Equal(t, 200, status, string(body))
75+
require.Len(t, resp.Data, 1, "expected exactly one track for %q: %s", tc.query, string(body))
76+
jsonAssert(t, body, map[string]any{"data.0.id": tc.wantId})
77+
})
78+
}
79+
}
80+
3981
func TestGetTracksExcludesAccessAuthorities(t *testing.T) {
4082
app := testAppWithFixtures(t)
4183
ctx := context.Background()
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
-- Functional index supporting GetTrackIdsByISRC. The handler normalizes the
2+
-- incoming ?isrc= param (uppercase + strip non-alphanumerics) and the SQL
3+
-- compares against regexp_replace(upper(isrc), '[^A-Z0-9]', '', 'g'), so a
4+
-- plain btree on isrc cannot be used. This index materializes the same
5+
-- normalized expression, allowing dash/case-insensitive ISRC lookups to be
6+
-- index-scanned instead of seq-scanning ~all tracks. Partial on
7+
-- isrc IS NOT NULL since the vast majority of rows have no ISRC.
8+
--
9+
-- NOTE: intentionally NOT wrapped in BEGIN/COMMIT so CREATE INDEX
10+
-- CONCURRENTLY can run without taking an ACCESS EXCLUSIVE lock on tracks.
11+
-- IF NOT EXISTS makes the migration idempotent.
12+
13+
create index concurrently if not exists tracks_isrc_normalized_idx
14+
on public.tracks (
15+
(regexp_replace(upper(isrc), '[^A-Z0-9]'::text, ''::text, 'g'))
16+
)
17+
where isrc is not null;
18+
19+
comment on index public.tracks_isrc_normalized_idx is
20+
'Functional index supporting GetTrackIdsByISRC; matches regexp_replace(upper(isrc), ''[^A-Z0-9]'', '''', ''g'') so dash/case-insensitive ?isrc= lookups hit the index.';

0 commit comments

Comments
 (0)