Skip to content

Commit 2adf8fb

Browse files
author
Mikhail Samin's Claude
committed
Add file-absolute start_seconds/end_seconds to enterprise matches
recognize_enterprise now reports where each song plays in the file as start_seconds / end_seconds (seconds, file-absolute). These are computed from the chunk offset the response otherwise carries only at the chunk level, so the position is no longer lost when chunks are flattened. The endpoint is now asked for accurate offsets by default, so the values are precise. The raw start_offset / end_offset remain as the fragment-relative milliseconds.
1 parent fc162ef commit 2adf8fb

8 files changed

Lines changed: 144 additions & 5 deletions

File tree

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
cmake_minimum_required(VERSION 3.16)
2-
project(audd-cpp VERSION 1.5.11 LANGUAGES CXX)
2+
project(audd-cpp VERSION 1.5.12 LANGUAGES CXX)
33

44
# ---------- options ----------
55
option(AUDD_CXX20 "Build with C++20 instead of C++17" OFF)

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ Identify a song from a local file path:
4040
auto result = client.recognize(audd::SourceFilePath{"/path/to/clip.mp3"});
4141
```
4242
43-
`recognize()` accepts a `std::string` (auto-classified as URL or file path), an explicit `audd::SourceUrl` / `audd::SourceFilePath`, or an `audd::SourceBytes` carrying raw audio bytes. For longer audio files, use `recognize_enterprise(source, opts)`, which returns `std::vector<EnterpriseMatch>` across the file's chunks. Each match carries the same core tags plus `score`, `start_offset`, `end_offset`, `isrc`, `upc`. Access to `isrc`, `upc`, and `score` requires a Startup plan or higher — [contact us](mailto:api@audd.io) for enterprise features.
43+
`recognize()` accepts a `std::string` (auto-classified as URL or file path), an explicit `audd::SourceUrl` / `audd::SourceFilePath`, or an `audd::SourceBytes` carrying raw audio bytes. For longer audio files, use `recognize_enterprise(source, opts)`, which returns `std::vector<EnterpriseMatch>` across the file's chunks. Each match carries the same core tags plus `score`, `isrc`, `upc`, and `start_seconds` / `end_seconds` — where the match plays in your file, in seconds. These are precise because the SDK requests accurate offsets by default; pass `opts.accurate_offsets = false` to turn that off. Behind them, `start_offset` / `end_offset` are the raw fragment-relative milliseconds. Access to `isrc`, `upc`, and `score` requires a Startup plan or higher — [contact us](mailto:api@audd.io) for enterprise features.
4444
4545
Every blocking method has an `_async` twin returning `std::future`: `recognize_async`, `recognize_enterprise_async`, `streams().add_async`, `advanced().find_lyrics_async`, etc. Reach for the future-based form when you want non-blocking dispatch from a UI thread or want to fan out concurrent calls.
4646

include/audd/recognition.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,12 @@ struct EnterpriseMatch {
200200
std::string song_link;
201201
int start_offset = 0;
202202
int end_offset = 0;
203+
// start_seconds / end_seconds are where the match plays within the user's
204+
// file, in seconds: the chunk's file offset plus start_offset / end_offset.
205+
// Computed by the SDK (not wire fields); std::nullopt when the chunk
206+
// carried no parseable offset.
207+
std::optional<double> start_seconds;
208+
std::optional<double> end_seconds;
203209

204210
std::map<std::string, nlohmann::json> extras;
205211
std::string raw_response;

include/audd/version.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
#ifndef AUDD_VERSION_HPP
99
#define AUDD_VERSION_HPP
1010

11-
#define AUDD_VERSION "1.5.11"
11+
#define AUDD_VERSION "1.5.12"
1212

1313
namespace audd {
1414

src/client.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,9 @@ void apply_enterprise_opts(internal::FormFields& f, const EnterpriseOptions& opt
128128
if (opts.limit) f.data["limit"] = std::to_string(*opts.limit);
129129
if (opts.skip_first_seconds) f.data["skip_first_seconds"] = std::to_string(*opts.skip_first_seconds);
130130
if (opts.use_timecode) f.data["use_timecode"] = (*opts.use_timecode ? "true" : "false");
131-
if (opts.accurate_offsets) f.data["accurate_offsets"] = (*opts.accurate_offsets ? "true" : "false");
131+
// accurate_offsets defaults on: precise start_seconds/end_seconds anchoring
132+
// is the SDK default unless the caller explicitly opts out.
133+
f.data["accurate_offsets"] = opts.accurate_offsets.value_or(true) ? "true" : "false";
132134
}
133135

134136
// Retry helper now lives in internal/retry.hpp so tests can verify the
@@ -355,7 +357,17 @@ AudD::recognize_enterprise(const Source& source, const EnterpriseOptions& opts)
355357
}
356358
for (const auto& chunk : *result_it) {
357359
auto parsed = internal::parse_enterprise_chunk(chunk);
358-
for (auto& song : parsed.songs) out.push_back(std::move(song));
360+
// The chunk offset anchors the fragment within the user's file. Each
361+
// match's start_offset / end_offset are milliseconds within the
362+
// fragment; add the file anchor to get absolute file seconds.
363+
auto base = internal::offset_to_seconds(parsed.offset);
364+
for (auto& song : parsed.songs) {
365+
if (base) {
366+
song.start_seconds = *base + song.start_offset / 1000.0;
367+
song.end_seconds = *base + song.end_offset / 1000.0;
368+
}
369+
out.push_back(std::move(song));
370+
}
359371
}
360372
return out;
361373
}

src/internal/json_parse.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#ifndef AUDD_INTERNAL_JSON_PARSE_HPP
55
#define AUDD_INTERNAL_JSON_PARSE_HPP
66

7+
#include <optional>
78
#include <string>
89

910
#include <nlohmann/json.hpp>
@@ -41,6 +42,11 @@ StreamCallbackNotification parse_stream_callback_notification(const nlo
4142
// branded_message extracts an "Artist — Title" string from a result map, if any.
4243
std::string branded_message(const nlohmann::json& result);
4344

45+
// offset_to_seconds parses an AudD chunk offset string into seconds. Accepts
46+
// "SS", "MM:SS", "HH:MM:SS", or a bare number. Returns std::nullopt on empty
47+
// or unparseable input. Never throws.
48+
std::optional<double> offset_to_seconds(const std::string& offset);
49+
4450
// extract_extras returns the subset of `obj` whose keys are NOT in `known`.
4551
std::map<std::string, nlohmann::json> extract_extras(
4652
const nlohmann::json& obj,

src/json_parse.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@
44
#include "internal/json_parse.hpp"
55

66
#include <algorithm>
7+
#include <optional>
78
#include <set>
89
#include <string>
10+
#include <vector>
911

1012
#include <audd/error.hpp>
1113

@@ -47,6 +49,45 @@ std::int64_t j_int64(const nlohmann::json& j, const std::string& key) {
4749

4850
} // anonymous
4951

52+
// offset_to_seconds converts an AudD chunk offset string into seconds.
//
// Accepted shapes are "SS", "MM:SS" and "HH:MM:SS". Each colon-separated
// component must be a plain non-negative decimal number: ASCII digits with at
// most one '.', and at least one digit. Fractional values such as "12.5" are
// therefore accepted.
//
// Returns std::nullopt when:
//   - the input is empty,
//   - there are more than three components,
//   - any component is empty (e.g. "00::30", ":30", "30:"),
//   - any component contains a character other than a digit or '.',
//   - std::stod rejects the component or reports it out of range.
//
// The character whitelist exists because std::stod on its own also accepts
// leading whitespace, signs, exponents, hex floats and the spellings "inf" /
// "nan". None of those is a meaningful position inside a file, and letting
// them through would yield NaN or negative start_seconds / end_seconds.
//
// Never throws: every std::stod failure is caught and mapped to std::nullopt.
std::optional<double> offset_to_seconds(const std::string& offset) {
    if (offset.empty()) return std::nullopt;

    // Split on ':'. A string containing k colons always yields k + 1 parts,
    // so `parts` is never empty after the final push_back below.
    std::vector<std::string> parts;
    std::string cur;
    for (char ch : offset) {
        if (ch == ':') {
            parts.push_back(cur);
            cur.clear();
        } else {
            cur.push_back(ch);
        }
    }
    parts.push_back(cur);

    // At most hours, minutes and seconds.
    if (parts.size() > 3) return std::nullopt;

    auto parse_component = [](const std::string& s) -> std::optional<double> {
        // Validate the spelling before handing it to std::stod, which is far
        // more permissive than the offset format (see the function comment).
        std::size_t digits = 0;
        std::size_t dots = 0;
        for (char ch : s) {
            if (ch >= '0' && ch <= '9') {
                ++digits;
            } else if (ch == '.') {
                ++dots;
            } else {
                return std::nullopt;
            }
        }
        // Rejects "", "." and anything with two or more decimal points.
        if (digits == 0 || dots > 1) return std::nullopt;

        try {
            std::size_t consumed = 0;
            const double v = std::stod(s, &consumed);
            // NOTE(review): std::stod follows the current C locale. Under a
            // locale whose decimal separator is not '.', a fractional
            // component stops early and is reported as unparseable here.
            if (consumed != s.size()) return std::nullopt;
            return v;
        } catch (const std::exception&) {
            // std::out_of_range for absurdly long digit runs, or
            // std::invalid_argument; both mean "no usable offset".
            return std::nullopt;
        }
    };

    // Fold left to right in base 60: ((HH * 60) + MM) * 60 + SS.
    double total = 0.0;
    for (const auto& p : parts) {
        const auto v = parse_component(p);
        if (!v) return std::nullopt;
        total = total * 60.0 + *v;
    }
    return total;
}
90+
5091
std::map<std::string, nlohmann::json> extract_extras(
5192
const nlohmann::json& obj,
5293
const std::vector<std::string>& known) {

tests/test_enterprise_parse.cpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,80 @@
99
#include "internal/json_parse.hpp"
1010

1111
using audd::internal::parse_enterprise_chunk;
12+
using audd::internal::offset_to_seconds;
13+
14+
namespace {
15+
// flatten mirrors the chunk-flattening + offset anchoring performed in
16+
// AudD::recognize_enterprise: it walks the parsed chunks, computes each
17+
// match's absolute file position from the chunk offset plus the
18+
// fragment-relative start_offset/end_offset, and returns the flat vector.
19+
std::vector<audd::EnterpriseMatch> flatten(const nlohmann::json& result) {
20+
std::vector<audd::EnterpriseMatch> out;
21+
for (const auto& chunk : result) {
22+
auto parsed = parse_enterprise_chunk(chunk);
23+
auto base = offset_to_seconds(parsed.offset);
24+
for (auto& song : parsed.songs) {
25+
if (base) {
26+
song.start_seconds = *base + song.start_offset / 1000.0;
27+
song.end_seconds = *base + song.end_offset / 1000.0;
28+
}
29+
out.push_back(std::move(song));
30+
}
31+
}
32+
return out;
33+
}
34+
} // namespace
35+
36+
// Pins the contract of offset_to_seconds: the documented offset shapes parse
// to the expected number of seconds, and malformed input yields std::nullopt
// instead of throwing.
TEST_CASE("offset_to_seconds parses the documented offset shapes") {
    using doctest::Approx;

    // Empty input carries no position.
    CHECK_FALSE(offset_to_seconds("").has_value());

    // "SS", "MM:SS", "HH:MM:SS" and a bare fractional number.
    CHECK(offset_to_seconds("30").value() == Approx(30.0));
    CHECK(offset_to_seconds("00:30").value() == Approx(30.0));
    CHECK(offset_to_seconds("00:01:00").value() == Approx(60.0));
    CHECK(offset_to_seconds("01:02:03").value() == Approx(3723.0));
    CHECK(offset_to_seconds("12.5").value() == Approx(12.5));
    // Fractional seconds also combine with colon-separated components.
    CHECK(offset_to_seconds("00:01:30.5").value() == Approx(90.5));

    // Unparseable -> nullopt, never throws.
    CHECK_FALSE(offset_to_seconds("not-a-time").has_value());
    CHECK_FALSE(offset_to_seconds("1:2:3:4").has_value());
    CHECK_FALSE(offset_to_seconds("00::30").has_value());
    // Trailing junk after an otherwise valid number is rejected, not truncated.
    CHECK_FALSE(offset_to_seconds("30s").has_value());
    // A leading or trailing colon leaves an empty component.
    CHECK_FALSE(offset_to_seconds(":30").has_value());
    CHECK_FALSE(offset_to_seconds("30:").has_value());
}
49+
50+
// Two-chunk fixture: the first chunk carries an "offset", the second does
// not. Verifies that songs in the first chunk get file-absolute
// start_seconds / end_seconds, while songs in the second keep them unset and
// still expose their raw start_offset / end_offset.
TEST_CASE("enterprise flatten anchors start_seconds/end_seconds to the file") {
    using doctest::Approx;

    const auto payload = nlohmann::json::parse(R"([
        {
            "offset": "00:01:00",
            "songs": [{
                "artist": "Daft Punk",
                "title": "Get Lucky",
                "start_offset": 4200,
                "end_offset": 11800
            }]
        },
        {
            "songs": [{
                "artist": "No Offset",
                "title": "Untitled",
                "start_offset": 500,
                "end_offset": 9000
            }]
        }
    ])");

    const auto matches = flatten(payload);
    REQUIRE(matches.size() == 2);

    // Chunk offset 60s + 4200ms / 11800ms fragment-relative.
    const auto& anchored = matches[0];
    REQUIRE(anchored.start_seconds.has_value());
    REQUIRE(anchored.end_seconds.has_value());
    CHECK(anchored.start_seconds.value() == Approx(64.2));
    CHECK(anchored.end_seconds.value() == Approx(71.8));

    // Chunk with no offset -> seconds stay absent; raw offsets still present.
    const auto& unanchored = matches[1];
    CHECK_FALSE(unanchored.start_seconds.has_value());
    CHECK_FALSE(unanchored.end_seconds.has_value());
    CHECK(unanchored.start_offset == 500);
    CHECK(unanchored.end_offset == 9000);
}
1286

1387
TEST_CASE("enterprise chunk parses songs array") {
1488
auto j = nlohmann::json::parse(R"({

0 commit comments

Comments
 (0)