Add file-absolute start_seconds/end_seconds to enterprise matches

Mikhail Samin's Claude · Mikhail Samin's Claude · commit 2adf8fb4ac14 · 2026-06-01T22:28:17.000Z
recognize_enterprise now reports where each song plays in the file as
start_seconds / end_seconds (seconds, file-absolute). They are computed by
adding each match's fragment-relative offset to its chunk's offset, which the
response carries only at the chunk level, so the position is no longer lost
when chunks are flattened. The endpoint is now asked for accurate offsets by
default, so the values are precise. The raw start_offset / end_offset remain
available as the fragment-relative milliseconds.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.16)
-project(audd-cpp VERSION 1.5.11 LANGUAGES CXX)
+project(audd-cpp VERSION 1.5.12 LANGUAGES CXX)
 
 # ---------- options ----------
 option(AUDD_CXX20           "Build with C++20 instead of C++17"        OFF)
diff --git a/README.md b/README.md
@@ -40,7 +40,7 @@ Identify a song from a local file path:
 auto result = client.recognize(audd::SourceFilePath{"/path/to/clip.mp3"});
 ```
 
-`recognize()` accepts a `std::string` (auto-classified as URL or file path), an explicit `audd::SourceUrl` / `audd::SourceFilePath`, or an `audd::SourceBytes` carrying raw audio bytes. For longer audio files, use `recognize_enterprise(source, opts)`, which returns `std::vector<EnterpriseMatch>` across the file's chunks. Each match carries the same core tags plus `score`, `start_offset`, `end_offset`, `isrc`, `upc`. Access to `isrc`, `upc`, and `score` requires a Startup plan or higher — [contact us](mailto:api@audd.io) for enterprise features.
+`recognize()` accepts a `std::string` (auto-classified as URL or file path), an explicit `audd::SourceUrl` / `audd::SourceFilePath`, or an `audd::SourceBytes` carrying raw audio bytes. For longer audio files, use `recognize_enterprise(source, opts)`, which returns `std::vector<EnterpriseMatch>` across the file's chunks. Each match carries the same core tags plus `score`, `isrc`, `upc`, and `start_seconds` / `end_seconds` — where the match plays in your file, in seconds (each is a `std::optional<double>`, empty when the chunk carried no parseable offset). These are precise because the SDK requests accurate offsets by default; pass `opts.accurate_offsets = false` to turn that off. The raw `start_offset` / `end_offset` fields remain available as the fragment-relative milliseconds. Access to `isrc`, `upc`, and `score` requires a Startup plan or higher — [contact us](mailto:api@audd.io) for enterprise features.
 
 Every blocking method has an `_async` twin returning `std::future`: `recognize_async`, `recognize_enterprise_async`, `streams().add_async`, `advanced().find_lyrics_async`, etc. Reach for the future-based form when you want non-blocking dispatch from a UI thread or want to fan out concurrent calls.
 
diff --git a/include/audd/recognition.hpp b/include/audd/recognition.hpp
@@ -200,6 +200,12 @@ struct EnterpriseMatch {
     std::string song_link;
     int         start_offset = 0;
     int         end_offset   = 0;
+    // start_seconds / end_seconds are where the match plays within the user's
+    // file, in seconds: the chunk's file offset plus start_offset / end_offset.
+    // Computed by the SDK (not wire fields); std::nullopt when the chunk
+    // carried no parseable offset.
+    std::optional<double> start_seconds;
+    std::optional<double> end_seconds;
 
     std::map<std::string, nlohmann::json> extras;
     std::string                           raw_response;
diff --git a/include/audd/version.hpp b/include/audd/version.hpp
@@ -8,7 +8,7 @@
 #ifndef AUDD_VERSION_HPP
 #define AUDD_VERSION_HPP
 
-#define AUDD_VERSION "1.5.11"
+#define AUDD_VERSION "1.5.12"
 
 namespace audd {
 
diff --git a/src/client.cpp b/src/client.cpp
@@ -128,7 +128,9 @@ void apply_enterprise_opts(internal::FormFields& f, const EnterpriseOptions& opt
     if (opts.limit)                    f.data["limit"]  = std::to_string(*opts.limit);
     if (opts.skip_first_seconds)       f.data["skip_first_seconds"] = std::to_string(*opts.skip_first_seconds);
     if (opts.use_timecode)             f.data["use_timecode"]    = (*opts.use_timecode    ? "true" : "false");
-    if (opts.accurate_offsets)         f.data["accurate_offsets"] = (*opts.accurate_offsets ? "true" : "false");
+    // accurate_offsets defaults on: precise start_seconds/end_seconds anchoring
+    // is the SDK default unless the caller explicitly opts out.
+    f.data["accurate_offsets"] = opts.accurate_offsets.value_or(true) ? "true" : "false";
 }
 
 // Retry helper now lives in internal/retry.hpp so tests can verify the
@@ -355,7 +357,17 @@ AudD::recognize_enterprise(const Source& source, const EnterpriseOptions& opts)
     }
     for (const auto& chunk : *result_it) {
         auto parsed = internal::parse_enterprise_chunk(chunk);
-        for (auto& song : parsed.songs) out.push_back(std::move(song));
+        // The chunk offset anchors the fragment within the user's file. Each
+        // match's start_offset / end_offset are milliseconds within the
+        // fragment; add the file anchor to get absolute file seconds.
+        auto base = internal::offset_to_seconds(parsed.offset);
+        for (auto& song : parsed.songs) {
+            if (base) {
+                song.start_seconds = *base + song.start_offset / 1000.0;
+                song.end_seconds   = *base + song.end_offset / 1000.0;
+            }
+            out.push_back(std::move(song));
+        }
     }
     return out;
 }
diff --git a/src/internal/json_parse.hpp b/src/internal/json_parse.hpp
@@ -4,6 +4,7 @@
 #ifndef AUDD_INTERNAL_JSON_PARSE_HPP
 #define AUDD_INTERNAL_JSON_PARSE_HPP
 
+#include <optional>
 #include <string>
 
 #include <nlohmann/json.hpp>
@@ -41,6 +42,11 @@ StreamCallbackNotification          parse_stream_callback_notification(const nlo
 // branded_message extracts an "Artist — Title" string from a result map, if any.
 std::string branded_message(const nlohmann::json& result);
 
+// offset_to_seconds parses an AudD chunk offset string into seconds. Accepts
+// "SS", "MM:SS", "HH:MM:SS", or a bare number; colon-separated components are
+// folded base-60, and a component may be fractional (e.g. "12.5").
+// Returns std::nullopt on empty or unparseable input, including an empty
+// component ("00::30") or more than three components ("1:2:3:4").
+// Never throws.
+std::optional<double> offset_to_seconds(const std::string& offset);
+
 // extract_extras returns the subset of `obj` whose keys are NOT in `known`.
 std::map<std::string, nlohmann::json> extract_extras(
     const nlohmann::json& obj,
diff --git a/src/json_parse.cpp b/src/json_parse.cpp
@@ -4,8 +4,10 @@
 #include "internal/json_parse.hpp"
 
 #include <algorithm>
+#include <optional>
 #include <set>
 #include <string>
+#include <vector>
 
 #include <audd/error.hpp>
 
@@ -47,6 +49,45 @@ std::int64_t j_int64(const nlohmann::json& j, const std::string& key) {
 
 } // anonymous
 
+// Parses an AudD chunk offset ("SS", "MM:SS", "HH:MM:SS", or a bare decimal
+// number) into seconds. Returns std::nullopt for empty or malformed input and
+// never throws.
+//
+// Each colon-separated component may contain only ASCII digits and at most one
+// '.'. std::stod on its own would also accept a sign, leading whitespace, hex
+// literals, exponents, "inf" and "nan"; those are rejected here so negative or
+// non-finite positions cannot reach start_seconds / end_seconds.
+std::optional<double> offset_to_seconds(const std::string& offset) {
+    if (offset.empty()) return std::nullopt;
+
+    // Converts a single component; nullopt when it is not a plain decimal.
+    auto parse_component = [](const std::string& s) -> std::optional<double> {
+        bool has_digit = false;
+        bool has_dot   = false;
+        for (char ch : s) {
+            if (ch >= '0' && ch <= '9') {
+                has_digit = true;
+            } else if (ch == '.' && !has_dot) {
+                has_dot = true;
+            } else {
+                return std::nullopt; // sign, whitespace, letters, second '.'
+            }
+        }
+        if (!has_digit) return std::nullopt; // empty component or a lone "."
+        try {
+            std::size_t consumed = 0;
+            const double v = std::stod(s, &consumed);
+            // std::stod may stop early (e.g. if the active C locale does not
+            // use '.' as the decimal separator); treat that as unparseable.
+            if (consumed != s.size()) return std::nullopt;
+            return v;
+        } catch (const std::exception&) {
+            return std::nullopt; // e.g. std::out_of_range on a huge digit run
+        }
+    };
+
+    // Walk the components left to right, folding base-60 as we go. At most
+    // three components (hours, minutes, seconds) are allowed.
+    double      total = 0.0;
+    int         count = 0;
+    std::size_t begin = 0;
+    while (true) {
+        if (++count > 3) return std::nullopt;
+        const std::size_t colon = offset.find(':', begin);
+        const bool        last  = (colon == std::string::npos);
+        const auto value =
+            parse_component(offset.substr(begin, last ? std::string::npos : colon - begin));
+        if (!value) return std::nullopt;
+        total = total * 60.0 + *value;
+        if (last) return total;
+        begin = colon + 1;
+    }
+}
+
 std::map<std::string, nlohmann::json> extract_extras(
     const nlohmann::json& obj,
     const std::vector<std::string>& known) {
diff --git a/tests/test_enterprise_parse.cpp b/tests/test_enterprise_parse.cpp
@@ -9,6 +9,80 @@
 #include "internal/json_parse.hpp"
 
 using audd::internal::parse_enterprise_chunk;
+using audd::internal::offset_to_seconds;
+
+namespace {
+// flatten reproduces, for the tests below, the flattening step of
+// AudD::recognize_enterprise. Every chunk in `result` is parsed and its offset
+// is converted to a file anchor in seconds; each song then gets
+// start_seconds / end_seconds = anchor + start_offset / end_offset (ms) / 1000.
+// Songs from a chunk whose offset does not parse keep both fields unset.
+std::vector<audd::EnterpriseMatch> flatten(const nlohmann::json& result) {
+    std::vector<audd::EnterpriseMatch> flat;
+    for (const auto& chunk_json : result) {
+        auto chunk = parse_enterprise_chunk(chunk_json);
+        const auto anchor = offset_to_seconds(chunk.offset);
+        for (auto& match : chunk.songs) {
+            if (anchor.has_value()) {
+                const double anchor_s = *anchor;
+                match.start_seconds = anchor_s + match.start_offset / 1000.0;
+                match.end_seconds   = anchor_s + match.end_offset / 1000.0;
+            }
+            flat.push_back(std::move(match));
+        }
+    }
+    return flat;
+}
+} // namespace
+
+TEST_CASE("offset_to_seconds parses the documented offset shapes") {
+    using doctest::Approx;
+
+    // Well-formed offsets paired with the number of seconds each one denotes.
+    struct Sample {
+        const char* text;
+        double      seconds;
+    };
+    const Sample well_formed[] = {
+        {"30", 30.0},
+        {"00:30", 30.0},
+        {"00:01:00", 60.0},
+        {"01:02:03", 3723.0},
+        {"12.5", 12.5},
+    };
+    for (const Sample& sample : well_formed) {
+        CAPTURE(sample.text); // names the failing input in the report
+        CHECK(offset_to_seconds(sample.text).value() == Approx(sample.seconds));
+    }
+
+    // Empty or unparseable input yields nullopt instead of throwing.
+    for (const char* text : {"", "not-a-time", "1:2:3:4", "00::30"}) {
+        CAPTURE(text);
+        CHECK_FALSE(offset_to_seconds(text).has_value());
+    }
+}
+
+TEST_CASE("enterprise flatten anchors start_seconds/end_seconds to the file") {
+    // Two chunks: the first carries an "offset", the second does not.
+    const auto payload = nlohmann::json::parse(R"([
+        {
+            "offset": "00:01:00",
+            "songs": [{
+                "artist": "Daft Punk",
+                "title": "Get Lucky",
+                "start_offset": 4200,
+                "end_offset": 11800
+            }]
+        },
+        {
+            "songs": [{
+                "artist": "No Offset",
+                "title": "Untitled",
+                "start_offset": 500,
+                "end_offset": 9000
+            }]
+        }
+    ])");
+    const auto matches = flatten(payload);
+    REQUIRE(matches.size() == 2);
+
+    // 60 s chunk anchor plus 4200 ms / 11800 ms inside the fragment.
+    const auto& anchored = matches[0];
+    REQUIRE(anchored.start_seconds.has_value());
+    REQUIRE(anchored.end_seconds.has_value());
+    CHECK(*anchored.start_seconds == doctest::Approx(64.2));
+    CHECK(*anchored.end_seconds == doctest::Approx(71.8));
+
+    // Without a chunk offset the computed seconds stay unset, while the raw
+    // fragment-relative offsets are still populated.
+    const auto& unanchored = matches[1];
+    CHECK_FALSE(unanchored.start_seconds.has_value());
+    CHECK_FALSE(unanchored.end_seconds.has_value());
+    CHECK(unanchored.start_offset == 500);
+    CHECK(unanchored.end_offset == 9000);
+}
 
 TEST_CASE("enterprise chunk parses songs array") {
     auto j = nlohmann::json::parse(R"({