Skip to content

Commit 42417ff

Browse files
committed
video indexer fixes
1 parent da892f0 commit 42417ff

File tree

4 files changed

+181
-16
lines changed

4 files changed

+181
-16
lines changed

Directory.Build.props

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222
<PackageLicenseExpression>MIT</PackageLicenseExpression>
2323
<PackageReadmeFile>README.md</PackageReadmeFile>
2424
<Product>Managed Code - MarkItDown</Product>
25-
<Version>10.0.4</Version>
26-
<PackageVersion>10.0.4</PackageVersion>
25+
<Version>10.0.5</Version>
26+
<PackageVersion>10.0.5</PackageVersion>
2727
</PropertyGroup>
2828

2929
<PropertyGroup Condition="'$(GITHUB_ACTIONS)' == 'true'">

src/MarkItDown/Intelligence/Providers/Azure/VideoIndexer/VideoIndexerClient.cs

Lines changed: 89 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -270,8 +270,9 @@ public void Dispose()
270270
return null;
271271
}
272272

273-
if (LooksLikeVideoIndexerAccountAccessToken(armToken, out var accountTokenExpiry))
273+
if (LooksLikeVideoIndexerAccountAccessToken(armToken, out var accountTokenExpiry, out var permission))
274274
{
275+
EnsureUploadPermission(permission);
275276
logger?.LogDebug("Using provided token as Azure Video Indexer account access token for account {AccountId}.", accountId);
276277
cachedAccountToken = new AccountAccessToken(armToken, accountTokenExpiry);
277278
return armToken;
@@ -331,13 +332,75 @@ private static string ResolveResourceId(AzureMediaIntelligenceOptions options)
331332

332333
/// <summary>
/// Produces a rooted resource path from a configured resource id, cut back to the
/// ".../providers/Microsoft.VideoIndexer/accounts/{name}" prefix when that sequence is present.
/// </summary>
/// <param name="resourceId">Resource id as configured; it is first passed through <c>ExtractResourcePath</c>.</param>
/// <returns>A path starting with a single "/" and carrying no trailing "/".</returns>
/// <exception cref="ArgumentException">Nothing but whitespace and slashes remains after extraction.</exception>
private static string NormalizeResourceId(string resourceId)
{
    // Trimming whitespace and slashes up front lets a single guard cover both
    // "blank input" and "only slashes" cases.
    var path = ExtractResourcePath(resourceId).Trim().Trim('/');
    if (string.IsNullOrWhiteSpace(path))
    {
        throw new ArgumentException("Azure Video Indexer resource id must be provided.", nameof(resourceId));
    }

    var parts = path.Split('/', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
    if (!TryGetVideoIndexerAccountSegmentIndex(parts, out var accountSegmentIndex))
    {
        // No recognizable account sequence: keep the trimmed path as-is, just rooted.
        return "/" + path;
    }

    // Keep everything up to and including the segment that follows "accounts" (hence index + 2 items).
    return "/" + string.Join("/", parts, 0, accountSegmentIndex + 2);
}
355+
356+
/// <summary>
/// Reduces a user-supplied resource identifier to its path portion.
/// </summary>
/// <param name="resourceId">Either a bare resource path or an absolute http(s) URL that contains one.</param>
/// <returns>
/// The input with surrounding whitespace and any query string or fragment removed.
/// For absolute http(s) URLs only the URL's path component is returned.
/// </returns>
private static string ExtractResourcePath(string resourceId)
{
    var candidate = resourceId.Trim();

    // Everything from the first '?' or '#' onwards (for example "?api-version=...") is not part of the path.
    var queryOrFragmentSeparator = candidate.IndexOfAny(new[] { '?', '#' });
    if (queryOrFragmentSeparator >= 0)
    {
        candidate = candidate[..queryOrFragmentSeparator];
    }

    // Only unwrap genuine web URLs. On Unix a rooted path such as "/subscriptions/..." also parses as an
    // absolute URI (implicit file scheme) and AbsolutePath would then hand back a percent-encoded copy,
    // so plain paths are returned untouched to keep the result identical on every platform.
    if (Uri.TryCreate(candidate, UriKind.Absolute, out var uri) &&
        (uri.Scheme == Uri.UriSchemeHttp || uri.Scheme == Uri.UriSchemeHttps))
    {
        candidate = uri.AbsolutePath;
    }

    return candidate;
}
372+
373+
/// <summary>
/// Searches for the first "accounts" segment that is directly preceded by
/// "providers" and "Microsoft.VideoIndexer" and is followed by at least one more segment.
/// All three comparisons ignore case.
/// </summary>
/// <param name="segments">Path segments to scan.</param>
/// <param name="accountSegmentIndex">Index of the matching "accounts" segment, or -1 when there is none.</param>
/// <returns><c>true</c> when a matching segment was found; otherwise <c>false</c>.</returns>
private static bool TryGetVideoIndexerAccountSegmentIndex(string[] segments, out int accountSegmentIndex)
{
    // Start at 2 so both preceding segments exist; stop one short of the end so a segment follows "accounts".
    for (var position = 2; position < segments.Length - 1; position++)
    {
        var isAccountMarker =
            string.Equals(segments[position], "accounts", StringComparison.OrdinalIgnoreCase) &&
            string.Equals(segments[position - 2], "providers", StringComparison.OrdinalIgnoreCase) &&
            string.Equals(segments[position - 1], "Microsoft.VideoIndexer", StringComparison.OrdinalIgnoreCase);

        if (isAccountMarker)
        {
            accountSegmentIndex = position;
            return true;
        }
    }

    accountSegmentIndex = -1;
    return false;
}
342405

343406
private static string ReadProcessingState(JsonElement root)
@@ -425,9 +488,10 @@ private static string CreateUploadName(string fileName)
425488
return $"{nameWithoutExtension}-{Guid.NewGuid():N}{extension}";
426489
}
427490

428-
private static bool LooksLikeVideoIndexerAccountAccessToken(string token, out DateTimeOffset? expiresOn)
491+
private static bool LooksLikeVideoIndexerAccountAccessToken(string token, out DateTimeOffset? expiresOn, out string? permission)
429492
{
430493
expiresOn = null;
494+
permission = null;
431495

432496
if (!TryParseJwtPayload(token, out var payload))
433497
{
@@ -452,9 +516,28 @@ private static bool LooksLikeVideoIndexerAccountAccessToken(string token, out Da
452516
expiresOn = DateTimeOffset.FromUnixTimeSeconds(expUnix);
453517
}
454518

519+
if (payload.TryGetProperty("Permission", out var permissionNode) &&
520+
permissionNode.ValueKind == JsonValueKind.String)
521+
{
522+
permission = permissionNode.GetString();
523+
}
524+
455525
return true;
456526
}
457527

528+
/// <summary>
/// Rejects a token whose permission value is "Reader" (compared ignoring case).
/// Any other value, including <c>null</c>, passes without effect.
/// </summary>
/// <param name="permission">Permission value taken from the token, or <c>null</c> when none was found.</param>
/// <exception cref="FileConversionException">The permission is "Reader".</exception>
private static void EnsureUploadPermission(string? permission)
{
    var isReadOnly = string.Equals(permission, "Reader", StringComparison.OrdinalIgnoreCase);
    if (isReadOnly)
    {
        throw new FileConversionException(
            "Configured Azure Video Indexer token is read-only (Permission=Reader). " +
            "Uploading media requires Contributor permission. " +
            "Provide ArmAccessToken as an ARM token with contributor access or an account token with Contributor permission.");
    }
}
540+
458541
private static bool TryParseJwtPayload(string token, out JsonElement payload)
459542
{
460543
payload = default;

tests/MarkItDown.Tests/Intelligence/Integration/AzureIntelligenceIntegrationTests.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ namespace MarkItDown.Tests.Intelligence.Integration;
2626
public class AzureIntelligenceIntegrationTests(AzureIntelligenceFixture fixture) : IClassFixture<AzureIntelligenceFixture>
2727
{
2828
private const string HardcodedVideoIndexerArmAccessToken = "TOKEN";
29-
private const string HardcodedVideoIndexerAccountId = "ACCOUNT_GUID";
30-
private const string HardcodedVideoIndexerResourceId = "/subscriptions/SUBSCRIPTION-GUID/resourcegroups/AzureAI/providers/Microsoft.VideoIndexer/accounts/ACCOUNT_NAME/";
29+
private const string HardcodedVideoIndexerAccountId = "GUID";
30+
private const string HardcodedVideoIndexerResourceId = "/subscriptions/GUID/resourcegroups/AzureAI/providers/Microsoft.VideoIndexer/accounts/VIDEO-INDEXER-NAME/";
3131

3232
private static readonly AzureMediaIntelligenceOptions HardcodedVideoIndexerOptions = new()
3333
{
@@ -92,7 +92,7 @@ public async Task VideoIndexer_LiveSmokeTest()
9292
Assert.NotEmpty(result!.Segments);
9393
}
9494

95-
[Fact]
95+
[Fact(Skip = "Manul run only")]
9696
public async Task VideoIndexer_MarkItDownClient_LiveMp4ToMarkdown()
9797
{
9898
if (!IsHardcodedVideoIndexerConfigured())

tests/MarkItDown.Tests/Intelligence/VideoIndexerClientTests.cs

Lines changed: 87 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,55 @@ public async Task UploadAsync_ResourceIdWithoutLeadingSlash_IsNormalizedForArmRe
108108
result!.Value.VideoId.ShouldBe("video123");
109109
}
110110

111+
/// <summary>
/// Resource ids given without a leading slash, with a trailing "/overview" segment, or as a full
/// management URL with a query string must all yield the same generateAccessToken request URL.
/// </summary>
[Theory]
[InlineData("subscriptions/sub/resourceGroups/rg/providers/Microsoft.VideoIndexer/accounts/account/overview")]
[InlineData("/subscriptions/sub/resourceGroups/rg/providers/Microsoft.VideoIndexer/accounts/account/overview/")]
[InlineData("https://management.azure.com/subscriptions/sub/resourceGroups/rg/providers/Microsoft.VideoIndexer/accounts/account/overview/?api-version=2024-01-01")]
public async Task UploadAsync_ResourceIdWithExtraSegments_IsNormalizedToAccountPath(string resourceId)
{
    var handler = new SequenceHandler();

    // First queued response: inspect the token request URL before answering with a token payload.
    handler.Enqueue(request =>
    {
        request.Method.ShouldBe(HttpMethod.Post);

        var requestedUrl = request.RequestUri!.ToString();
        requestedUrl.ShouldContain("https://management.azure.com/subscriptions/sub/resourceGroups/rg/providers/Microsoft.VideoIndexer/accounts/account/generateAccessToken");
        requestedUrl.ShouldNotContain("/overview");
        // Guards against the host and path being glued together without a separating slash.
        requestedUrl.ShouldNotContain("https://management.azure.comsubscriptions/");

        return JsonResponse(HttpStatusCode.OK, new
        {
            accessToken = "token123",
            expirationTime = "2025-01-01T00:00:00Z"
        });
    });

    // Second queued response: supplies the id the test expects back as VideoId.
    handler.Enqueue(_ => JsonResponse(HttpStatusCode.OK, new { id = "video123" }));

    using var httpClient = new HttpClient(handler)
    {
        BaseAddress = new Uri("https://api.videoindexer.ai/")
    };

    var client = new VideoIndexerClient(
        new AzureMediaIntelligenceOptions
        {
            AccountId = "account",
            Location = "trial",
            ResourceId = resourceId
        },
        httpClient,
        new StubArmTokenService("arm-token"));

    await using var media = new MemoryStream(new byte[] { 1, 2, 3 });
    var mediaInfo = new StreamInfo(mimeType: "video/mp4", extension: ".mp4", fileName: "sample.mp4");

    var uploaded = await client.UploadAsync(media, mediaInfo, CancellationToken.None);

    uploaded.ShouldNotBeNull();
    uploaded!.Value.VideoId.ShouldBe("video123");
}
159+
111160
[Fact]
112161
public async Task UploadAsync_WithVideoIndexerAccountAccessToken_SkipsArmGenerateAccessToken()
113162
{
@@ -146,6 +195,32 @@ public async Task UploadAsync_WithVideoIndexerAccountAccessToken_SkipsArmGenerat
146195
result.Value.AccountAccessToken.ShouldBe(token);
147196
}
148197

198+
/// <summary>
/// Supplying a token whose payload carries Permission=Reader must make the upload throw a
/// <see cref="FileConversionException"/> whose message names the permission problem.
/// </summary>
[Fact]
public async Task UploadAsync_WithReadOnlyVideoIndexerAccountToken_FailsFastWithActionableError()
{
    var readerToken = BuildUnsignedJwtWithAudience(
        "https://api.videoindexer.ai/",
        DateTimeOffset.UtcNow.AddMinutes(30),
        permission: "Reader");

    // No responses are queued on the handler for this test.
    using var httpClient = new HttpClient(new SequenceHandler())
    {
        BaseAddress = new Uri("https://api.videoindexer.ai/")
    };

    var client = new VideoIndexerClient(
        new AzureMediaIntelligenceOptions
        {
            AccountId = "account",
            Location = "trial",
            ResourceId = "/subscriptions/sub/resourceGroups/rg/providers/Microsoft.VideoIndexer/accounts/account"
        },
        httpClient,
        new StubArmTokenService(readerToken));

    await using var media = new MemoryStream(new byte[] { 1, 2, 3 });
    var mediaInfo = new StreamInfo(mimeType: "video/mp4", extension: ".mp4", fileName: "sample.mp4");

    var failure = await Should.ThrowAsync<FileConversionException>(
        () => client.UploadAsync(media, mediaInfo, CancellationToken.None));

    failure.Message.ShouldContain("Permission=Reader");
    failure.Message.ShouldContain("Contributor");
}
223+
149224
[Fact]
150225
public async Task UploadAsync_WhenNameConflicts_RetriesWithGeneratedName()
151226
{
@@ -400,7 +475,7 @@ private static HttpResponseMessage JsonResponse(HttpStatusCode statusCode, objec
400475
return new HttpResponseMessage(statusCode) { Content = content };
401476
}
402477

403-
private static string BuildUnsignedJwtWithAudience(string audience, DateTimeOffset expiresOn)
478+
private static string BuildUnsignedJwtWithAudience(string audience, DateTimeOffset expiresOn, string? permission = null)
404479
{
405480
static string Encode(object value)
406481
{
@@ -410,11 +485,18 @@ static string Encode(object value)
410485
}
411486

412487
var header = Encode(new { alg = "none", typ = "JWT" });
413-
var payload = Encode(new
488+
var payloadBody = new Dictionary<string, object?>
414489
{
415-
aud = audience,
416-
exp = expiresOn.ToUnixTimeSeconds()
417-
});
490+
["aud"] = audience,
491+
["exp"] = expiresOn.ToUnixTimeSeconds()
492+
};
493+
494+
if (!string.IsNullOrWhiteSpace(permission))
495+
{
496+
payloadBody["Permission"] = permission;
497+
}
498+
499+
var payload = Encode(payloadBody);
418500

419501
return $"{header}.{payload}.";
420502
}

0 commit comments

Comments
 (0)