Skip to content

Commit c1ea810

Browse files
committed
tests
1 parent b6ded3f commit c1ea810

File tree

3 files changed

+85
-0
lines changed

3 files changed

+85
-0
lines changed

AGENTS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ If I tell you to remember something, you do the same, update
1010

1111
## Rules to follow
1212
- MIME handling: always use `ManagedCode.MimeTypes` for MIME constants, lookups, and validation logic.
13+
- Treat this repository as a high-fidelity port of `microsoft-markitdown`: every test fixture copied from the upstream `tests/test_files/` directory must be referenced by .NET tests (either as positive conversions or explicit unsupported cases). No orphaned fixtures.
1314

1415
# Repository Guidelines
1516

tests/MarkItDown.Tests/MarkItDownIntegrationTests.cs

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System;
2+
using System.Collections.Generic;
23
using System.IO;
34
using System.Linq;
45
using System.Text;
@@ -11,6 +12,12 @@ namespace MarkItDown.Tests;
1112
public class MarkItDownIntegrationTests
1213
{
1314
/// <summary>
/// Theory rows built from <c>TestVectorsData.General</c>: each vector is wrapped
/// in its own single-element argument array.
/// </summary>
public static IEnumerable<object[]> GeneralVectors
{
    get
    {
        return from vector in TestVectorsData.General
               select new object[] { vector };
    }
}
15+
/// <summary>
/// Theory rows for the <c>*_UnsupportedThrows</c> tests: a fixture file name
/// paired with a <c>StreamInfo</c> carrying that fixture's MIME type and extension.
/// </summary>
public static IEnumerable<object[]> UnsupportedVectors
{
    get
    {
        // Builds one row: the file name followed by its stream hints.
        static object[] Row(string fileName, string mimeType, string extension)
        {
            return new object[] { fileName, new StreamInfo(mimeType: mimeType, extension: extension) };
        }

        return new List<object[]>
        {
            Row("random.bin", "application/octet-stream", ".bin"),
            Row("test.xls", "application/vnd.ms-excel", ".xls"),
            Row("test_outlook_msg.msg", "application/vnd.ms-outlook", ".msg"),
        };
    }
}
1421

1522
[Fact]
1623
public async Task ConvertAsync_WithValidFile_ReturnsSuccess()
@@ -405,6 +412,49 @@ public Task<DocumentConverterResult> ConvertAsync(Stream stream, StreamInfo stre
405412
}
406413
}
407414

415+
/// <summary>
/// Converting an unsupported fixture by file path must throw
/// <c>UnsupportedFormatException</c>.
/// </summary>
/// <param name="fileName">Fixture file name resolved through <c>TestAssetLoader.GetAssetPath</c>.</param>
/// <param name="streamInfo">Supplied by the shared member data; only checked for non-null here.</param>
[Theory]
[MemberData(nameof(UnsupportedVectors))]
public async Task Convert_FilePath_UnsupportedThrows(string fileName, StreamInfo streamInfo)
{
    // The path overload takes no hints; the parameter exists because the rows are shared.
    Assert.NotNull(streamInfo);

    var converter = new global::MarkItDown.MarkItDown();
    var assetPath = TestAssetLoader.GetAssetPath(fileName);
    Func<Task> convertByPath = () => converter.ConvertAsync(assetPath);

    await Assert.ThrowsAsync<UnsupportedFormatException>(convertByPath);
}
425+
426+
/// <summary>
/// Converting an unsupported fixture from a stream, with the row's
/// <c>StreamInfo</c> passed as hints, must throw <c>UnsupportedFormatException</c>.
/// </summary>
/// <param name="fileName">Fixture file name opened through <c>TestAssetLoader.OpenAsset</c>.</param>
/// <param name="streamInfo">Hints forwarded to <c>ConvertAsync</c> alongside the stream.</param>
[Theory]
[MemberData(nameof(UnsupportedVectors))]
public async Task Convert_StreamWithHints_UnsupportedThrows(string fileName, StreamInfo streamInfo)
{
    var converter = new global::MarkItDown.MarkItDown();
    await using var assetStream = TestAssetLoader.OpenAsset(fileName);
    Func<Task> convertWithHints = () => converter.ConvertAsync(assetStream, streamInfo);

    await Assert.ThrowsAsync<UnsupportedFormatException>(convertWithHints);
}
435+
436+
/// <summary>
/// Converting an unsupported fixture from a stream, with a default-constructed
/// <c>StreamInfo</c> instead of the row's hints, must throw
/// <c>UnsupportedFormatException</c>.
/// </summary>
/// <param name="fileName">Fixture file name opened through <c>TestAssetLoader.OpenAsset</c>.</param>
/// <param name="streamInfo">Supplied by the shared member data; only checked for non-null here.</param>
[Theory]
[MemberData(nameof(UnsupportedVectors))]
public async Task Convert_StreamWithoutHints_UnsupportedThrows(string fileName, StreamInfo streamInfo)
{
    // The row's hints are deliberately not forwarded to the converter.
    Assert.NotNull(streamInfo);

    var converter = new global::MarkItDown.MarkItDown();
    await using var assetStream = TestAssetLoader.OpenAsset(fileName);
    Func<Task> convertWithoutHints = () => converter.ConvertAsync(assetStream, new StreamInfo());

    await Assert.ThrowsAsync<UnsupportedFormatException>(convertWithoutHints);
}
446+
447+
/// <summary>
/// Converting an unsupported fixture supplied as a base64 <c>data:</c> URI must
/// throw <c>UnsupportedFormatException</c>.
/// </summary>
/// <param name="fileName">Fixture file name whose bytes are embedded in the URI.</param>
/// <param name="streamInfo">Provides the MIME type used as the URI's media type.</param>
[Theory]
[MemberData(nameof(UnsupportedVectors))]
public async Task Convert_DataUri_UnsupportedThrows(string fileName, StreamInfo streamInfo)
{
    var converter = new global::MarkItDown.MarkItDown();
    var assetPath = TestAssetLoader.GetAssetPath(fileName);
    var content = await File.ReadAllBytesAsync(assetPath);

    // Use the generic binary media type when the row carries no MIME type.
    var mediaType = streamInfo.MimeType ?? "application/octet-stream";
    var encoded = Convert.ToBase64String(content);
    var dataUri = $"data:{mediaType};base64,{encoded}";
    Func<Task> convertDataUri = () => converter.ConvertUriAsync(dataUri);

    await Assert.ThrowsAsync<UnsupportedFormatException>(convertDataUri);
}
457+
408458
private static void AssertVectorOutput(FileTestVector vector, DocumentConverterResult result)
409459
{
410460
Assert.NotNull(result);

tests/MarkItDown.Tests/TestVectorsData.cs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,18 @@ internal static class TestVectorsData
105105
MustNotInclude: Array.Empty<string>(),
106106
SupportsStreamGuess: false
107107
),
108+
new(
109+
FileName: "equations.docx",
110+
MimeType: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
111+
Charset: null,
112+
Url: null,
113+
MustInclude: new[]
114+
{
115+
"From Eq. 36.1.3",
116+
},
117+
MustNotInclude: Array.Empty<string>(),
118+
SupportsStreamGuess: false
119+
),
108120
new(
109121
FileName: "test_serp.html",
110122
MimeType: "text/html",
@@ -117,6 +129,16 @@ internal static class TestVectorsData
117129
},
118130
MustNotInclude: Array.Empty<string>()
119131
),
132+
new(
133+
FileName: "test.epub",
134+
MimeType: "application/epub+zip",
135+
Charset: null,
136+
Url: null,
137+
MustInclude: Array.Empty<string>(),
138+
MustNotInclude: Array.Empty<string>(),
139+
SupportsStreamGuess: false,
140+
SupportsDataUri: false
141+
),
120142
new(
121143
FileName: "test_mskanji.csv",
122144
MimeType: "text/csv",
@@ -226,5 +248,17 @@ internal static class TestVectorsData
226248
},
227249
MustNotInclude: Array.Empty<string>()
228250
),
251+
new(
252+
FileName: "test.jpg",
253+
MimeType: "image/jpeg",
254+
Charset: null,
255+
Url: null,
256+
MustInclude: new[]
257+
{
258+
"*No image metadata available.*",
259+
},
260+
MustNotInclude: Array.Empty<string>(),
261+
SupportsStreamGuess: false
262+
),
229263
};
230264
}

0 commit comments

Comments
 (0)