-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathConvertFrom-OpenSpecPdf.ps1
More file actions
56 lines (47 loc) · 2.04 KB
/
ConvertFrom-OpenSpecPdf.ps1
File metadata and controls
56 lines (47 loc) · 2.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
function ConvertFrom-OpenSpecPdf {
    <#
    .SYNOPSIS
    Converts a PDF file to Markdown using the first available external converter.

    .DESCRIPTION
    Tries the docling CLI first (when $Toolchain.HasDocling is set) and falls back to
    the markitdown CLI (when $Toolchain.HasMarkItDown is set). The directory that will
    hold the output file is created if it does not exist. Standard output of the
    external tools is forwarded to the verbose stream so that the function emits only
    the conversion-step object.

    .PARAMETER InputPath
    Path of the PDF file to convert.

    .PARAMETER OutputPath
    Path of the Markdown file to produce. A bare file name is resolved against the
    current location.

    .PARAMETER Toolchain
    Object describing the available converters. The properties read here are
    HasDocling, DoclingPath, HasMarkItDown and MarkItDownPath.

    .OUTPUTS
    A single object with PSTypeName 'AwakeCoding.OpenSpecs.ConversionStep' carrying
    Strategy, OutputPath and Notes.

    .NOTES
    Throws when no converter is available or every attempted converter fails; the
    message lists the reason recorded for each attempt.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory)]
        [string]$InputPath,
        [Parameter(Mandatory)]
        [string]$OutputPath,
        [Parameter(Mandatory)]
        [object]$Toolchain
    )

    # Split-Path yields an empty string for a bare file name; an empty path cannot be
    # bound to -LiteralPath and is not a usable '--output' value for docling.
    $outputDirectory = Split-Path -Path $OutputPath -Parent
    if ([string]::IsNullOrEmpty($outputDirectory)) {
        $outputDirectory = '.'
    }
    if (-not (Test-Path -LiteralPath $outputDirectory)) {
        [void](New-Item -Path $outputDirectory -ItemType Directory -Force)
    }

    $notes = New-Object System.Collections.Generic.List[string]

    if ($Toolchain.HasDocling) {
        $doclingArguments = @('--to', 'md', '--output', $outputDirectory, $InputPath)

        # Only trust $LASTEXITCODE when the invocation itself completed; a failed launch
        # leaves the previous command's exit code in place.
        $doclingSucceeded = $false
        try {
            # Forward tool output to the verbose stream so it does not become part of
            # this function's return value.
            & $Toolchain.DoclingPath @doclingArguments | ForEach-Object { Write-Verbose ([string]$_) }
            $doclingSucceeded = ($LASTEXITCODE -eq 0)
        }
        catch {
            $notes.Add(('docling could not be invoked: {0}' -f $_.Exception.Message))
        }

        if ($doclingSucceeded) {
            # docling names its output after the input file, inside the output directory.
            $candidate = Join-Path -Path $outputDirectory -ChildPath ("{0}.md" -f [System.IO.Path]::GetFileNameWithoutExtension($InputPath))
            if (Test-Path -LiteralPath $candidate) {
                $candidateFullPath = $PSCmdlet.GetUnresolvedProviderPathFromPSPath($candidate)
                $outputFullPath = $PSCmdlet.GetUnresolvedProviderPathFromPSPath($OutputPath)

                # Nothing to move when docling already wrote directly to the requested path.
                if ($candidateFullPath -cne $outputFullPath) {
                    Move-Item -LiteralPath $candidate -Destination $OutputPath -Force
                }

                return [pscustomobject]@{
                    PSTypeName = 'AwakeCoding.OpenSpecs.ConversionStep'
                    Strategy   = 'docling-pdf'
                    OutputPath = $OutputPath
                    Notes      = @('Converted with docling CLI.')
                }
            }
        }

        $notes.Add('docling was detected but did not produce expected markdown output.')
    }

    if ($Toolchain.HasMarkItDown) {
        $markitdownArguments = @($InputPath, '--output', $OutputPath)

        $markitdownSucceeded = $false
        try {
            & $Toolchain.MarkItDownPath @markitdownArguments | ForEach-Object { Write-Verbose ([string]$_) }
            $markitdownSucceeded = ($LASTEXITCODE -eq 0)
        }
        catch {
            $notes.Add(('markitdown could not be invoked: {0}' -f $_.Exception.Message))
        }

        if ($markitdownSucceeded -and (Test-Path -LiteralPath $OutputPath)) {
            return [pscustomobject]@{
                PSTypeName = 'AwakeCoding.OpenSpecs.ConversionStep'
                Strategy   = 'markitdown-pdf'
                OutputPath = $OutputPath
                Notes      = @('Converted with markitdown CLI.')
            }
        }

        $notes.Add('markitdown was detected but conversion failed.')
    }

    throw ("Unable to convert PDF '{0}' to Markdown. {1}" -f $InputPath, ($notes -join ' '))
}