Skip to content

Commit 815bcab

Browse files
committed
Drop PDF source conversion path
1 parent f2b8001 commit 815bcab

5 files changed

Lines changed: 12 additions & 71 deletions

File tree

AGENTS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,5 +60,5 @@ When you add a new exported function, add its name to the `$expected` array in t
6060
## Project-specific rules
6161

6262
- Do not remove or rename exported functions without updating `AwakeCoding.OpenSpecs.psd1` and the exports test.
63-
- Conversion: DOCX is handled in-module via OpenXML; PDF uses external `docling` or `markitdown` when available (see `AwakeCoding.OpenSpecs/Private/Get-OpenSpecToolchain.ps1`). Output is textual (tables, ASCII), not image-based.
63+
- Conversion: DOCX is handled in-module via OpenXML. PDF is not used as a conversion source. Output is textual (tables, ASCII), not image-based.
6464
- For bulk or CI conversions, use `-Parallel -ThrottleLimit N` with `Convert-OpenSpecToMarkdown` or `Invoke-OpenSpecConversionPipeline` (PowerShell 7 only).

AwakeCoding.OpenSpecs/Private/ConvertFrom-OpenSpecPdf.ps1

Lines changed: 0 additions & 56 deletions
This file was deleted.

AwakeCoding.OpenSpecs/Private/Get-OpenSpecToolchain.ps1

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
function Get-OpenSpecToolchain {
22
[CmdletBinding()]
33
param(
4-
[switch]$RequirePdfConverter,
54
[switch]$RequireDocxConverter
65
)
76

@@ -22,10 +21,6 @@ function Get-OpenSpecToolchain {
2221
HasOpenXml = $null -ne $openXmlModule
2322
}
2423

25-
if ($RequirePdfConverter -and -not ($toolchain.HasDocling -or $toolchain.HasMarkItDown)) {
26-
throw 'No PDF converter detected. Install docling or markitdown.'
27-
}
28-
2924
if ($RequireDocxConverter -and -not $toolchain.HasOpenXml) {
3025
throw 'No DOCX converter detected. Install the OpenXML PowerShell module.'
3126
}

AwakeCoding.OpenSpecs/Public/Convert-OpenSpecToMarkdown.ps1

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ function Convert-OpenSpecToMarkdown {
88

99
[string]$OutputPath = (Join-Path -Path (Get-Location) -ChildPath 'converted-specs'),
1010

11-
[ValidateSet('Auto', 'DOCX', 'PDF')]
11+
[ValidateSet('Auto', 'DOCX')]
1212
[string]$SourceFormat = 'Auto',
1313

1414
[switch]$Force,
@@ -84,6 +84,11 @@ function Convert-OpenSpecToMarkdown {
8484
$SourceFormat
8585
}
8686

87+
if ($resolvedFormat -eq 'PDF') {
88+
Write-Error "PDF source conversion is not supported. Use DOCX input for '$sourcePath'."
89+
continue
90+
}
91+
8792
if ($resolvedFormat -eq 'Unknown') {
8893
Write-Error "Unable to infer source format for '$sourcePath'."
8994
continue
@@ -136,12 +141,6 @@ function Convert-OpenSpecToMarkdown {
136141
$mediaDirectory = Join-Path -Path $specDirectory -ChildPath 'media'
137142
$conversionStep = ConvertFrom-OpenSpecDocx -InputPath $sourcePath -OutputPath $rawMarkdownPath -Toolchain $toolchain -MediaOutputDirectory $mediaDirectory
138143
}
139-
elseif ($resolvedFormat -eq 'PDF') {
140-
$toolchain = Get-OpenSpecToolchain -RequirePdfConverter
141-
$rawMarkdownPath = Join-Path -Path $artifactDirectory -ChildPath 'raw-pdf.md'
142-
$conversionStep = ConvertFrom-OpenSpecPdf -InputPath $sourcePath -OutputPath $rawMarkdownPath -Toolchain $toolchain
143-
}
144-
145144
$rawMarkdown = Get-Content -LiteralPath $conversionStep.OutputPath -Raw
146145
$normalized = ConvertTo-OpenSpecTextLayout -Markdown $rawMarkdown
147146
$sourceLinkMetadata = if ($conversionStep.PSObject.Properties['LinkMetadata']) { $conversionStep.LinkMetadata } else { $null }

AwakeCoding.OpenSpecs/Public/Invoke-OpenSpecConversionPipeline.ps1

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ function Invoke-OpenSpecConversionPipeline {
55

66
[string]$Query,
77

8-
[ValidateSet('PDF', 'DOCX', 'Both')]
8+
[ValidateSet('DOCX', 'Both')]
99
[string]$Format = 'DOCX',
1010

1111
[string]$DownloadPath = (Join-Path -Path (Get-Location) -ChildPath 'downloads-convert'),
@@ -32,6 +32,9 @@ function Invoke-OpenSpecConversionPipeline {
3232
Save-OpenSpecDocument -Query $Query -Format $Format -OutputPath $DownloadPath -Force:$Force
3333
}
3434

35-
$toConvert = $downloadResults | Where-Object { $_.Status -in 'Downloaded', 'Exists' }
35+
$toConvert = $downloadResults | Where-Object { $_.Status -in 'Downloaded', 'Exists' -and $_.Format -eq 'DOCX' }
36+
if (-not @($toConvert)) {
37+
throw 'No DOCX files are available for conversion. PDF source conversion is not supported.'
38+
}
3639
$toConvert | Convert-OpenSpecToMarkdown -OutputPath $OutputPath -Force:$Force -Parallel:$Parallel -ThrottleLimit $ThrottleLimit -RemoveDocumentIndex:$RemoveDocumentIndex
3740
}

0 commit comments

Comments
 (0)