Skip to content

Commit 5fe5e8d

Browse files
bundolee and claude committed
fix(cli): improve --hybrid-hancom-ai-* validation messages and --hybrid help
Two fixes from PR #462 review feedback:

1. CLI-layer validation for the 3 enum-style hancom-ai options (--hybrid-hancom-ai-{regionlist-strategy,ocr-strategy,image-cache}) now produces messages naming the CLI flag, matching the pattern already used by --hybrid, --hybrid-mode, --image-output, etc. Previously the HybridConfig setter's IllegalArgumentException leaked through with a Java field name and no flag context.

2. --hybrid help text now lists hancom-ai as a valid value alongside off and docling-fast. Previously the description omitted hancom-ai even though the new --hybrid-hancom-ai-* options require it.

Three new negative-case tests cover invalid values for each enum option. Regenerated bindings via npm run sync.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent a600a16 commit 5fe5e8d

7 files changed

Lines changed: 74 additions & 9 deletions

File tree

java/opendataloader-pdf-cli/src/main/java/org/opendataloader/pdf/cli/CLIOptions.java

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import org.apache.commons.cli.Option;
2020
import org.apache.commons.cli.Options;
2121
import org.opendataloader.pdf.api.Config;
22+
import org.opendataloader.pdf.hybrid.HybridConfig;
2223

2324
import java.io.File;
2425
import java.io.PrintStream;
@@ -118,7 +119,7 @@ public class CLIOptions {
118119
private static final String HYBRID_LONG_OPTION = "hybrid";
119120
private static final String HYBRID_DESC = "Hybrid backend (requires a running server). "
120121
+ "Quick start: pip install \"opendataloader-pdf[hybrid]\" && opendataloader-pdf-hybrid --port 5002. "
121-
+ "For remote servers use --hybrid-url. Values: off (default), docling-fast";
122+
+ "For remote servers use --hybrid-url. Values: off (default), docling-fast, hancom-ai";
122123

123124
private static final String HYBRID_MODE_LONG_OPTION = "hybrid-mode";
124125
private static final String HYBRID_MODE_DESC = "Hybrid triage mode. Values: auto (default, dynamic triage), full (skip triage, all pages to backend)";
@@ -620,19 +621,42 @@ private static void applyHybridOptions(Config config, CommandLine commandLine) {
620621
if (commandLine.hasOption(HYBRID_HANCOM_AI_REGIONLIST_STRATEGY_LONG_OPTION)) {
621622
String value = commandLine.getOptionValue(HYBRID_HANCOM_AI_REGIONLIST_STRATEGY_LONG_OPTION);
622623
if (value != null && !value.trim().isEmpty()) {
623-
config.getHybridConfig().setRegionlistStrategy(value.trim().toLowerCase(Locale.ROOT));
624+
String normalized = value.trim().toLowerCase(Locale.ROOT);
625+
if (!HybridConfig.REGIONLIST_TABLE_FIRST.equals(normalized)
626+
&& !HybridConfig.REGIONLIST_LIST_ONLY.equals(normalized)) {
627+
throw new IllegalArgumentException(String.format(
628+
"Option --%s: unsupported value '%s'. Supported values: %s, %s",
629+
HYBRID_HANCOM_AI_REGIONLIST_STRATEGY_LONG_OPTION, normalized,
630+
HybridConfig.REGIONLIST_TABLE_FIRST, HybridConfig.REGIONLIST_LIST_ONLY));
631+
}
632+
config.getHybridConfig().setRegionlistStrategy(normalized);
624633
}
625634
}
626635
if (commandLine.hasOption(HYBRID_HANCOM_AI_OCR_STRATEGY_LONG_OPTION)) {
627636
String value = commandLine.getOptionValue(HYBRID_HANCOM_AI_OCR_STRATEGY_LONG_OPTION);
628637
if (value != null && !value.trim().isEmpty()) {
629-
config.getHybridConfig().setOcrStrategy(value.trim().toLowerCase(Locale.ROOT));
638+
String normalized = value.trim().toLowerCase(Locale.ROOT);
639+
if (!HybridConfig.OCR_OFF.equals(normalized)
640+
&& !HybridConfig.OCR_AUTO.equals(normalized)
641+
&& !HybridConfig.OCR_FORCE.equals(normalized)) {
642+
throw new IllegalArgumentException(String.format(
643+
"Option --%s: unsupported value '%s'. Supported values: %s, %s, %s",
644+
HYBRID_HANCOM_AI_OCR_STRATEGY_LONG_OPTION, normalized,
645+
HybridConfig.OCR_OFF, HybridConfig.OCR_AUTO, HybridConfig.OCR_FORCE));
646+
}
647+
config.getHybridConfig().setOcrStrategy(normalized);
630648
}
631649
}
632650
if (commandLine.hasOption(HYBRID_HANCOM_AI_IMAGE_CACHE_LONG_OPTION)) {
633651
String value = commandLine.getOptionValue(HYBRID_HANCOM_AI_IMAGE_CACHE_LONG_OPTION);
634652
if (value != null && !value.trim().isEmpty()) {
635-
config.getHybridConfig().setImageCache(value.trim().toLowerCase(Locale.ROOT));
653+
String normalized = value.trim().toLowerCase(Locale.ROOT);
654+
if (!"memory".equals(normalized) && !"disk".equals(normalized)) {
655+
throw new IllegalArgumentException(String.format(
656+
"Option --%s: unsupported value '%s'. Supported values: memory, disk",
657+
HYBRID_HANCOM_AI_IMAGE_CACHE_LONG_OPTION, normalized));
658+
}
659+
config.getHybridConfig().setImageCache(normalized);
636660
}
637661
}
638662
if (commandLine.hasOption(HYBRID_HANCOM_AI_SAVE_CROPS_LONG_OPTION)) {

java/opendataloader-pdf-cli/src/test/java/org/opendataloader/pdf/cli/CLIOptionsTest.java

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,47 @@ void testCreateConfig_withHybridHancomAiCropOutputDir() throws ParseException {
540540
assertEquals("/tmp/crops", config.getHybridConfig().getCropOutputDir());
541541
}
542542

543+
@Test
544+
void testCreateConfig_hybridHancomAiRegionlistStrategy_invalidValue_throws() {
545+
String[] args = {"--hybrid", "hancom-ai",
546+
"--hybrid-hancom-ai-regionlist-strategy", "bogus",
547+
testPdf.getAbsolutePath()};
548+
IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, () -> {
549+
CommandLine cmd = parser.parse(options, args);
550+
CLIOptions.createConfigFromCommandLine(cmd);
551+
});
552+
assertTrue(ex.getMessage().contains("--hybrid-hancom-ai-regionlist-strategy"),
553+
"Error should name the offending CLI flag, got: " + ex.getMessage());
554+
assertTrue(ex.getMessage().contains("table-first") && ex.getMessage().contains("list-only"),
555+
"Error should list valid values, got: " + ex.getMessage());
556+
}
557+
558+
@Test
559+
void testCreateConfig_hybridHancomAiOcrStrategy_invalidValue_throws() {
560+
String[] args = {"--hybrid", "hancom-ai",
561+
"--hybrid-hancom-ai-ocr-strategy", "bogus",
562+
testPdf.getAbsolutePath()};
563+
IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, () -> {
564+
CommandLine cmd = parser.parse(options, args);
565+
CLIOptions.createConfigFromCommandLine(cmd);
566+
});
567+
assertTrue(ex.getMessage().contains("--hybrid-hancom-ai-ocr-strategy"),
568+
"Error should name the offending CLI flag, got: " + ex.getMessage());
569+
}
570+
571+
@Test
572+
void testCreateConfig_hybridHancomAiImageCache_invalidValue_throws() {
573+
String[] args = {"--hybrid", "hancom-ai",
574+
"--hybrid-hancom-ai-image-cache", "bogus",
575+
testPdf.getAbsolutePath()};
576+
IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, () -> {
577+
CommandLine cmd = parser.parse(options, args);
578+
CLIOptions.createConfigFromCommandLine(cmd);
579+
});
580+
assertTrue(ex.getMessage().contains("--hybrid-hancom-ai-image-cache"),
581+
"Error should name the offending CLI flag, got: " + ex.getMessage());
582+
}
583+
543584
@Test
544585
void testCreateConfig_hybridHancomAiOption_withoutHancomAi_throws() {
545586
String[] args = {"--hybrid-hancom-ai-regionlist-strategy", "list-only",

node/opendataloader-pdf/src/cli-options.generated.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ export function registerCliOptions(program: Command): void {
2727
program.option('--pages <value>', 'Pages to extract (e.g., "1,3,5-7"). Default: all pages');
2828
program.option('--include-header-footer', 'Include page headers and footers in output');
2929
program.option('--detect-strikethrough', 'Detect strikethrough text and wrap with ~~ in Markdown output or <del></del> tag in HTML output (experimental)');
30-
program.option('--hybrid <value>', 'Hybrid backend (requires a running server). Quick start: pip install "opendataloader-pdf[hybrid]" && opendataloader-pdf-hybrid --port 5002. For remote servers use --hybrid-url. Values: off (default), docling-fast');
30+
program.option('--hybrid <value>', 'Hybrid backend (requires a running server). Quick start: pip install "opendataloader-pdf[hybrid]" && opendataloader-pdf-hybrid --port 5002. For remote servers use --hybrid-url. Values: off (default), docling-fast, hancom-ai');
3131
program.option('--hybrid-mode <value>', 'Hybrid triage mode. Values: auto (default, dynamic triage), full (skip triage, all pages to backend)');
3232
program.option('--hybrid-url <value>', 'Hybrid backend server URL (overrides default)');
3333
program.option('--hybrid-timeout <value>', 'Hybrid backend request timeout in milliseconds (0 = no timeout). Default: 0');

node/opendataloader-pdf/src/convert-options.generated.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ export interface ConvertOptions {
4545
includeHeaderFooter?: boolean;
4646
/** Detect strikethrough text and wrap with ~~ in Markdown output or <del></del> tag in HTML output (experimental) */
4747
detectStrikethrough?: boolean;
48-
/** Hybrid backend (requires a running server). Quick start: pip install "opendataloader-pdf[hybrid]" && opendataloader-pdf-hybrid --port 5002. For remote servers use --hybrid-url. Values: off (default), docling-fast */
48+
/** Hybrid backend (requires a running server). Quick start: pip install "opendataloader-pdf[hybrid]" && opendataloader-pdf-hybrid --port 5002. For remote servers use --hybrid-url. Values: off (default), docling-fast, hancom-ai */
4949
hybrid?: string;
5050
/** Hybrid triage mode. Values: auto (default, dynamic triage), full (skip triage, all pages to backend) */
5151
hybridMode?: string;

options.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@
166166
"type": "string",
167167
"required": false,
168168
"default": "off",
169-
"description": "Hybrid backend (requires a running server). Quick start: pip install \"opendataloader-pdf[hybrid]\" && opendataloader-pdf-hybrid --port 5002. For remote servers use --hybrid-url. Values: off (default), docling-fast"
169+
"description": "Hybrid backend (requires a running server). Quick start: pip install \"opendataloader-pdf[hybrid]\" && opendataloader-pdf-hybrid --port 5002. For remote servers use --hybrid-url. Values: off (default), docling-fast, hancom-ai"
170170
},
171171
{
172172
"name": "hybrid-mode",

python/opendataloader-pdf/src/opendataloader_pdf/cli_options_generated.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@
196196
"type": "string",
197197
"required": False,
198198
"default": "off",
199-
"description": "Hybrid backend (requires a running server). Quick start: pip install \"opendataloader-pdf[hybrid]\" && opendataloader-pdf-hybrid --port 5002. For remote servers use --hybrid-url. Values: off (default), docling-fast",
199+
"description": "Hybrid backend (requires a running server). Quick start: pip install \"opendataloader-pdf[hybrid]\" && opendataloader-pdf-hybrid --port 5002. For remote servers use --hybrid-url. Values: off (default), docling-fast, hancom-ai",
200200
},
201201
{
202202
"name": "hybrid-mode",

python/opendataloader-pdf/src/opendataloader_pdf/convert_generated.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def convert(
6767
pages: Pages to extract (e.g., "1,3,5-7"). Default: all pages
6868
include_header_footer: Include page headers and footers in output
6969
detect_strikethrough: Detect strikethrough text and wrap with ~~ in Markdown output or <del></del> tag in HTML output (experimental)
70-
hybrid: Hybrid backend (requires a running server). Quick start: pip install "opendataloader-pdf[hybrid]" && opendataloader-pdf-hybrid --port 5002. For remote servers use --hybrid-url. Values: off (default), docling-fast
70+
hybrid: Hybrid backend (requires a running server). Quick start: pip install "opendataloader-pdf[hybrid]" && opendataloader-pdf-hybrid --port 5002. For remote servers use --hybrid-url. Values: off (default), docling-fast, hancom-ai
7171
hybrid_mode: Hybrid triage mode. Values: auto (default, dynamic triage), full (skip triage, all pages to backend)
7272
hybrid_url: Hybrid backend server URL (overrides default)
7373
hybrid_timeout: Hybrid backend request timeout in milliseconds (0 = no timeout). Default: 0

0 commit comments

Comments
 (0)