Skip to content

Commit 2973b08

Browse files
committed
Downgrade kreuzberg because of license change
1 parent 7599023 commit 2973b08

3 files changed

Lines changed: 11 additions & 15 deletions

File tree

Cargo.lock

Lines changed: 4 additions & 11 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -350,7 +350,7 @@ google-cloud-token = { version = "0.1", optional = true }
350350
hickory-resolver = { version = "0.25", features = ["tokio", "system-config"], optional = true }
351351
hostname = { version = "0.4.2", optional = true }
352352
jsonschema = { version = "0.29", optional = true }
353-
kreuzberg = { version = "4.8", default-features = false, features = ["tokio-runtime", "bundled-pdfium", "office", "excel", "ocr"], optional = true }
353+
kreuzberg = { version = "~4.7", default-features = false, features = ["tokio-runtime", "bundled-pdfium", "office", "excel", "ocr"], optional = true }
354354
metrics = { version = "0.24", optional = true }
355355
metrics-exporter-prometheus = { version = "0.16", optional = true }
356356
open = { version = "5.3.3", optional = true }

src/services/document_processor.rs

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2191,16 +2191,20 @@ fn build_kreuzberg_config(config: &DocumentExtractionConfig) -> kreuzberg::Extra
21912191
kreuzberg_config.ocr = Some(kreuzberg::OcrConfig {
21922192
backend: "tesseract".to_string(),
21932193
language: config.ocr_language.clone(),
2194-
..Default::default()
2194+
tesseract_config: None,
2195+
output_format: None,
2196+
paddle_ocr_config: None,
2197+
element_config: None,
21952198
});
21962199
kreuzberg_config.force_ocr = config.force_ocr;
21972200
}
21982201

21992202
// Configure PDF-specific options
22002203
kreuzberg_config.pdf_options = Some(kreuzberg::PdfConfig {
22012204
extract_images: config.pdf_extract_images,
2205+
passwords: None,
22022206
extract_metadata: true,
2203-
..Default::default()
2207+
hierarchy: None,
22042208
});
22052209

22062210
// Configure image extraction settings (includes DPI for OCR)
@@ -2212,7 +2216,6 @@ fn build_kreuzberg_config(config: &DocumentExtractionConfig) -> kreuzberg::Extra
22122216
auto_adjust_dpi: true,
22132217
min_dpi: 72,
22142218
max_dpi: 600,
2215-
..Default::default()
22162219
});
22172220
}
22182221

0 commit comments

Comments
 (0)