Skip to content

Commit c2dfde6

Browse files
committed
chore: release v0.2.3
Version bump:
- Workspace and all SDK manifests: 0.2.2 → 0.2.3
- WASM pkg rebuilt with 0.2.3 binary

Code quality (Clippy fixes):
- Remove identical else-if branch in PSM mode selection
- Replace manual range check with RangeInclusive::contains
- Remove redundant & refs passed to detect_bordered_raster_grid and extract_raster_cell_text
- Rewrite let-else patterns with the ? operator
- Replace Vec::push chain with vec![] macro
- Remove now-unused is_horizontal variable

Formatting:
- rustfmt pass on raster_table_ocr.rs and markdown.rs

Demo:
- Rebuilt demo/dist with WASM 0.2.3 binary

Site:
- Updated WASM API doc comment version to 0.2.3
1 parent b698a10 commit c2dfde6

16 files changed

Lines changed: 142 additions & 98 deletions

File tree

CHANGELOG.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,24 @@ this project adheres to [Semantic Versioning](https://semver.org/).
77

88
---
99

10+
## [0.2.3] — 2026-03-28
11+
12+
### Added
13+
- Hybrid OCR false-positive guard: photo/matrix heuristics now prevent non-table images from being classified as tables in raster OCR mode
14+
- Tie-aware benchmark ranking: identical scores across engines now share a rank rather than producing misleading orderings
15+
16+
### Changed
17+
- Heading detector robustness improvements for edge-case font-size clustering
18+
- Table cluster detector refined to reduce false positives on dense text regions
19+
- Benchmark HTML and terminal reporters include PBF in verdict summary; hybrid engines show correct display names
20+
- Site benchmark figures updated to 2026-03-28 snapshot: EdgeParse 0.7811 overall, 0.007 s/doc (83× faster than Docling, 49× faster than PyMuPDF4LLM, 2× faster than OpenDataLoader, TEDS 73% better than OpenDataLoader)
21+
22+
### Fixed
23+
- Raster table OCR no longer triggers on photo-heavy or matrix-style pages that lack tabular structure
24+
- Speed and quality claims on the website corrected to match current measured benchmark figures
25+
26+
---
27+
1028
## [0.2.2] — 2026-03-26
1129

1230
### Added

Cargo.lock

Lines changed: 5 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ default-members = [
1515
]
1616

1717
[workspace.package]
18-
version = "0.2.2"
18+
version = "0.2.3"
1919
edition = "2021"
2020
rust-version = "1.85"
2121
license = "Apache-2.0"

crates/edgeparse-core/src/output/markdown.rs

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8397,7 +8397,10 @@ fn looks_like_hyphenated_table_title_continuation(
83978397
text: &str,
83988398
next: Option<&ContentElement>,
83998399
) -> bool {
8400-
if !matches!(next, Some(ContentElement::Table(_)) | Some(ContentElement::TableBorder(_))) {
8400+
if !matches!(
8401+
next,
8402+
Some(ContentElement::Table(_)) | Some(ContentElement::TableBorder(_))
8403+
) {
84018404
return false;
84028405
}
84038406

@@ -8484,7 +8487,10 @@ fn looks_like_table_header_duplicate_heading(doc: &PdfDocument, idx: usize, text
84848487
if candidate.page_number() != page_number {
84858488
break;
84868489
}
8487-
if matches!(candidate, ContentElement::Table(_) | ContentElement::TableBorder(_)) {
8490+
if matches!(
8491+
candidate,
8492+
ContentElement::Table(_) | ContentElement::TableBorder(_)
8493+
) {
84888494
break;
84898495
}
84908496

@@ -13559,15 +13565,28 @@ mod tests {
1355913565
fn test_duplicate_table_header_heading_is_demoted() {
1356013566
let mut doc = PdfDocument::new("duplicate-table-header-heading.pdf".to_string());
1356113567
doc.number_of_pages = 1;
13562-
doc.kids.push(make_heading("MOHAVE COMMUNITY COLLEGE BIO181"));
13568+
doc.kids
13569+
.push(make_heading("MOHAVE COMMUNITY COLLEGE BIO181"));
1356313570
doc.kids.push(make_n_column_table(
1356413571
&[
13565-
vec!["", "Saccharometer", "DI Water", "Glucose Solution", "Yeast Suspension"],
13572+
vec![
13573+
"",
13574+
"Saccharometer",
13575+
"DI Water",
13576+
"Glucose Solution",
13577+
"Yeast Suspension",
13578+
],
1356613579
vec!["1", "", "8 ml", "6 ml", "0 ml"],
1356713580
vec!["2", "", "12 ml", "0 ml", "2 ml"],
1356813581
vec!["3", "", "6 ml", "6 ml", "2 ml"],
1356913582
],
13570-
&[(72.0, 110.0), (110.0, 210.0), (210.0, 300.0), (300.0, 430.0), (430.0, 540.0)],
13583+
&[
13584+
(72.0, 110.0),
13585+
(110.0, 210.0),
13586+
(210.0, 300.0),
13587+
(300.0, 430.0),
13588+
(430.0, 540.0),
13589+
],
1357113590
));
1357213591
doc.kids.push(make_heading_at(
1357313592
72.0,
@@ -13576,12 +13595,18 @@ mod tests {
1357613595
108.0,
1357713596
"Saccharometer DI Water Glucose Solution Yeast Suspension",
1357813597
));
13579-
doc.kids.push(make_paragraph_at(72.0, 72.0, 120.0, 88.0, "below"));
13580-
doc.kids.push(make_paragraph_at(72.0, 56.0, 240.0, 72.0, "1 16 ml 12 ml"));
13581-
doc.kids.push(make_paragraph_at(296.0, 56.0, 340.0, 72.0, "0 ml"));
13598+
doc.kids
13599+
.push(make_paragraph_at(72.0, 72.0, 120.0, 88.0, "below"));
13600+
doc.kids
13601+
.push(make_paragraph_at(72.0, 56.0, 240.0, 72.0, "1 16 ml 12 ml"));
13602+
doc.kids
13603+
.push(make_paragraph_at(296.0, 56.0, 340.0, 72.0, "0 ml"));
1358213604

1358313605
let md = to_markdown(&doc).unwrap();
13584-
assert!(md.contains("Saccharometer DI Water Glucose Solution Yeast Suspension"), "{md}");
13606+
assert!(
13607+
md.contains("Saccharometer DI Water Glucose Solution Yeast Suspension"),
13608+
"{md}"
13609+
);
1358513610
assert!(
1358613611
!md.contains("# Saccharometer DI Water Glucose Solution Yeast Suspension"),
1358713612
"{md}"

0 commit comments

Comments
 (0)