-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy pathcontent.rs
More file actions
128 lines (121 loc) · 4.33 KB
/
content.rs
File metadata and controls
128 lines (121 loc) · 4.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
//! Unified ContentElement enum — all page content.
use serde::{Deserialize, Serialize};
use super::bbox::BoundingBox;
use super::chunks::{ImageChunk, LineArtChunk, LineChunk, TextChunk};
use super::list::PDFList;
use super::semantic::{
SemanticCaption, SemanticFigure, SemanticFormula, SemanticHeaderOrFooter, SemanticHeading,
SemanticNumberHeading, SemanticParagraph, SemanticPicture, SemanticTable,
};
use super::table::TableBorder;
use super::text::{TextBlock, TextLine};
/// Unified enum for all content elements on a page.
///
/// Each variant is a single-field tuple variant wrapping one payload type
/// imported at the top of this module: raw chunks, grouped text, table
/// borders, lists, and the `Semantic*` types.
///
/// NOTE(review): `Serialize`/`Deserialize` are derived with no `#[serde(...)]`
/// attributes visible on this enum, so variant names (and, for some formats,
/// variant order) presumably form part of the serialized representation —
/// confirm before renaming or reordering variants.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ContentElement {
/// Raw text chunk (`TextChunk` payload).
TextChunk(TextChunk),
/// Grouped text line (`TextLine` payload).
TextLine(TextLine),
/// Grouped text block (`TextBlock` payload).
TextBlock(TextBlock),
/// Image bounding box (`ImageChunk` payload).
Image(ImageChunk),
/// Line segment (`LineChunk` payload).
Line(LineChunk),
/// Vector graphic (`LineArtChunk` payload).
LineArt(LineArtChunk),
/// Table border structure (`TableBorder` payload).
TableBorder(TableBorder),
/// List (`PDFList` payload).
List(PDFList),
/// Paragraph (`SemanticParagraph` payload).
Paragraph(SemanticParagraph),
/// Heading (`SemanticHeading` payload).
Heading(SemanticHeading),
/// Numbered heading (`SemanticNumberHeading` payload).
NumberHeading(SemanticNumberHeading),
/// Caption (`SemanticCaption` payload).
Caption(SemanticCaption),
/// Header or footer (`SemanticHeaderOrFooter` payload).
HeaderFooter(SemanticHeaderOrFooter),
/// Figure (`SemanticFigure` payload).
Figure(SemanticFigure),
/// Formula (`SemanticFormula` payload).
Formula(SemanticFormula),
/// Picture with description (`SemanticPicture` payload).
Picture(SemanticPicture),
/// Table (semantic wrapper; `SemanticTable` payload).
Table(SemanticTable),
}
impl ContentElement {
    /// Returns a reference to the bounding box of the wrapped element.
    ///
    /// Most payloads expose a `bbox` field directly. Paragraphs and captions
    /// reach it through one `base` field, headings through two, and numbered
    /// headings through three.
    pub fn bbox(&self) -> &BoundingBox {
        match self {
            // Payloads that carry `bbox` directly.
            Self::TextChunk(e) => &e.bbox,
            Self::TextLine(e) => &e.bbox,
            Self::TextBlock(e) => &e.bbox,
            Self::Image(e) => &e.bbox,
            Self::Line(e) => &e.bbox,
            Self::LineArt(e) => &e.bbox,
            Self::TableBorder(e) => &e.bbox,
            Self::List(e) => &e.bbox,
            Self::HeaderFooter(e) => &e.bbox,
            Self::Figure(e) => &e.bbox,
            Self::Formula(e) => &e.bbox,
            Self::Picture(e) => &e.bbox,
            Self::Table(e) => &e.bbox,
            // Payloads that nest their data under one or more `base` fields.
            Self::Paragraph(e) => &e.base.bbox,
            Self::Caption(e) => &e.base.bbox,
            Self::Heading(e) => &e.base.base.bbox,
            Self::NumberHeading(e) => &e.base.base.base.bbox,
        }
    }

    /// Returns the global index of the wrapped element.
    ///
    /// Yields `None` when the payload's `index` is `None`. For `TextChunk`,
    /// whose index is stored as `usize`, `None` is also returned when the
    /// stored value does not fit in a `u32`.
    pub fn index(&self) -> Option<u32> {
        match self {
            // Checked narrowing: a plain `as u32` cast would silently wrap
            // values above `u32::MAX` into a different, wrong index. The
            // fully-qualified path keeps this independent of the prelude.
            Self::TextChunk(e) => e
                .index
                .and_then(|i| <u32 as std::convert::TryFrom<usize>>::try_from(i).ok()),
            // Payloads that carry `index` directly.
            Self::TextLine(e) => e.index,
            Self::TextBlock(e) => e.index,
            Self::Image(e) => e.index,
            Self::Line(e) => e.index,
            Self::LineArt(e) => e.index,
            Self::TableBorder(e) => e.index,
            Self::List(e) => e.index,
            Self::HeaderFooter(e) => e.index,
            Self::Figure(e) => e.index,
            Self::Formula(e) => e.index,
            Self::Picture(e) => e.index,
            Self::Table(e) => e.index,
            // Payloads that nest their data under one or more `base` fields.
            Self::Paragraph(e) => e.base.index,
            Self::Caption(e) => e.base.index,
            Self::Heading(e) => e.base.base.index,
            Self::NumberHeading(e) => e.base.base.base.index,
        }
    }

    /// Returns the page number recorded on this element's bounding box.
    ///
    /// This is the `page_number` field of whatever [`Self::bbox`] returns.
    pub fn page_number(&self) -> Option<u32> {
        self.bbox().page_number
    }

    /// Sets the global index of the wrapped element to `Some(idx)`.
    ///
    /// Any previously stored index is overwritten.
    pub fn set_index(&mut self, idx: u32) {
        match self {
            // `TextChunk` stores its index as `usize`. Widening from `u32` is
            // lossless on every target whose `usize` is at least 32 bits.
            Self::TextChunk(e) => e.index = Some(idx as usize),
            // Payloads that carry `index` directly.
            Self::TextLine(e) => e.index = Some(idx),
            Self::TextBlock(e) => e.index = Some(idx),
            Self::Image(e) => e.index = Some(idx),
            Self::Line(e) => e.index = Some(idx),
            Self::LineArt(e) => e.index = Some(idx),
            Self::TableBorder(e) => e.index = Some(idx),
            Self::List(e) => e.index = Some(idx),
            Self::HeaderFooter(e) => e.index = Some(idx),
            Self::Figure(e) => e.index = Some(idx),
            Self::Formula(e) => e.index = Some(idx),
            Self::Picture(e) => e.index = Some(idx),
            Self::Table(e) => e.index = Some(idx),
            // Payloads that nest their data under one or more `base` fields.
            Self::Paragraph(e) => e.base.index = Some(idx),
            Self::Caption(e) => e.base.index = Some(idx),
            Self::Heading(e) => e.base.base.index = Some(idx),
            Self::NumberHeading(e) => e.base.base.base.index = Some(idx),
        }
    }
}