-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy pathenums.rs
More file actions
173 lines (162 loc) · 3.8 KB
/
enums.rs
File metadata and controls
173 lines (162 loc) · 3.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
//! Enumerations for EdgeParse data models.
use serde::{Deserialize, Serialize};
/// Semantic type classification for PDF elements.
///
/// Every variant is a field-less unit variant, which is what allows the type
/// to derive `Copy`, `Eq` and `Hash` (so it can be used as a map/set key).
///
/// No explicit discriminants and no `#[serde(...)]` attributes are set here:
/// discriminants follow declaration order, and serde's default enum
/// representation (keyed by variant name) applies. Reordering or renaming
/// variants is therefore an observable change.
///
/// See [`SemanticType::is_ignored_standard_type`] for the subset of variants
/// that normal processing skips.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum SemanticType {
/// Document root
Document,
/// Generic division (one of the ignored standard types)
Div,
/// Text paragraph
Paragraph,
/// Inline span (one of the ignored standard types)
Span,
/// Table element
Table,
/// Table headers section (not to be confused with the single header cell,
/// [`SemanticType::TableHeader`])
TableHeaders,
/// Table footer section
TableFooter,
/// Table body section
TableBody,
/// Table row
TableRow,
/// Table header cell
TableHeader,
/// Table data cell
TableCell,
/// Form element (one of the ignored standard types)
Form,
/// Hyperlink (one of the ignored standard types)
Link,
/// Annotation (one of the ignored standard types)
Annot,
/// Caption for image or table
Caption,
/// List container
List,
/// List item label
ListLabel,
/// List item body
ListBody,
/// List item
ListItem,
/// Table of contents
TableOfContent,
/// Table of contents item
TableOfContentItem,
/// Figure/image
Figure,
/// Numbered heading
NumberHeading,
/// Heading
Heading,
/// Title
Title,
/// Block quote
BlockQuote,
/// Footnote/endnote
Note,
/// Page header
Header,
/// Page footer
Footer,
/// Code block
Code,
/// Part/section
Part,
}
impl SemanticType {
    /// Reports whether normal processing skips elements of this semantic type.
    ///
    /// The ignored set is exactly `Div`, `Span`, `Form`, `Link` and `Annot`;
    /// every other variant returns `false`.
    pub fn is_ignored_standard_type(&self) -> bool {
        // Membership is decided with the derived `PartialEq` comparison.
        const IGNORED: [SemanticType; 5] = [
            SemanticType::Div,
            SemanticType::Span,
            SemanticType::Form,
            SemanticType::Link,
            SemanticType::Annot,
        ];
        IGNORED.contains(self)
    }
}
/// Text alignment within a block.
///
/// A `Copy` enum of unit variants. No `Default` is derived, so there is no
/// implicit alignment: callers must always choose a variant explicitly.
/// `Hash` is not derived either.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum TextAlignment {
/// Left-aligned
Left,
/// Right-aligned
Right,
/// Center-aligned
Center,
/// Justified
Justify,
}
/// Text format (baseline position).
///
/// A `Copy` enum of unit variants. `TextFormat::default()` yields
/// [`TextFormat::Normal`], as selected by the `#[default]` attribute below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum TextFormat {
/// Normal baseline (the default value)
#[default]
Normal,
/// Superscript
Superscript,
/// Subscript
Subscript,
}
/// Text type classification.
///
/// A `Copy` enum of unit variants. `TextType::default()` yields
/// [`TextType::Regular`], as selected by the `#[default]` attribute below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum TextType {
/// Regular text (the default value)
#[default]
Regular,
/// Large text
Large,
/// Logo/title text
Logo,
}
/// Processing layer that produced/modified an element.
///
/// A `Copy` enum of unit variants. `PdfLayer::default()` yields
/// [`PdfLayer::Main`], as selected by the `#[default]` attribute below.
// NOTE(review): the declaration order looks like it may follow pipeline stage
// order, but nothing in this file relies on or establishes that — confirm
// before depending on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum PdfLayer {
/// Main content layer (initial extraction); the default value
#[default]
Main,
/// Raw content extraction
Content,
/// Table cell assignment
TableCells,
/// List item detection
ListItems,
/// Table content processing
TableContent,
/// List content processing
ListContent,
/// Text block processing
TextBlockContent,
/// Header and footer processing
HeaderAndFooterContent,
}
/// Triage decision for hybrid mode.
///
/// A `Copy` enum of unit variants describing where processing should take
/// place. No `Default` is derived, so a decision must always be made
/// explicitly by the caller.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum TriageDecision {
/// Process locally (Rust pipeline)
Local,
/// Send to backend
Backend,
/// Use both and merge
Both,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `Div` and `Span` are reported as ignored, while `Paragraph`, `Heading`
    /// and `Table` are not.
    #[test]
    fn test_semantic_type_ignored() {
        for skipped in [SemanticType::Div, SemanticType::Span] {
            assert!(
                skipped.is_ignored_standard_type(),
                "{skipped:?} should be ignored"
            );
        }

        for kept in [
            SemanticType::Paragraph,
            SemanticType::Heading,
            SemanticType::Table,
        ] {
            assert!(
                !kept.is_ignored_standard_type(),
                "{kept:?} should not be ignored"
            );
        }
    }

    /// The derived `Default` for `TextFormat` is the normal baseline.
    #[test]
    fn test_text_format_default() {
        let fallback = TextFormat::default();
        assert_eq!(fallback, TextFormat::Normal);
    }
}