-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy pathdocument.rs
More file actions
124 lines (114 loc) · 3.72 KB
/
document.rs
File metadata and controls
124 lines (114 loc) · 3.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
//! PdfDocument — top-level extracted document.
use serde::{Deserialize, Serialize};
use super::content::ContentElement;
/// The top-level extracted PDF document.
///
/// Holds the file identity, the page count, the optional document-information
/// metadata, and the extracted content tree (`kids`). All fields are public and
/// the type derives `Serialize`/`Deserialize`, so it can be written out and read
/// back through serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PdfDocument {
/// Original file name.
pub file_name: String,
/// Original source path when available.
///
/// Omitted from serialized output entirely when `None` (see the
/// `skip_serializing_if` attribute); the other optional fields carry no such
/// attribute.
#[serde(skip_serializing_if = "Option::is_none")]
pub source_path: Option<String>,
/// Number of pages.
pub number_of_pages: u32,
/// Document author, if known.
pub author: Option<String>,
/// Document title, if known.
pub title: Option<String>,
/// Creation date, kept as a string.
// NOTE(review): the date format is not established in this file — confirm against the producer of this value.
pub creation_date: Option<String>,
/// Modification date, kept as a string (same caveat about format as `creation_date`).
pub modification_date: Option<String>,
/// PDF producer application, if known.
pub producer: Option<String>,
/// Creator application, if known.
pub creator: Option<String>,
/// Document subject, if known.
pub subject: Option<String>,
/// Comma-separated keywords, stored as a single string.
pub keywords: Option<String>,
/// Top-level content elements (reading order).
pub kids: Vec<ContentElement>,
}
impl PdfDocument {
/// Create a new empty PdfDocument.
pub fn new(file_name: String) -> Self {
Self {
file_name,
source_path: None,
number_of_pages: 0,
author: None,
title: None,
creation_date: None,
modification_date: None,
producer: None,
creator: None,
subject: None,
keywords: None,
kids: Vec::new(),
}
}
/// Return a list of (key, value) pairs for non-empty metadata fields.
pub fn metadata_pairs(&self) -> Vec<(&str, &str)> {
let mut pairs = Vec::new();
pairs.push(("File", self.file_name.as_str()));
if let Some(ref v) = self.title {
pairs.push(("Title", v.as_str()));
}
if let Some(ref v) = self.author {
pairs.push(("Author", v.as_str()));
}
if let Some(ref v) = self.subject {
pairs.push(("Subject", v.as_str()));
}
if let Some(ref v) = self.keywords {
pairs.push(("Keywords", v.as_str()));
}
if let Some(ref v) = self.creator {
pairs.push(("Creator", v.as_str()));
}
if let Some(ref v) = self.producer {
pairs.push(("Producer", v.as_str()));
}
if let Some(ref v) = self.creation_date {
pairs.push(("Created", v.as_str()));
}
if let Some(ref v) = self.modification_date {
pairs.push(("Modified", v.as_str()));
}
pairs
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed document keeps its name and is otherwise empty.
    #[test]
    fn test_new_document() {
        let fresh = PdfDocument::new(String::from("test.pdf"));
        assert_eq!(fresh.file_name, "test.pdf");
        assert!(fresh.source_path.is_none());
        assert_eq!(fresh.number_of_pages, 0);
        assert!(fresh.kids.is_empty());
    }

    /// Only the populated fields are reported, in the fixed label order.
    #[test]
    fn test_metadata_pairs() {
        let mut report = PdfDocument::new(String::from("report.pdf"));
        report.title = Some(String::from("Annual Report"));
        report.author = Some(String::from("Alice"));
        report.keywords = Some(String::from("finance, report"));

        let expected = vec![
            ("File", "report.pdf"),
            ("Title", "Annual Report"),
            ("Author", "Alice"),
            ("Keywords", "finance, report"),
        ];
        assert_eq!(report.metadata_pairs(), expected);
    }

    /// With no metadata set, the single entry is the "File" pair.
    #[test]
    fn test_metadata_pairs_empty() {
        let bare = PdfDocument::new(String::from("test.pdf"));
        let listed = bare.metadata_pairs();
        // Exactly one pair, and its label is "File".
        assert!(matches!(listed.as_slice(), [("File", _)]));
    }
}