-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathpdf_cmap_parser.cpp
More file actions
137 lines (113 loc) · 3.8 KB
/
Copy pathpdf_cmap_parser.cpp
File metadata and controls
137 lines (113 loc) · 3.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#include <odr/internal/pdf/pdf_cmap_parser.hpp>
#include <odr/internal/pdf/pdf_cmap.hpp>
#include <odr/internal/util/byte_util.hpp>
#include <iostream>
namespace odr::internal::pdf {
// Character type of the stream buffer; used when converting a peeked value
// into a character in `read_token`.
using char_type = std::streambuf::char_type;
// Integer type returned when peeking the stream buffer; wide enough to also
// represent the end-of-file marker.
using int_type = std::streambuf::int_type;
// End-of-file marker that values returned by `m_parser.geti()` are compared
// against.
static constexpr int_type eof = std::streambuf::traits_type::eof();
/// Creates a CMap parser whose underlying object parser is constructed from
/// `in`.
///
/// @param in stream handed to the wrapped object parser.
CMapParser::CMapParser(std::istream &in)
    : m_parser(in) {
}
/// Gives access to the input stream exposed by the wrapped object parser.
///
/// @return the stream returned by `m_parser.in()`.
std::istream &CMapParser::in() {
  std::istream &stream = m_parser.in();
  return stream;
}
/// Gives access to the stream buffer exposed by the wrapped object parser.
///
/// @return the stream buffer returned by `m_parser.sb()`.
std::streambuf &CMapParser::sb() {
  std::streambuf &buffer = m_parser.sb();
  return buffer;
}
/// Gives read-only access to the wrapped object parser.
///
/// @return a const reference to the `m_parser` member.
const ObjectParser &CMapParser::parser() {
  return m_parser;
}
std::variant<Object, std::string> CMapParser::read_token() {
if (m_parser.peek_number()) {
return std::visit([](auto n) { return Object(n); },
m_parser.read_integer_or_real());
}
if (m_parser.peek_string()) {
return std::visit([](auto s) { return Object(std::move(s)); },
m_parser.read_string());
}
if (m_parser.peek_name()) {
return Object(m_parser.read_name());
}
if (m_parser.peek_dictionary()) {
return Object(m_parser.read_dictionary());
}
std::string token;
while (true) {
const int_type i = m_parser.geti();
if (i == eof) {
return token;
}
const auto c = static_cast<char_type>(i);
if (ObjectParser::is_whitespace(c)) {
return token;
}
m_parser.bumpc();
token += c;
}
}
/// Consumes the `n` entries of a `begincodespacerange` section.
///
/// Each entry is read as two objects, each followed by optional whitespace.
/// The values are currently discarded; `cmap` is not touched.
///
/// @param n    number of entries to consume.
/// @param cmap unused for now.
void CMapParser::read_codespacerange(const std::uint32_t n,
                                     [[maybe_unused]] const CMap &cmap) {
  m_parser.skip_whitespace();

  for (std::uint32_t remaining = n; remaining != 0; --remaining) {
    [[maybe_unused]] const auto range_begin = m_parser.read_object();
    m_parser.skip_whitespace();

    [[maybe_unused]] const auto range_end = m_parser.read_object();
    m_parser.skip_whitespace();

    // TODO
  }
}
/// Reads the `n` entries of a `beginbfchar` section and records them in `cmap`.
///
/// Each entry consists of two string objects: the source glyph code followed by
/// the destination text. The destination bytes are viewed as 16-bit units after
/// being passed through `util::byte::reverse_bytes` (presumably converting the
/// big-endian file representation to host order - confirm against the helper).
///
/// Only the first glyph byte and the first 16-bit unit are mapped. Entries
/// whose lengths differ from one are reported on stderr. Entries that have no
/// glyph byte or no complete 16-bit unit are skipped instead of being mapped.
///
/// @param n    number of entries to read.
/// @param cmap mapping that receives one `map_bfchar` call per usable entry.
void CMapParser::read_bfchar(const std::uint32_t n, CMap &cmap) {
  m_parser.skip_whitespace();

  for (std::uint32_t i = 0; i < n; ++i) {
    const std::string glyph = m_parser.read_object().as_string();
    m_parser.skip_whitespace();
    std::string unicode = m_parser.read_object().as_string();
    m_parser.skip_whitespace();

    // A trailing odd byte does not form a full 16-bit unit and is ignored.
    const auto unit_count = unicode.size() / 2;
    util::byte::reverse_bytes(reinterpret_cast<char16_t *>(unicode.data()),
                              unit_count);
    const std::u16string_view unicode16(
        reinterpret_cast<const char16_t *>(unicode.data()), unit_count);

    if (glyph.length() != 1) {
      std::cerr << "unexpected glyph length" << '\n';
    }
    if (unicode16.length() != 1) {
      std::cerr << "unexpected unicode length" << '\n';
    }

    // Indexing an empty string_view is undefined behaviour, and an empty glyph
    // would silently register a mapping for code 0, so such entries are
    // dropped after the warning above.
    if (glyph.empty() || unicode16.empty()) {
      continue;
    }

    cmap.map_bfchar(glyph[0], unicode16[0]);
  }
}
/// Consumes the `n` entries of a `beginbfrange` section.
///
/// Each entry is read as three objects, each followed by optional whitespace.
/// The values are currently discarded; `cmap` is not touched.
///
/// @param n    number of entries to consume.
/// @param cmap unused for now.
void CMapParser::read_bfrange(const std::uint32_t n,
                              [[maybe_unused]] const CMap &cmap) {
  m_parser.skip_whitespace();

  for (std::uint32_t remaining = n; remaining != 0; --remaining) {
    [[maybe_unused]] const auto range_begin = m_parser.read_object();
    m_parser.skip_whitespace();

    [[maybe_unused]] const auto range_end = m_parser.read_object();
    m_parser.skip_whitespace();

    [[maybe_unused]] const auto target = m_parser.read_object();
    m_parser.skip_whitespace();

    // TODO
  }
}
/// Parses a CMap from the underlying stream.
///
/// Tokens are read one at a time until the stream reports end-of-file; the
/// token read when end-of-file is reported is discarded. The most recent
/// integer operand is remembered and passed as the entry count to the next
/// `begincodespacerange`, `beginbfchar` or `beginbfrange` operator. All other
/// tokens are ignored.
///
/// @return the CMap filled in by the section readers.
CMap CMapParser::parse_cmap() {
  CMap result;
  std::uint32_t pending_count{};

  m_parser.skip_whitespace();

  for (;;) {
    Token token = read_token();
    if (in().eof()) {
      break;
    }
    m_parser.skip_whitespace();

    // Operands: only integers matter, they announce the size of a section.
    if (const Object *operand = std::get_if<Object>(&token);
        operand != nullptr) {
      if (operand->is_integer()) {
        pending_count = operand->as_integer();
      }
      continue;
    }

    // Operators: dispatch the section openers, ignore everything else.
    const std::string *op = std::get_if<std::string>(&token);
    if (op == nullptr) {
      continue;
    }

    if (*op == "begincodespacerange") {
      read_codespacerange(pending_count, result);
    } else if (*op == "beginbfchar") {
      read_bfchar(pending_count, result);
    } else if (*op == "beginbfrange") {
      read_bfrange(pending_count, result);
    }
  }

  return result;
}
} // namespace odr::internal::pdf