|
1 | 1 | import { extractFieldKeyword } from '../field-keyword.js'; |
2 | 2 | import { CASE_INSENSITIVE_GENERAL_FORMATS, GENERAL_FORMATS } from './page-number-field-switches.js'; |
3 | 3 |
|
/**
 * Tokenizer for field-instruction text. Alternatives are tried in this order:
 *
 * 1. A double-quoted string; backslash escapes are allowed inside it and
 *    capture group 1 holds the text between the quotes.
 * 2. A bare `\*` or `\#` switch that is followed by whitespace or end of input.
 * 3. Any other backslash-led run of non-whitespace characters. This is what
 *    keeps attached forms such as `\*roman` or `\#00` together as ONE token.
 * 4. Any other run of non-whitespace characters.
 *
 * The lookahead in (2) stops a bare-switch match from splitting an attached
 * switch into a prefix token and a value token.
 *
 * The `g` flag makes this regex stateful under `exec`/`test` (`lastIndex`);
 * iterate with `matchAll` or reset `lastIndex` before reuse.
 */
const TOKEN_PATTERN = /"((?:[^"\\]|\\.)*)"|\\[#*](?=\s|$)|\\[^\s]+|[^\s]+/g;
5 | 5 |
|
6 | 6 | /** |
7 | 7 | * @typedef {'next' | 'current'} SeqMode |
@@ -96,28 +96,30 @@ export function parseSeqInstruction(instruction) { |
96 | 96 | continue; |
97 | 97 | } |
98 | 98 |
|
99 | | - if (token === '\\*') { |
100 | | - const value = tokens[index + 1]?.value; |
101 | | - if (value != null && !value.startsWith('\\')) { |
| 99 | + const attachedGeneralFormat = parseAttachedGeneralFormatSwitch(token); |
| 100 | + if (normalized === '\\*' || attachedGeneralFormat != null) { |
| 101 | + const value = attachedGeneralFormat ?? tokens[index + 1]?.value; |
| 102 | + if (value != null && (attachedGeneralFormat != null || !value.startsWith('\\'))) { |
102 | 103 | result.format = value; |
103 | 104 | result.hasGeneralFormat = true; |
104 | 105 | applyGeneralFormat(result, value); |
105 | | - index += 1; |
| 106 | + if (attachedGeneralFormat == null) index += 1; |
106 | 107 | } else { |
107 | 108 | result.unknownSwitches.push(token); |
108 | 109 | } |
109 | 110 | continue; |
110 | 111 | } |
111 | 112 |
|
112 | | - if (token === '\\#') { |
113 | | - const value = tokens[index + 1]?.value; |
114 | | - if (value != null && !value.startsWith('\\')) { |
| 113 | + const attachedNumericPicture = parseAttachedNumericPictureSwitch(token); |
| 114 | + if (normalized === '\\#' || attachedNumericPicture != null) { |
| 115 | + const value = attachedNumericPicture ?? tokens[index + 1]?.value; |
| 116 | + if (value != null && (attachedNumericPicture != null || !value.startsWith('\\'))) { |
115 | 117 | if (result.numericPictureFormat == null) { |
116 | 118 | result.numericPictureFormat = { picture: value }; |
117 | 119 | } else { |
118 | 120 | result.unknownSwitches.push(token, value); |
119 | 121 | } |
120 | | - index += 1; |
| 122 | + if (attachedNumericPicture == null) index += 1; |
121 | 123 | } else { |
122 | 124 | result.unknownSwitches.push(token); |
123 | 125 | } |
@@ -240,6 +242,38 @@ function parseAttachedNumericSwitch(token) { |
240 | 242 | }; |
241 | 243 | } |
242 | 244 |
|
/**
 * Reads the value of a general-format switch that was written without a
 * separating space. Word can serialize the switch this way (`\*roman`), and
 * the extracted value lets callers treat it exactly like `\* roman`.
 *
 * @param {string} token - A single token from the field instruction.
 * @returns The normalized text that follows the `\*` prefix, or `null` when
 *   `token` is not a `\*` prefix followed by at least one non-space character.
 */
function parseAttachedGeneralFormatSwitch(token) {
  const attachedForm = /^\\\*(\S+)$/;
  const found = attachedForm.exec(token);
  // No match leaves the raw value undefined, which the normalizer maps to null.
  const rawValue = found == null ? undefined : found[1];
  return normalizeAttachedSwitchValue(rawValue);
}
| 255 | + |
/**
 * Reads the picture of a numeric-picture switch that was written without a
 * separating space, so that `\#00` is handled the same way as `\# 00`.
 *
 * @param {string} token - A single token from the field instruction.
 * @returns The normalized text that follows the `\#` prefix, or `null` when
 *   `token` is not a `\#` prefix followed by at least one non-space character.
 */
function parseAttachedNumericPictureSwitch(token) {
  const attachedForm = /^\\#(\S+)$/;
  const found = attachedForm.exec(token);
  // No match leaves the raw picture undefined, which the normalizer maps to null.
  const rawPicture = found == null ? undefined : found[1];
  return normalizeAttachedSwitchValue(rawPicture);
}
| 265 | + |
/**
 * Normalizes the text captured after an attached switch prefix.
 *
 * - `null`/`undefined` (no attached value was captured) becomes `null`.
 * - A value wrapped in a pair of double quotes has the quotes removed and the
 *   inner text passed through `unescapeQuotedToken`.
 * - Anything else is returned unchanged.
 *
 * @param {string | undefined} value
 * @returns The normalized value, or `null` when there is none.
 */
function normalizeAttachedSwitchValue(value) {
  if (value == null) return null;

  // A quoted value needs a distinct opening and closing quote. A lone `"`
  // satisfies startsWith and endsWith at the same time, but it is not a
  // quoted (empty) string and must be returned as-is.
  const isQuoted = value.length >= 2 && value.startsWith('"') && value.endsWith('"');
  return isQuoted ? unescapeQuotedToken(value.slice(1, -1)) : value;
}
| 276 | + |
243 | 277 | /** |
244 | 278 | * @param {ParsedSeqInstruction} result |
245 | 279 | * @param {string} value |
|
0 commit comments