Skip to content

Commit 893991a

Browse files
committed
[WIP] Add Lua filter to extract Part 3 into JSON.
1 parent 7b14e48 commit 893991a

2 files changed

Lines changed: 186 additions & 2 deletions

File tree

build.sh

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ DIFFPDF_OUTPUT=""
88
DIFFTEX_OUTPUT=""
99
DOCX_OUTPUT=""
1010
HTML_OUTPUT=""
11+
JSON_OUTPUT=""
1112
LATEX_OUTPUT=""
1213
LATEX_OVERRIDE=""
1314
TYPST_OUTPUT=""
@@ -79,7 +80,7 @@ print_usage() {
7980
}
8081

8182

82-
if ! options=$(getopt --longoptions=help,puppeteer,gitversion,gitstatus,nogitversion,table_rules,plain_quotes,versioned_filenames,pr_number:,pr_repo:,diffbase:,pdf:,diffpdf:,difftex:,diffpdflog:,latex:,latex_override:,typst:,pdflog:,pdf_engine:,template:,template_html:,html_stylesheet:,reference_doc:,docx:,crossref:,html:,resourcedir:,noautobackmatter,csl: --options="" -- "$@"); then
83+
if ! options=$(getopt --longoptions=help,puppeteer,gitversion,gitstatus,nogitversion,table_rules,plain_quotes,versioned_filenames,pr_number:,pr_repo:,diffbase:,pdf:,diffpdf:,difftex:,diffpdflog:,latex:,latex_override:,typst:,pdflog:,pdf_engine:,template:,template_html:,html_stylesheet:,reference_doc:,docx:,crossref:,html:,json:,resourcedir:,noautobackmatter,csl: --options="" -- "$@"); then
8384
echo "Incorrect options provided"
8485
print_usage
8586
exit 1
@@ -165,6 +166,10 @@ while true; do
165166
HTML_OUTPUT="${2}"
166167
shift 2
167168
;;
169+
--json)
170+
JSON_OUTPUT="${2}"
171+
shift 2
172+
;;
168173
--template)
169174
# TODO: If simultaneous LaTeX and Typst-based PDF generation is required,
170175
# then we need separate --template_latex and --template_typst flags.
@@ -248,7 +253,7 @@ if [ ! -e "${INPUT_FILE}" ]; then
248253
fi
249254

250255
# at least one output must be requested
251-
if [ -z "${PDF_OUTPUT}${LATEX_OUTPUT}${DOCX_OUTPUT}${HTML_OUTPUT}" ]; then
256+
if [ -z "${PDF_OUTPUT}${LATEX_OUTPUT}${DOCX_OUTPUT}${HTML_OUTPUT}${JSON_OUTPUT}" ]; then
252257
>&2 echo "Expected --pdf, --docx, --html, or --latex option"
253258
print_usage
254259
exit 1
@@ -495,12 +500,17 @@ if [ "${VERSIONED_FILENAMES}" == "yes" ]; then
495500
if [ ! -z "${HTML_OUTPUT}" ]; then
496501
HTML_OUTPUT=$(prefix_filename "${version_prefix}" "${HTML_OUTPUT}")
497502
fi
503+
if [ ! -z "${JSON_OUTPUT}" ]; then
504+
JSON_OUTPUT=$(prefix_filename "${version_prefix}" "${JSON_OUTPUT}")
505+
fi
506+
498507
fi
499508
readonly PDF_OUTPUT
500509
readonly DIFFPDF_OUTPUT
501510
readonly DIFFTEX_OUTPUT
502511
readonly DOCX_OUTPUT
503512
readonly HTML_OUTPUT
513+
readonly JSON_OUTPUT
504514
readonly LATEX_OUTPUT
505515
readonly PDFLOG_OUTPUT
506516
readonly DIFFPDFLOG_OUTPUT
@@ -510,6 +520,7 @@ readonly RESOURCE_PATH=".:/resources:${RESOURCE_DIR}"
510520
echo "Starting Build with"
511521
echo "file: ${INPUT_FILE}"
512522
echo "docx: ${DOCX_OUTPUT:-none}"
523+
echo "json: ${JSON_OUTPUT:-none}"
513524
echo "pdf: ${PDF_OUTPUT:-none} (engine: ${PDF_ENGINE})"
514525
echo "diff pdf: ${DIFFPDF_OUTPUT:-none} (engine: ${PDF_ENGINE})"
515526
echo "latex: ${latex_ouput:-none}"
@@ -1067,6 +1078,30 @@ do_html() {
10671078
fi
10681079
}
10691080

1081+
# Generates the JSON output by running pandoc with the
# part3-command-tables-to-json.lua filter and the plain writer.
#
# Arguments:
#   $1 - path of the input document
#   $2 - path of the output file; its parent directory is created if missing
#
# Reads the global FROM (pandoc input format) and sets the global FAILED to
# "true" when the pandoc invocation reports a non-zero status.
do_json() {
	local input=$1
	local output=$2
	# Quote the inner expansion as well, so an output path containing
	# whitespace is not word-split before dirname sees it.
	mkdir -p "$(dirname "${output}")"

	echo "Generating JSON Output"
	# Declare and assign separately so that `local` does not mask the exit
	# status of the command substitution.
	local start
	start=$(date +%s)
	# NOTE(review): the single quotes embedded in the --output and input
	# arguments are kept exactly as they were; presumably `retry` re-evaluates
	# the command as a string -- confirm against retry's implementation.
	local cmd=(pandoc
		--standalone
		--lua-filter=part3-command-tables-to-json.lua
		--data-dir=/resources
		--from="${FROM}"
		--to=plain
		--output="'${output}'"
		"'${input}'")
	if ! retry 1 "${cmd[@]}"; then
		FAILED=true
		echo "JSON output failed"
	fi
	local end
	end=$(date +%s)
	echo "Elapsed time: $((end - start)) seconds"
}
1104+
10701105
do_md_fixups "${BUILD_DIR}/${INPUT_FILE}"
10711106

10721107
# Generate .typ output if either typst or pdf format (using the Typst engine) were requested.
@@ -1119,6 +1154,11 @@ if [ -n "${DOCX_OUTPUT}" ]; then
11191154
do_docx "${BUILD_DIR}/${INPUT_FILE}" "${SOURCE_DIR}/${DOCX_OUTPUT}"
11201155
fi
11211156

1157+
# Generate the JSON output
1158+
if [ -n "${JSON_OUTPUT}" ]; then
1159+
do_json "${BUILD_DIR}/${INPUT_FILE}" "${SOURCE_DIR}/${JSON_OUTPUT}"
1160+
fi
1161+
11221162
# Diffs may fail in some circumstances. Do not fail the entire workflow here.
11231163
PRE_DIFFING_FAILED="${FAILED}"
11241164

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
local json = require 'pandoc.json'
2+
3+
-- Kinds of fields a command table row can belong to. The current kind is
-- switched by the colspan separator rows inside the table body.
local FieldType = {
  -- Fields listed before the first separator row.
  COMMAND = 1,
  -- Fields listed after a 'Handles' separator row.
  HANDLE = 2,
  -- Fields listed after a 'Parameters' separator row.
  PARAMETER = 3,
}
8+
9+
-- Maps the text of a separator cell, written as
-- `========== [TYPE] ==========`, to the matching FieldType value.
--
-- Returns FieldType.HANDLE for 'Handles', FieldType.PARAMETER for
-- 'Parameters', and nil for anything else (including text that is not
-- wrapped in '=' runs at all).
function getFieldTypeFromSpan(text)
  -- Pattern, left to right: start of string, a run of '=', optional
  -- whitespace, the shortest possible label (captured), optional whitespace,
  -- a run of '=', end of string.
  local label = text:match("^%=+%s*(.-)%s*%=+$")

  if label == "Handles" then
    return FieldType.HANDLE
  end

  if label == "Parameters" then
    return FieldType.PARAMETER
  end

  return nil
end
28+
29+
-- Extracts a JSON-ready representation of a command table from Part 3.
--
-- The table must have exactly one header row whose three cells read 'Type',
-- 'Name' and 'Description', and exactly one body. Body rows are either:
--   * a single colspan cell of the form `===== Handles =====` or
--     `===== Parameters =====`, which switches the kind of the rows after it;
--   * three cells (type, name, description) describing one field.
-- Any fields that appear before the first colspan separator are assumed to be
-- command-code fields.
--
-- Returns a table with the lists `command_fields`, `handle_fields` and
-- `parameter_fields`, each entry being {type=..., name=..., description=...}.
-- Returns nil, after printing a diagnostic, when the table does not have the
-- expected shape.
function extractCommandFields(tbl)
  local stringify = pandoc.utils.stringify
  local caption = stringify(tbl.caption.long)

  if #tbl.head.rows ~= 1 then
    print(string.format("Table '%s' has %d header rows, expected 1", caption, #tbl.head.rows))
    return nil
  end

  local header = tbl.head.rows[1]

  if #header.cells ~= 3 then
    print(string.format("Table '%s' has %d header cells, expected 3", caption, #header.cells))
    return nil
  end

  local header_1 = stringify(header.cells[1].contents)
  local header_2 = stringify(header.cells[2].contents)
  local header_3 = stringify(header.cells[3].contents)

  if header_1 ~= "Type" or header_2 ~= "Name" or header_3 ~= "Description" then
    print(string.format("Table '%s' has malformed header cells: '%s', '%s', '%s'. Expected 'Type', 'Name' and 'Description'",
      caption, header_1, header_2, header_3))
    return nil
  end

  if #tbl.bodies ~= 1 then
    print(string.format("Table '%s' has %d bodies, expected 1", caption, #tbl.bodies))
    return nil
  end

  local fields = {command_fields = {}, handle_fields = {}, parameter_fields = {}}
  local current_field_type = FieldType.COMMAND

  for i, row in ipairs(tbl.bodies[1].body) do
    if #row.cells == 1 then
      -- We're in a colspan which indicates the type of fields that follow.
      local span_contents = stringify(row.cells[1].contents)
      current_field_type = getFieldTypeFromSpan(span_contents)

      if current_field_type == nil then
        print(string.format("Table '%s' has malformed span: '%s', expected a valid field type ('Handles' or 'Parameters')",
          caption, span_contents))
        return nil
      end
    elseif #row.cells == 3 then
      -- Keep the record local: assigning the cell texts to undeclared names
      -- would create globals that leak out of this function.
      local field = {
        type = stringify(row.cells[1].contents),
        name = stringify(row.cells[2].contents),
        description = stringify(row.cells[3].contents),
      }

      if current_field_type == FieldType.COMMAND then
        table.insert(fields.command_fields, field)
      elseif current_field_type == FieldType.HANDLE then
        table.insert(fields.handle_fields, field)
      elseif current_field_type == FieldType.PARAMETER then
        table.insert(fields.parameter_fields, field)
      end
    else
      -- Both separator rows (1 cell) and field rows (3 cells) are valid here.
      print(string.format("Table '%s' row %d has %d columns, expected 1 or 3", caption, i, #row.cells))
      return nil
    end
  end

  return fields
end
100+
101+
-- Takes a list of entries {caption=..., fields=...} whose captions are of the
-- form "[Command name] [Command/Response]" and collates the command and
-- response fields per command name.
--
-- Returns a table keyed by command name; each value may carry a `command`
-- and/or a `response` member holding the matching entry's fields.
--
-- Entries whose caption does not match that form, or whose trailing word is
-- neither 'Command' nor 'Response', are reported with a diagnostic and
-- skipped, so they neither abort the run nor leave an empty entry behind.
function collateCommandsAndResponses(data_entries)
  local collated = {}
  local slot_for_kind = {Command = "command", Response = "response"}

  for _, table_data in ipairs(data_entries) do
    local caption = table_data["caption"]
    -- `%w` does not match '_', so the name class includes it explicitly;
    -- otherwise a name such as "TPM2_Startup" is cut down to "Startup".
    local command_name, kind = string.match(caption, "([%w_]+) (%w+)")
    -- `kind` is nil when the caption did not match at all.
    local slot = kind and slot_for_kind[kind]

    if slot == nil then
      print(string.format("Table '%s' has malformed type, expected 'Command' or 'Response'", caption))
    else
      local entry = collated[command_name]
      if entry == nil then
        entry = {}
        collated[command_name] = entry
      end
      entry[slot] = table_data["fields"]
    end
  end

  return collated
end
123+
124+
-- Document-level filter: gathers the fields of every top-level Table block
-- that extractCommandFields accepts, collates them per command, and returns
-- a new document consisting of a single code block holding the JSON text.
function Pandoc(doc)
  local extracted = {}

  for _, blk in ipairs(doc.blocks) do
    -- Non-table blocks, and tables rejected by extractCommandFields, yield nil.
    local parsed = blk.t == "Table" and extractCommandFields(blk) or nil
    if parsed ~= nil then
      extracted[#extracted + 1] = {
        caption = pandoc.utils.stringify(blk.caption.long),
        fields = parsed,
      }
    end
  end

  print(string.format("Extracted data from %d tables", #extracted))
  print("Collating tables")

  -- Overwrite the entire document with a single code block containing the JSON
  local json_string = json.encode(collateCommandsAndResponses(extracted))
  return pandoc.Pandoc({pandoc.CodeBlock(json_string)})
end

0 commit comments

Comments
 (0)