Skip to content

Commit 03d9fe6

Browse files
authored
feat(ci): enforce reference.conf CI check (#6795)
1 parent 156af72 commit 03d9fe6

3 files changed

Lines changed: 336 additions & 0 deletions

File tree

Lines changed: 319 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,319 @@
1+
#!/usr/bin/env python3
2+
"""Validate java-tron reference.conf key names and hierarchy depth.
3+
4+
Rules enforced:
5+
1. Every user-defined segment of every key path must match ^[a-z][a-zA-Z0-9]*$:
6+
starts with a lowercase ASCII letter, then ASCII letters/digits only.
7+
Acronyms at position 1+ are accepted (e.g. `httpPBFTEnable`,
8+
`openHistoryQueryWhenLiteFN`, `allowShieldedTRC20Transaction`) — only the
9+
first character is constrained. This matches what java.beans.Introspector
10+
and ConfigBeanFactory actually require for bean-property auto-binding.
11+
2. Total path depth must be <= MAX_DEPTH (5). Each list/array step counts
12+
as one additional level. For example `rate.limiter.http[].component`
13+
is 5 levels deep (rate=1, limiter=2, http=3, []=4, component=5).
14+
3. ALLOWLIST entries are exempt from the format rule (legacy keys that ship
15+
in user configs; renaming would break compatibility).
16+
4. Service-binding port values must be unique. A leaf is a "service port"
17+
when its last segment is `port` or ends in `Port` (camelCase) AND its
18+
path contains no `[]` (list-element ports belong to per-element records,
19+
not to the local process). Two distinct paths binding the same int value
20+
would conflict at startup; reserved sentinels (0, -1) are exempt.
21+
22+
Parsing strategy: delegated to pyhocon (https://github.com/chimpler/pyhocon),
23+
the reference Python HOCON implementation. This avoids hand-rolled scanner
24+
pitfalls (key = { ... } prefix loss, triple-strings, substitutions, includes,
25+
+= operator, block comments). pyhocon returns a fully-merged ConfigTree where
26+
dotted-form keys are expanded into nested objects — i.e. the same canonical
27+
key set Typesafe Config / ConfigBeanFactory will see at runtime.
28+
29+
Array handling: keys inside object-elements of arrays are also user-defined
30+
config keys (e.g. each entry in `rate.limiter.rpc = [{ component=..., ... }]`
31+
is parsed by RateLimiterConfig). The walker recurses into list elements and
32+
treats the array step as a synthetic `[]` segment that contributes to depth
33+
but is not itself validated as a name. Element keys are deduplicated across
34+
list entries because well-formed arrays use homogeneous object shapes.
35+
36+
Debug mode: pass `--debug` to print every parsed key with its depth, in
37+
walk order (which mirrors the file top-to-bottom). Use this to eyeball the
38+
parser's view against reference.conf.
39+
40+
Exit code: 0 if clean, 1 if any violation remains after allowlist filtering,
41+
2 on environment errors (missing pyhocon, file not found, parse failure).
42+
43+
CI integration: invoked by the `Validate reference.conf key names and depth`
44+
step of the `checkstyle` job in `.github/workflows/pr-check.yml`. The non-zero
45+
exit on violations is what makes that step fail — there is intentionally NO
46+
extra `exit 1` in the workflow shell wrapper. A single GHA `::error` workflow
47+
command is also emitted unconditionally (not gated on the GITHUB_ACTIONS env
48+
var) so local runs produce the same output as CI; the leading `::` is
49+
harmless noise locally.
50+
"""
51+
import re
52+
import sys
53+
from pathlib import Path
54+
55+
try:
56+
from pyhocon import ConfigFactory, ConfigTree
57+
except ImportError:
58+
print(
59+
"error: pyhocon is required. Install with `pip install pyhocon`.",
60+
file=sys.stderr,
61+
)
62+
sys.exit(2)
63+
64+
# Depth ceiling for any key path. It equals the deepest path reference.conf
# uses today (5) with no headroom on purpose: going deeper has to be an
# explicit, reviewed bump of this constant, because deep trees are harder to
# read and harder to map through ConfigBeanFactory.
MAX_DEPTH = 5

# Shape of one path segment: a lowercase ASCII letter followed by ASCII
# letters/digits only.
KEY_REGEX = re.compile(r"^[a-z][a-zA-Z0-9]*$")

# Keys spelled with the PBFT acronym in capitals; they predate the
# auto-binding naming convention.
_LEGACY_PBFT_KEYS = (
    "node.http.PBFTEnable",
    "node.http.PBFTPort",
    "node.rpc.PBFTEnable",
    "node.rpc.PBFTPort",
)

# PascalCase keys read manually in NodeConfig.fromConfig rather than through
# ConfigBeanFactory. They are commented out in reference.conf at the moment,
# so the parser never reports them; they are listed so the check stays green
# if they are ever uncommented with default values.
_LEGACY_SHUTDOWN_KEYS = (
    "node.shutdown.BlockTime",
    "node.shutdown.BlockHeight",
    "node.shutdown.BlockCount",
)

# Full paths exempt from the KEY_REGEX rule so existing user `config.conf`
# files keep working. New keys must NOT be added here — they have to satisfy
# KEY_REGEX. A rename + deprecation cycle can eventually empty this set.
ALLOWLIST = set(_LEGACY_PBFT_KEYS + _LEGACY_SHUTDOWN_KEYS)

# Port values that any number of leaves may share without counting as a
# collision: 0 means "disabled" (the service does not bind) and -1 is the
# auto/unset placeholder.
PORT_SENTINELS = {-1, 0}
92+
93+
94+
def walk(node, path, depth):
    """Yield (full_path, depth, is_leaf) for every reachable user-defined key.

    Args:
        node: The value to descend into — a ConfigTree, a list, or a scalar.
        path: Dotted path of `node` itself ("" for the root).
        depth: Depth of `node` itself (0 for the root).

    Yields:
        (full_path, depth, is_leaf) tuples in traversal order.

    - Each ConfigTree key adds one depth level and one name segment.
    - Each list step adds one synthetic level rendered as `[]`. Keys inside
      list elements are yielded once per unique sub-path, so an array of
      same-shaped objects does not repeat every field per element. When two
      elements disagree on a sub-path's `is_leaf`, the first one seen wins.
    - Scalars, null and lists of scalars produce no further keys.

    `is_leaf` is True when nothing is nested under the key as a ConfigTree
    key: the value is a scalar, a list, null, or an EMPTY ConfigTree. An
    empty object such as `foo {}` has no children whose paths would carry
    its segments, so it must be reported as a leaf; otherwise callers that
    only inspect leaves would never see its name or depth.
    """
    if isinstance(node, ConfigTree):
        for k, v in node.items():
            new_path = f"{path}.{k}" if path else k
            new_depth = depth + 1
            # Only a non-empty ConfigTree is a namespace intermediate.
            has_children = isinstance(v, ConfigTree) and len(v) > 0
            yield new_path, new_depth, not has_children
            yield from walk(v, new_path, new_depth)
    elif isinstance(node, list):
        array_path = f"{path}[]"
        array_depth = depth + 1
        seen = set()
        for elem in node:
            # Object elements contribute their keys. Nested lists (HOCON
            # allows list-of-list, e.g. `a = [[{x=1}]]`) are recursed into so
            # every inner `[]` step also adds a level. Scalar elements have
            # no sub-keys and are skipped.
            if not isinstance(elem, (ConfigTree, list)):
                continue
            for sub_path, sub_depth, sub_leaf in walk(elem, array_path, array_depth):
                if sub_path in seen:
                    continue
                seen.add(sub_path)
                yield sub_path, sub_depth, sub_leaf
128+
129+
130+
def _is_port_segment(seg):
    """Return True when `seg` names a service-binding port leaf.

    Matches `port` exactly and any camelCase name ending in `Port`
    (e.g. `fullNodePort`, `solidityPort`, `PBFTPort`). A lowercase `port`
    suffix inside a longer word (`transport`, `support`) is deliberately
    rejected — those are not port keys.
    """
    return seg == "port" or seg.endswith("Port")


def _coerce_port(raw):
    """Return `raw` as an int port number, or None when it is not one.

    `int()` alone is too permissive here: it maps booleans to 0/1 and
    silently truncates non-integral floats, so two unrelated `...Port = true`
    flags (or 80.5 and 80.9) would be reported as a collision. Both are
    rejected; integral floats and numeric strings are still accepted.
    """
    if isinstance(raw, bool):
        return None
    if isinstance(raw, float) and not raw.is_integer():
        return None
    try:
        return int(raw)
    except (TypeError, ValueError):
        return None


def find_port_collisions(tree, keys):
    """Group service-binding port leaves by integer value; return collisions.

    Args:
        tree: Parsed config; only `tree.get(full_path)` is used, to read the
            value stored at each candidate path.
        keys: Iterable of (full_path, depth, is_leaf) tuples as produced by
            `walk`.

    A leaf qualifies when (a) its last segment satisfies `_is_port_segment`
    and (b) its full path contains no `[]` step. Rule (b) leaves out
    list-element ports — e.g. `genesis.block.witnesses[].port` is the
    advertised port of each genesis witness record, not a port the local
    process binds, so two witnesses sharing a value is expected.

    Returns:
        Sorted list of (value, sorted_paths) for every value bound by more
        than one path. Values in PORT_SENTINELS are excluded. Values that
        are not integer port numbers (unresolved substitutions such as
        `${PORT}`, booleans, non-integral floats, objects, null) are skipped
        silently — a non-numeric port is a different class of error.
    """
    by_value = {}
    for full_path, _depth, is_leaf in keys:
        if not is_leaf or "[]" in full_path:
            continue
        if not _is_port_segment(full_path.rsplit(".", 1)[-1]):
            continue
        try:
            raw = tree.get(full_path)
        except Exception:
            # Deliberate best-effort: a path the tree cannot resolve is
            # skipped rather than aborting the whole check.
            # NOTE(review): presumably only pyhocon lookup errors land here —
            # confirm and narrow the exception type if so.
            continue
        value = _coerce_port(raw)
        if value is None or value in PORT_SENTINELS:
            continue
        by_value.setdefault(value, []).append(full_path)
    return sorted(
        (value, sorted(paths))
        for value, paths in by_value.items()
        if len(paths) > 1
    )
179+
180+
181+
def _escape_gha_data(text):
    """Escape `text` for the message part of a GitHub Actions workflow command.

    `%`, CR and LF are percent-encoded so a multi-line or `%`-bearing message
    stays inside one `::error` command instead of being cut at the first
    newline.
    """
    return str(text).replace("%", "%25").replace("\r", "%0D").replace("\n", "%0A")


def _escape_gha_property(text):
    """Escape `text` for a `key=value` property of a workflow command.

    Properties are separated by `,` and the property list ends at `::`, so
    those characters are encoded on top of the message escaping.
    """
    return _escape_gha_data(text).replace(":", "%3A").replace(",", "%2C")


def main(argv):
    """Validate one reference.conf file and report violations.

    Args:
        argv: Full argument vector (program name first). Exactly one file
            path is expected; `--debug` may appear anywhere and makes the
            function print every parsed key with its depth before checking.

    Returns:
        0 when the file is clean, 1 when any format, depth or port violation
        remains, 2 on usage errors, a missing file, or a parse failure.
    """
    prog = argv[0] if argv else "check_reference_conf.py"
    args = list(argv[1:])
    # Accept the flag in any position, not only as the first argument.
    debug = "--debug" in args
    args = [arg for arg in args if arg != "--debug"]
    if len(args) != 1:
        print(f"usage: {prog} [--debug] <path/to/reference.conf>", file=sys.stderr)
        return 2
    path = Path(args[0])
    if not path.is_file():
        print(f"error: file not found: {path}", file=sys.stderr)
        return 2

    annotation_file = _escape_gha_property(path)

    try:
        tree = ConfigFactory.parse_file(str(path))
    except Exception as e:
        print(f"error: failed to parse {path}: {e}", file=sys.stderr)
        # Mirror the violation path: emit a single GHA annotation so the
        # parse failure surfaces in the PR check summary, not just the log.
        print(
            f"::error file={annotation_file},title=reference.conf::"
            f"failed to parse: {_escape_gha_data(e)}"
        )
        return 2

    keys = list(walk(tree, "", 0))

    if debug:
        # Keys are printed in the order walk() yielded them, with the depth
        # column making the array `[]` steps explicit so the MAX_DEPTH math
        # can be verified by inspection. A trailing `/` marks namespace
        # intermediates; bare names are leaves — `grep -v '/$'` filters to
        # just leaves.
        leaf_count = sum(1 for _, _, lf in keys if lf)
        print(
            f"DEBUG: {len(keys)} parsed keys "
            f"({leaf_count} leaves + {len(keys) - leaf_count} intermediates), "
            f"walk order:"
        )
        for full_path, depth, is_leaf in keys:
            label = full_path if is_leaf else full_path + "/"
            print(f" d={depth} {label}")
        print()

    format_violations = []
    depth_violations = []

    # Only entries flagged as leaves are checked. A dotted declaration like
    # `a.b.c = X` also yields the intermediates `a` and `a.b`; checking those
    # too would report one bad segment several times. The leaf path contains
    # every segment of its ancestors.
    for full_path, depth, is_leaf in keys:
        if not is_leaf:
            continue
        if full_path not in ALLOWLIST:
            for seg in full_path.split("."):
                # Strip any number of trailing `[]` markers — nested arrays
                # produce segments like `a[][]`.
                while seg.endswith("[]"):
                    seg = seg[:-2]
                if seg and not KEY_REGEX.match(seg):
                    # One report per path: stop at the first bad segment.
                    format_violations.append((full_path, seg))
                    break
        if depth > MAX_DEPTH:
            depth_violations.append((full_path, depth))

    format_violations.sort()
    depth_violations.sort()

    port_collisions = find_port_collisions(tree, keys)

    if format_violations or depth_violations or port_collisions:
        lines_out = []
        if format_violations:
            lines_out.append(
                f"Format violations ({len(format_violations)}) — "
                f"each segment must match {KEY_REGEX.pattern}:"
            )
            for full_path, seg in format_violations:
                lines_out.append(f" format: {full_path} (segment: '{seg}')")
        if depth_violations:
            if lines_out:
                lines_out.append("")
            lines_out.append(
                f"Depth violations ({len(depth_violations)}) — max depth is {MAX_DEPTH} "
                f"(each `[]` array step counts as one level):"
            )
            for full_path, depth in depth_violations:
                lines_out.append(
                    f" depth: {full_path} (depth={depth}, max={MAX_DEPTH})"
                )
        if port_collisions:
            if lines_out:
                lines_out.append("")
            lines_out.append(
                f"Port collisions ({len(port_collisions)}) — distinct service "
                f"ports must bind distinct values (sentinels {sorted(PORT_SENTINELS)} exempt):"
            )
            for value, paths in port_collisions:
                lines_out.append(
                    f" port: value {value} bound by: {', '.join(paths)}"
                )
        print("\n".join(lines_out))
        print()

        # Emit ONE consolidated GHA workflow annotation. Each entry is
        # escaped on its own and the entries are joined with %0A (GHA's
        # newline escape) so they all show up in the annotation summary,
        # not just in the job log.
        entries = [
            f"reference.conf has {len(format_violations)} format + "
            f"{len(depth_violations)} depth + {len(port_collisions)} port "
            f"violation(s):"
        ]
        for full_path, seg in format_violations:
            entries.append(f"format: {full_path} (segment '{seg}')")
        for full_path, depth in depth_violations:
            entries.append(f"depth: {full_path} (depth={depth}, max={MAX_DEPTH})")
        for value, paths in port_collisions:
            entries.append(f"port: value {value} bound by {', '.join(paths)}")
        body = "%0A".join(_escape_gha_data(entry) for entry in entries)
        print(f"::error file={annotation_file},title=reference.conf::{body}")
        print(
            f"FAIL: {len(format_violations)} format + {len(depth_violations)} depth "
            f"+ {len(port_collisions)} port violation(s) in {path}",
            file=sys.stderr,
        )
        return 1

    # The path and the key count are separated explicitly; without the
    # separator they run together (e.g. `reference.conf412 keys`).
    print(
        f"OK: {path}: {len(keys)} keys, all lowerCamelCase, depth <= {MAX_DEPTH}, "
        f"service ports unique"
    )
    return 0
316+
317+
318+
# Script entry point: main()'s return value becomes the process exit status,
# which is what makes the calling CI step pass or fail.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv))

.github/scripts/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pyhocon==0.3.63

.github/workflows/pr-check.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,22 @@ jobs:
103103
steps:
104104
- uses: actions/checkout@v5
105105

106+
- name: Set up Python
107+
uses: actions/setup-python@v5
108+
with:
109+
python-version: '3.11'
110+
cache: 'pip'
111+
cache-dependency-path: .github/scripts/requirements.txt
112+
113+
- name: Install pyhocon
114+
run: pip install --quiet -r .github/scripts/requirements.txt
115+
116+
- name: Validate reference.conf key names and depth
117+
shell: bash
118+
run: |
119+
python3 .github/scripts/check_reference_conf.py \
120+
common/src/main/resources/reference.conf
121+
106122
- name: Set up JDK 17
107123
uses: actions/setup-java@v5
108124
with:

0 commit comments

Comments
 (0)