Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 112 additions & 7 deletions scripts/build_keyword_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,34 @@
# the deck appends a tracer/phase suffix to form the actual keyword name.
# TVDP is documented as such: real decks write e.g. TVDPFSEA (free tracer
# SEA), TVDPSIGS (solution tracer IGS), TVDPFWT1 (free tracer WT1).
TEMPLATE_KEYWORD_NAMES = frozenset({"TVDP"})
# FIP is similar: the manual states "FIP as the first three characters
# followed by up to a five letter character string", producing deck tokens
# like FIPZON, FIPGL, FIPNL, FIPUNIT, FIPHC, etc.
TEMPLATE_KEYWORD_NAMES = frozenset({"TVDP", "FIP"})

# Keywords whose record body is conventionally spread across multiple
# lines, with only the line carrying '/' completing the record. opm-common's
# size_type rarely flags these (the items aren't ``size_type: "ALL"``), but
# real decks routinely split them — MESSAGES is the canonical fixed-record
# case (12-13 INT values across two lines), and VFPPROD/VFPINJ's axis and
# BHP-table records each span many lines:
#
# VFPPROD
# 1 1535 LIQ WCT GOR THP PUMP METRIC BHP /
# 100.0 123.0 ... 228.0
# 280.0 ... 5000.0 /
# ...
#
# Tag these explicitly so per-line missing-'/' diagnostics are suppressed.
VARIADIC_RECORD_KEYWORDS = frozenset({"MESSAGES", "VFPPROD", "VFPINJ"})

# Multi-record keywords whose block ends with the trailing record's '/'
# rather than a separate standalone '/'. opm-common classifies them as
# 'list' (size = None/string sentinel) but real decks never close them
# with a standalone '/' — the next keyword is what ends the block.
# Reclassified to 'fixed' (size_count = records_meta.length) so the
# diagnostics engine doesn't demand a closing terminator.
NO_LIST_TERMINATOR_KEYWORDS = frozenset({"VFPPROD", "VFPINJ"})


def _has_variable_arity_item(opm_items: list[dict]) -> bool:
Expand Down Expand Up @@ -1041,15 +1068,30 @@ def _summary_size_shape(mnemonic: str) -> tuple[str, Optional[int]]:
Field-scope mnemonics (F-prefix: FOPR, FWPR, …) are written bare with
no terminating '/' — ``size_kind: 'none'``.

Every other scope (G/W/C/L/R/B/A/N/S…) takes a *single* record that
is either a list of names (``WOPR \\n 'W1' 'W2' /``) or just a bare
'/' meaning "all" (``WOPR \\n /``). That's ``size_kind: 'fixed'``
with ``size_count: 1`` — modelling it as 'list' wrongly demands a
second standalone '/' to close the block.
Every other scope (G/W/C/L/R/B/A/N/S…) takes an optional block whose
payload is a list of names — well, group, region, completion, …
— spread freely across one or more lines and closed by a single '/':

WOPR
'PROD1'
'PROD2'
/

The body is OPTIONAL: ``WOPR \\n GMWIN \\n /`` (two bare mnemonics
stacked, single closing '/') is a real and widespread pattern in OPM
decks. Callers pair ``size_kind: 'array'`` with ``optional_body =
True`` for these entries so the diagnostics engine skips the
close-block terminator check when no values were given but still
flags a forgotten '/' once names are listed.
"""
if mnemonic.startswith("F"):
return "none", None
return "fixed", 1
return "array", None


def _summary_optional_body(mnemonic: str) -> bool:
"""Whether the SUMMARY mnemonic's record body may be omitted entirely."""
return not mnemonic.startswith("F")


def _parse_performance_table(rows, section_fodt: Path) -> dict:
Expand Down Expand Up @@ -1161,6 +1203,8 @@ def _row_starting_with(label: str):
}
if size_count is not None:
entry["size_count"] = size_count
if _summary_optional_body(mnemonic):
entry["optional_body"] = True
out[mnemonic] = entry
return out

Expand Down Expand Up @@ -1264,6 +1308,8 @@ def _cell(i):
}
if size_count is not None:
entry["size_count"] = size_count
if _summary_optional_body(mnemonic):
entry["optional_body"] = True
if is_templated:
entry["templated"] = True
out[mnemonic] = entry
Expand Down Expand Up @@ -1342,6 +1388,65 @@ def build_index(manual_dir: Path) -> dict:
for e in targets:
e["templated"] = True

# Mark keywords whose records canonically span multiple lines so
# per-line missing-'/' diagnostics are suppressed (MESSAGES, VFPPROD, …).
for name in VARIADIC_RECORD_KEYWORDS:
entry = index.get(name)
if entry is None:
continue
targets = entry if isinstance(entry, list) else [entry]
for e in targets:
e["variadic_record"] = True

# Reclassify list-kind keywords that don't end with a standalone '/'
# as 'fixed' (size_count = records_meta length) so closeKw doesn't
# demand a final terminator. VFPPROD/VFPINJ: the trailing variadic
# record's '/' is the natural end of the block.
for name in NO_LIST_TERMINATOR_KEYWORDS:
entry = index.get(name)
if entry is None:
continue
targets = entry if isinstance(entry, list) else [entry]
for e in targets:
if e.get("size_kind") != "list":
continue
records_meta = e.get("records_meta")
if records_meta:
e["size_kind"] = "fixed"
e["size_count"] = len(records_meta)

# UDQ SUMMARY mnemonics are documented under placeholder names of the
# form ``<scope-prefix>UX{2,}`` (FUXXXXXX, WUXXXXXX, …) — the trailing
# X's stand for the user-defined UDQ name (up to six characters). Real
# decks write e.g. ``WUWI1`` or ``FUOIL``. Strip the trailing X's so
# the entry is keyed/named by the prefix alone (``WU``, ``FU``, …) and
# mark it templated so the standard ``<base>+[A-Z0-9]+`` fallback
# resolves deck tokens like WUWI1 to the WU template entry.
# NOTE: the pattern below does not itself require the ``U`` — any key
# made only of capital letters that ends in two or more X's is treated
# as a placeholder.
# The prefix group uses a lazy quantifier so ``FUXXXXXX`` splits as
# ("FU", "XXXXXX"), not ("FUXXXX", "XX"): a greedy ``[A-Z]+`` would
# consume the X's first and give back only the two that ``X{2,}`` needs.
udq_placeholder_re = re.compile(r"^([A-Z]+?)X{2,}$")
udq_renames: list[tuple[str, str]] = []
for name in list(index.keys()):
m = udq_placeholder_re.match(name)
if not m:
continue
prefix = m.group(1)
if prefix in index:
# Prefix entry already exists from some other path; leave the
# placeholder alone rather than risk overwriting real data.
continue
udq_renames.append((name, prefix))
for old_name, new_name in udq_renames:
entry = index.pop(old_name)
if isinstance(entry, list):
for e in entry:
e["name"] = new_name
e["templated"] = True
else:
entry["name"] = new_name
entry["templated"] = True
index[new_name] = entry

print(f"\nIndexed {total} keywords ({skipped} skipped)")
return index

Expand Down
47 changes: 29 additions & 18 deletions scripts/test_build_keyword_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
_opm_item_for_param,
_classify_size,
_summary_size_shape,
_summary_optional_body,
NS,
SECTION_MAP,
)
Expand Down Expand Up @@ -1146,19 +1147,22 @@ def test_field_scope_gets_size_kind_none(self, tmp_path):
assert entry["size_kind"] == "none"
assert "size_count" not in entry

def test_well_and_group_scope_get_size_kind_fixed_one(self, tmp_path):
# Group/well/region/etc. mnemonics take a single record (a list of
# names or just '/'); modelling them as 'list' would wrongly demand
# a closing standalone '/'. They're 'fixed' with size_count=1.
def test_well_and_group_scope_get_size_kind_array(self, tmp_path):
# Group/well/region/etc. mnemonics take an optional list of names
# spread across one or more lines and closed by a single '/'. That's
# ``size_kind: 'array'`` plus ``optional_body: True`` so a bare
# ``WOPR`` stacked back-to-back with another mnemonic is accepted
# but a forgotten closing '/' after listed names is still flagged.
body = self._fgwcl_table(
_row("Flow", "Gas-Oil Ratio", "GOR",
"", "GGOR", "WGOR", "", "", ""),
)
fodt = self._write_section_fodt(tmp_path, body)
out = parse_summary_mnemonics(fodt)
for kw in ("GGOR", "WGOR"):
assert out[kw]["size_kind"] == "fixed"
assert out[kw]["size_count"] == 1
assert out[kw]["size_kind"] == "array"
assert "size_count" not in out[kw]
assert out[kw]["optional_body"] is True

def test_skips_empty_scope_cells(self, tmp_path):
# Only WOPT exists for this row; the empty Field/Group cells must
Expand Down Expand Up @@ -1218,8 +1222,9 @@ def test_picks_up_network_model_gpr(self, tmp_path):
fodt = self._write_section_fodt(tmp_path, body)
out = parse_summary_mnemonics(fodt)
assert "GPR" in out
assert out["GPR"]["size_kind"] == "fixed"
assert out["GPR"]["size_count"] == 1
assert out["GPR"]["size_kind"] == "array"
assert "size_count" not in out["GPR"]
assert out["GPR"]["optional_body"] is True

def test_tags_tracer_rows_as_templated(self, tmp_path):
# Tracer mnemonics (FTPR, WTPC, …) are templates — the user appends
Expand Down Expand Up @@ -1256,10 +1261,11 @@ def test_picks_up_field_group_control_mode_table(self, tmp_path):
out = parse_summary_mnemonics(fodt)
for kw in ("FMCTP", "GMCTP", "FMCTW", "GMCTW", "FMCTG", "GMCTG"):
assert kw in out
# Field-scope stays bare; group-scope takes a single record.
# Field-scope stays bare; group-scope takes an optional list of names.
assert out["FMCTP"]["size_kind"] == "none"
assert out["GMCTP"]["size_kind"] == "fixed"
assert out["GMCTP"]["size_count"] == 1
assert out["GMCTP"]["size_kind"] == "array"
assert "size_count" not in out["GMCTP"]
assert out["GMCTP"]["optional_body"] is True
# Description spans pair Field/Group correctly.
assert "Production Group" in out["FMCTP"]["summary"]
assert "Production Group" in out["GMCTP"]["summary"]
Expand All @@ -1278,11 +1284,13 @@ def test_picks_up_well_control_mode_table(self, tmp_path):
body = _table(title, groups, mnem, desc)
fodt = self._write_section_fodt(tmp_path, body)
out = parse_summary_mnemonics(fodt)
assert out["WSTAT"]["size_kind"] == "fixed"
assert out["WSTAT"]["size_count"] == 1
assert out["WSTAT"]["size_kind"] == "array"
assert "size_count" not in out["WSTAT"]
assert out["WSTAT"]["optional_body"] is True
assert "Well Status" in out["WSTAT"]["summary"]
assert "Well Mode of Control" in out["WMCTL"]["summary"]
assert out["WMCTL"]["size_kind"] == "fixed"
assert out["WMCTL"]["size_kind"] == "array"
assert out["WMCTL"]["optional_body"] is True

def test_picks_up_performance_table(self, tmp_path):
# The "OPM Flow Simulation Performance" table has a different
Expand Down Expand Up @@ -1328,19 +1336,22 @@ def test_handles_aquifer_and_recovery_table_titles(self, tmp_path):
for kw in ("FAQR", "AAQR", "ALQR", "ANQR", "FOE", "ROE"):
assert kw in out
assert out["FAQR"]["size_kind"] == "none"
assert out["AAQR"]["size_kind"] == "fixed" and out["AAQR"]["size_count"] == 1
assert out["AAQR"]["size_kind"] == "array" and out["AAQR"]["optional_body"] is True
assert out["FOE"]["size_kind"] == "none"
assert out["ROE"]["size_kind"] == "fixed" and out["ROE"]["size_count"] == 1
assert out["ROE"]["size_kind"] == "array" and out["ROE"]["optional_body"] is True


class TestSummarySizeShape:
def test_field_scope_none(self):
assert _summary_size_shape("FOPR") == ("none", None)
assert _summary_size_shape("FWPR") == ("none", None)

def test_other_scopes_fixed_one(self):
def test_other_scopes_array_optional(self):
# W/G/R/B/A-prefixed mnemonics take an optional list of names
# spread across one or more lines and closed by a single '/'.
for kw in ("WOPR", "WWIR", "GGOR", "ROE", "BPR", "AAQR"):
assert _summary_size_shape(kw) == ("fixed", 1)
assert _summary_size_shape(kw) == ("array", None)
assert _summary_optional_body(kw) is True


class TestAttachStringOptions:
Expand Down
2 changes: 1 addition & 1 deletion vscode-extension/data/keyword_index_compact.json

Large diffs are not rendered by default.

19 changes: 18 additions & 1 deletion vscode-extension/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,41 +54,58 @@
"extensions": [
".data",
".DATA",
".dat",
".inc",
".INC",
".incl",
".include",
".sch",
".SCH",
".sched",
".schedule",
".grdecl",
".GRDECL",
".grid",
".gridopts",
".vfp",
".VFP",
".vfpprod",
".prop",
".prpecl",
".Ecl",
".ecl",
".summary",
".smry",
".aqucon",
".aqunum",
".dimens",
".eqldims",
".eqlnum",
".equil",
".fault",
".faults",
".fipnum",
".fipzon",
".multnum",
".multregp",
".multregt",
".nnc",
".ntg",
".opernum",
".perm",
".permx",
".poro",
".pvt",
".pvtnum",
".regdims",
".rocknum",
".rxvd",
".satnum",
".sattab",
".swatinit",
".tabdims",
".thpres"
".thpres",
".trans"
],
"configuration": "./language-configuration.json"
}
Expand Down
Loading