Skip to content

Commit 348768f

Browse files
committed
Add support for parsing "floating" code
The goal is to allow specification parts and executable statements at top-level, in addition to the usually allowed Fortran items. This is particularly useful for parsing include files, which typically contain fragments of code rather than complete Fortran translation units. Incomplete blocks of code, such as partial derived type declarations or do loops, will still trigger parsing errors.
1 parent 479af63 commit 348768f

6 files changed

Lines changed: 837250 additions & 765408 deletions

File tree

grammar.js

Lines changed: 95 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,6 @@ module.exports = grammar({
104104
[$.rank_statement],
105105
[$.stop_statement, $.identifier],
106106
[$.type_statement],
107-
[$.preproc_else_in_specification_part, $.program],
108-
[$.preproc_if_in_specification_part, $.program],
109107
[$._preproc_expression, $._expression],
110108
[$.coarray_critical_statement, $.identifier],
111109
[$.format_statement, $.identifier],
@@ -124,26 +122,74 @@ module.exports = grammar({
124122
],
125123

126124
rules: {
127-
translation_unit: $ => seq(
128-
repeat($._top_level_item),
129-
optional($.program),
125+
translation_unit: $ => choice(
126+
// A complete (normal) Fortran source file
127+
seq(
128+
repeat($._top_level_item_normal),
129+
optional($.program),
130+
),
131+
// A relaxed top-level parsing, allowing specification parts and
132+
// executable statements at top-level. This is particularly useful for
133+
// parsing include files, which typically contain fragments of code
// rather than complete translation units.
134+
$._relaxed_translation_unit,
135+
),
136+
137+
_relaxed_translation_unit: $ => seq(
138+
// Zero or more regular top-level items
139+
repeat($._top_level_item_normal),
140+
// At least one item not typically allowed at top-level
141+
$._top_level_item_floating,
142+
// Ideally, we would now allow any number of `_top_level_item` nodes,
143+
// which include both `_top_level_item_normal` and
144+
// `_top_level_item_floating`. However, we need a workaround for
145+
// implicit programs (those without an explicit starting statement, and
146+
// that end abruptly); e.g.:
147+
//
148+
// return 1
149+
// end
150+
//
151+
// Because implicit programs lack a starting `program` statement,
152+
// tree-sitter initially parses the code as "floating". When it later
153+
// encounters the `end` that implies a program scope, tree-sitter
154+
// creates and immediately closes an empty program node. This isn't an
155+
// accurate representation of the source code, as all statements that
156+
// looked "floating" actually belong inside the program.
157+
//
158+
// I tried to adjust parsing priorities to steer tree-sitter down the
159+
// correct path in these scenarios, but every attempt ended up breaking
160+
// the handling of floating code in some way.
161+
//
162+
// Ultimately, the simplest workable compromise I found is to replace
163+
// `_top_level_item` with `_top_level_item_no_program` and allow an
164+
// optional `end_program_statement`. This approach preserves correct
165+
// parsing of floating code, while also allowing the parse tree to
166+
// represent implicit programs via their terminator, instead of
167+
// creating empty program nodes.
168+
repeat($._top_level_item_no_program),
169+
optional($.end_program_statement),
130170
),
131171

132172
_top_level_item: $ => prec(2, choice(
133-
$.include_statement,
134-
$.program,
135-
$.module,
136-
$.submodule,
137-
$.interface,
138-
$.subroutine,
139-
$.function,
140-
$.block_data,
141-
$.preproc_if,
142-
$.preproc_ifdef,
143-
$.preproc_include,
144-
$.preproc_def,
145-
$.preproc_function_def,
146-
$.preproc_call,
173+
$._top_level_item_normal,
174+
$._top_level_item_floating,
175+
)),
176+
177+
_top_level_item_no_program: $ => prec(2, choice(
178+
$._top_level_item_normal_no_program,
179+
$._top_level_item_floating,
180+
)),
181+
182+
_top_level_item_normal: $ => prec(4,
183+
normalTopLevelItemInFortranFile($, /*allowProgram=*/true)
184+
),
185+
186+
_top_level_item_normal_no_program: $ => prec(4,
187+
normalTopLevelItemInFortranFile($, /*allowProgram=*/false)
188+
),
189+
190+
_top_level_item_floating: $ => prec(3, choice(
191+
$._specification_part,
192+
$._statement,
147193
)),
148194

149195
// Preprocessor
@@ -2562,6 +2608,36 @@ function preprocessor(command) {
25622608
return alias(new RegExp('#[ \t]*' + command), '#' + command);
25632609
}
25642610

2611+
/**
2612+
* Build the choice of top-level items accepted in a normal Fortran
* translation unit, optionally including `program`.
2613+
*
2614+
* @param {GrammarSymbols<string>} $ - Grammar symbols used to reference the item rules
2615+
* @param {boolean} allowProgram - When true, `$.program` is inserted right after `$.include_statement`
2616+
*
2617+
* @returns {ChoiceRule} A `choice(...)` over the selected top-level items
2618+
*/
2619+
function normalTopLevelItemInFortranFile($, allowProgram) {
2620+
const items = [
2621+
$.include_statement,
2622+
$.module,
2623+
$.submodule,
2624+
$.interface,
2625+
$.subroutine,
2626+
$.function,
2627+
$.block_data,
2628+
$.preproc_if,
2629+
$.preproc_ifdef,
2630+
$.preproc_include,
2631+
$.preproc_def,
2632+
$.preproc_function_def,
2633+
$.preproc_call,
2634+
];
2635+
if (allowProgram) {
2636+
items.splice(1, 0, $.program);
2637+
}
2638+
return choice(...items);
2639+
}
2640+
25652641
/**
25662642
* Common rule for procedures (function, subroutine, module procedure)
25672643
*

0 commit comments

Comments
 (0)