Skip to content

Commit 6f3d78c

Browse files
committed
Add support for parsing "floating" code
Incomplete blocks of code, such as partial derived type declarations or do loops, will still trigger parsing errors.
1 parent a695bee commit 6f3d78c

6 files changed

Lines changed: 859961 additions & 788579 deletions

File tree

grammar.js

Lines changed: 93 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,6 @@ module.exports = grammar({
104104
[$.rank_statement],
105105
[$.stop_statement, $.identifier],
106106
[$.type_statement],
107-
[$.preproc_else_in_specification_part, $.program],
108-
[$.preproc_if_in_specification_part, $.program],
109107
[$._preproc_expression, $._expression],
110108
[$.coarray_critical_statement, $.identifier],
111109
[$.format_statement, $.identifier],
@@ -124,26 +122,72 @@ module.exports = grammar({
124122
],
125123

126124
rules: {
127-
translation_unit: $ => seq(
128-
repeat($._top_level_item),
129-
optional($.program),
125+
translation_unit: $ => choice(
126+
// A complete (normal) Fortran source file
127+
seq(
128+
repeat($._top_level_item_normal),
129+
optional($.program),
130+
),
131+
// Relaxed top-level parsing that allows specification parts and
132+
// executable statements at top-level. This is particularly useful for
133+
// parsing include files, which typically contain fragments of code
134+
seq(
135+
// 0 or more
136+
repeat($._top_level_item_normal),
137+
// At least one item not typically allowed at top-level
138+
$._top_level_item_floating,
139+
// Ideally, we would now allow any number of `_top_level_item` nodes,
140+
// which include both `_top_level_item_normal` and
141+
// `_top_level_item_floating`. However, we need a workaround for
142+
// implicit programs (those without an explicit starting statement, and
143+
// that end abruptly); e.g.:
144+
//
145+
// return 1
146+
// end
147+
//
148+
// Because implicit programs lack a starting `program` statement,
149+
// tree-sitter initially parses the code as "floating". When it later
150+
// encounters the `end` that implies a program scope, tree-sitter
151+
// creates and immediately closes an empty program node. This isn't an
152+
// accurate representation of the source code, as all statements that
153+
// looked "floating" actually belong inside the program.
154+
//
155+
// I tried to adjust parsing priorities to steer tree-sitter down the
156+
// correct path in these scenarios, but every attempt ended up breaking
157+
// the handling of floating code in some way.
158+
//
159+
// Ultimately, the simplest workable compromise I found is to replace
160+
// `_top_level_item` with `_top_level_item_no_program` and allow an
161+
// optional `end_program_statement`. This approach preserves correct
162+
// parsing of floating code, while also allowing the parse tree to
163+
// represent implicit programs via their terminator, instead of
164+
// creating empty program nodes.
165+
repeat($._top_level_item_no_program),
166+
optional($.end_program_statement),
167+
),
130168
),
131169

132170
_top_level_item: $ => prec(2, choice(
133-
$.include_statement,
134-
$.program,
135-
$.module,
136-
$.submodule,
137-
$.interface,
138-
$.subroutine,
139-
$.function,
140-
$.block_data,
141-
$.preproc_if,
142-
$.preproc_ifdef,
143-
$.preproc_include,
144-
$.preproc_def,
145-
$.preproc_function_def,
146-
$.preproc_call,
171+
$._top_level_item_normal,
172+
$._top_level_item_floating,
173+
)),
174+
175+
_top_level_item_no_program: $ => prec(2, choice(
176+
$._top_level_item_normal_no_program,
177+
$._top_level_item_floating,
178+
)),
179+
180+
_top_level_item_normal: $ => prec(4,
181+
normalTopLevelItemInFortranFile($, /*allowProgram=*/true)
182+
),
183+
184+
_top_level_item_normal_no_program: $ => prec(4,
185+
normalTopLevelItemInFortranFile($, /*allowProgram=*/false)
186+
),
187+
188+
_top_level_item_floating: $ => prec(3, choice(
189+
$._specification_part,
190+
$._statement,
147191
)),
148192

149193
// Preprocessor
@@ -2562,6 +2606,36 @@ function preprocessor(command) {
25622606
return alias(new RegExp('#[ \t]*' + command), '#' + command);
25632607
}
25642608

2609+
/**
2610+
* Build the list of top-level items accepted in a normal Fortran translation unit.
2611+
*
2612+
* @param {GrammarSymbols<string>} $ Grammar symbols object; rules are referenced as `$.<rule_name>`.
2613+
* @param {boolean} allowProgram When true, `$.program` is added to the accepted items.
2614+
*
2615+
* @returns {ChoiceRule} A `choice(...)` over the accepted items.
2616+
*/
2617+
function normalTopLevelItemInFortranFile($, allowProgram) {
2618+
const items = [
2619+
$.include_statement,
2620+
$.module,
2621+
$.submodule,
2622+
$.interface,
2623+
$.subroutine,
2624+
$.function,
2625+
$.block_data,
2626+
$.preproc_if,
2627+
$.preproc_ifdef,
2628+
$.preproc_include,
2629+
$.preproc_def,
2630+
$.preproc_function_def,
2631+
$.preproc_call,
2632+
];
2633+
// Insert `$.program` at index 1, i.e. directly after `$.include_statement`.
if (allowProgram) {
2634+
items.splice(1, 0, $.program);
2635+
}
2636+
return choice(...items);
2637+
}
2638+
25652639
/**
25662640
* Common rule for procedures (function, subroutine, module procedure)
25672641
*

0 commit comments

Comments
 (0)