Skip to content

Commit 9464798

Browse files
committed
Add support for parsing "floating" code
Incomplete blocks of code, such as partial derived type declarations or do loops, will still trigger parsing errors.
1 parent a695bee commit 9464798

6 files changed

Lines changed: 859958 additions & 788579 deletions

File tree

grammar.js

Lines changed: 90 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,6 @@ module.exports = grammar({
104104
[$.rank_statement],
105105
[$.stop_statement, $.identifier],
106106
[$.type_statement],
107-
[$.preproc_else_in_specification_part, $.program],
108-
[$.preproc_if_in_specification_part, $.program],
109107
[$._preproc_expression, $._expression],
110108
[$.coarray_critical_statement, $.identifier],
111109
[$.format_statement, $.identifier],
@@ -124,26 +122,69 @@ module.exports = grammar({
124122
],
125123

126124
rules: {
127-
translation_unit: $ => seq(
128-
repeat($._top_level_item),
129-
optional($.program),
125+
translation_unit: $ => choice(
126+
// A complete (normal) Fortran source file
127+
seq(
128+
repeat($._top_level_item_normal),
129+
optional($.program),
130+
),
131+
// A relaxed top-level parsing, allowing specification parts and
132+
// executable statements at top-level. This is particularly useful for
133+
// parsing include files, which typically contain fragments of code
134+
seq(
135+
// 0 or more
136+
repeat($._top_level_item_normal),
137+
// At least one item not typically allowed at top-level
138+
$._top_level_item_floating,
139+
// Ideally, we would allow any number of `top_level_item` nodes, which
140+
// include both `_top_level_item_normal` and
141+
// `_top_level_item_floating`. However, we need a workaround for
142+
// implicit programs (those without an explicit starting statement that
143+
// end abruptly); e.g.:
144+
//
145+
// return 1
146+
// end
147+
//
148+
// Because implicit programs lack a starting `program` statement,
149+
// tree-sitter initially parses the code as "floating". When it later
150+
// encounters the `end` that implies a program scope, tree-sitter
151+
// creates and immediately closes an empty program node. This wouldn't
152+
// be an accurate representation of the source code.
153+
//
154+
// I tried adjusting parsing priorities to force tree-sitter down the
155+
// correct path in these scenarios, but every attempt ended up breaking
156+
// the handling of floating code in some way.
157+
//
158+
// The simplest workable compromise is to allow an optional
159+
// `end_program_statement`, which lets the parse tree represent the
160+
// program implicitly via its terminator while also preventing any
161+
// parse errors. This behavior is a relevant difference from upstream.
162+
repeat($._top_level_item_no_program),
163+
optional($.end_program_statement),
164+
),
130165
),
131166

132167
_top_level_item: $ => prec(2, choice(
133-
$.include_statement,
134-
$.program,
135-
$.module,
136-
$.submodule,
137-
$.interface,
138-
$.subroutine,
139-
$.function,
140-
$.block_data,
141-
$.preproc_if,
142-
$.preproc_ifdef,
143-
$.preproc_include,
144-
$.preproc_def,
145-
$.preproc_function_def,
146-
$.preproc_call,
168+
$._top_level_item_normal,
169+
$._top_level_item_floating,
170+
)),
171+
172+
_top_level_item_no_program: $ => prec(2, choice(
173+
$._top_level_item_normal_no_program,
174+
$._top_level_item_floating,
175+
)),
176+
177+
_top_level_item_normal: $ => prec(4,
178+
normalTopLevelItemInFortranFile($, /*allowProgram=*/true)
179+
),
180+
181+
_top_level_item_normal_no_program: $ => prec(4,
182+
normalTopLevelItemInFortranFile($, /*allowProgram=*/false)
183+
),
184+
185+
_top_level_item_floating: $ => prec(3, choice(
186+
$._specification_part,
187+
$._statement,
147188
)),
148189

149190
// Preprocessor
@@ -2562,6 +2603,36 @@ function preprocessor(command) {
25622603
return alias(new RegExp('#[ \t]*' + command), '#' + command);
25632604
}
25642605

2606+
/**
2607+
* List of top-level items accepted in a normal Fortran translation unit
2608+
*
2609+
* @param {GrammarSymbols<string>} $
2610+
* @param {boolean} allowProgram Whether `$.program` is included among the accepted items
2611+
*
2612+
* @returns {ChoiceRule} A `choice(...)` over the accepted top-level items
2613+
*/
2614+
function normalTopLevelItemInFortranFile($, allowProgram) {
2615+
const items = [
2616+
$.include_statement,
2617+
$.module,
2618+
$.submodule,
2619+
$.interface,
2620+
$.subroutine,
2621+
$.function,
2622+
$.block_data,
2623+
$.preproc_if,
2624+
$.preproc_ifdef,
2625+
$.preproc_include,
2626+
$.preproc_def,
2627+
$.preproc_function_def,
2628+
$.preproc_call,
2629+
];
2630+
if (allowProgram) {
2631+
items.splice(1, 0, $.program);
2632+
}
2633+
return choice(...items);
2634+
}
2635+
25652636
/**
25662637
* Common rule for procedures (function, subroutine, module procedure)
25672638
*

0 commit comments

Comments
 (0)