@@ -104,8 +104,6 @@ module.exports = grammar({
104104 [ $ . rank_statement ] ,
105105 [ $ . stop_statement , $ . identifier ] ,
106106 [ $ . type_statement ] ,
107- [ $ . preproc_else_in_specification_part , $ . program ] ,
108- [ $ . preproc_if_in_specification_part , $ . program ] ,
109107 [ $ . _preproc_expression , $ . _expression ] ,
110108 [ $ . coarray_critical_statement , $ . identifier ] ,
111109 [ $ . format_statement , $ . identifier ] ,
@@ -124,26 +122,69 @@ module.exports = grammar({
124122 ] ,
125123
126124 rules : {
127- translation_unit : $ => seq (
128- repeat ( $ . _top_level_item ) ,
129- optional ( $ . program ) ,
125+ translation_unit : $ => choice (
126+ // A complete (normal) Fortran source file
127+ seq (
128+ repeat ( $ . _top_level_item_normal ) ,
129+ optional ( $ . program ) ,
130+ ) ,
131+ // A relaxed top-level parsing, allowing specification parts and
132+ // executable statements at top-level. This is particularly useful for
133+ // parsing include files, which typically contain fragments of code.
134+ seq (
135+ // Zero or more normal top-level items
136+ repeat ( $ . _top_level_item_normal ) ,
137+ // At least one item not typically allowed at top-level
138+ $ . _top_level_item_floating ,
139+ // Ideally, we would allow any number of `_top_level_item` nodes, which
140+ // include both `_top_level_item_normal` and
141+ // `_top_level_item_floating`. However, we need a workaround for
142+ // implicit programs (those without an explicit starting statement that
143+ // end abruptly); e.g.:
144+ //
145+ // return 1
146+ // end
147+ //
148+ // Because implicit programs lack a starting `program` statement,
149+ // tree-sitter initially parses the code as "floating". When it later
150+ // encounters the `end` that implies a program scope, tree-sitter
151+ // creates and immediately closes an empty program node. This wouldn't
152+ // be an accurate representation of the source code.
153+ //
154+ // I tried adjusting parsing priorities to force tree-sitter down the
155+ // correct path in these scenarios, but every attempt ended up breaking
156+ // the handling of floating code in some way.
157+ //
158+ // The simplest workable compromise is to allow an optional
159+ // `end_program_statement`, which lets the parse tree represent the
160+ // program implicitly via its terminator while also preventing any
161+ // parse errors. This behavior is a relevant difference from upstream.
162+ repeat ( $ . _top_level_item_no_program ) ,
163+ optional ( $ . end_program_statement ) ,
164+ ) ,
130165 ) ,
131166
132167 _top_level_item : $ => prec ( 2 , choice ( // Any top-level item: either a normal one or a "floating" one
133- $ . include_statement ,
134- $ . program ,
135- $ . module ,
136- $ . submodule ,
137- $ . interface ,
138- $ . subroutine ,
139- $ . function ,
140- $ . block_data ,
141- $ . preproc_if ,
142- $ . preproc_ifdef ,
143- $ . preproc_include ,
144- $ . preproc_def ,
145- $ . preproc_function_def ,
146- $ . preproc_call ,
168+ $ . _top_level_item_normal ,
169+ $ . _top_level_item_floating ,
170+ ) ) ,
171+
172+ _top_level_item_no_program : $ => prec ( 2 , choice ( // Like `_top_level_item`, but the normal alternative never matches `program`
173+ $ . _top_level_item_normal_no_program ,
174+ $ . _top_level_item_floating ,
175+ ) ) ,
176+
177+ _top_level_item_normal : $ => prec ( 4 , // Items accepted at the top level of a normal source file, `program` included
178+ normalTopLevelItemInFortranFile ( $ , /*allowProgram=*/ true )
179+ ) ,
180+
181+ _top_level_item_normal_no_program : $ => prec ( 4 , // Same items as `_top_level_item_normal`, with `program` left out
182+ normalTopLevelItemInFortranFile ( $ , /*allowProgram=*/ false )
183+ ) ,
184+
185+ _top_level_item_floating : $ => prec ( 3 , choice ( // Specification parts and statements, which are not typically allowed at top level
186+ $ . _specification_part ,
187+ $ . _statement ,
147188 ) ) ,
148189
149190 // Preprocessor
@@ -2562,6 +2603,36 @@ function preprocessor(command) {
25622603 return alias ( new RegExp ( '#[ \t]*' + command ) , '#' + command ) ;
25632604}
25642605
/**
 * Build the choice of top-level items accepted in a normal Fortran
 * translation unit.
 *
 * The alternatives are listed in a fixed order. When `allowProgram` is
 * truthy, `$.program` is offered right after `$.include_statement`;
 * otherwise it is omitted and every other alternative is unchanged.
 *
 * @param {GrammarSymbols<string>} $ Symbol accessor received by the grammar
 *   rule callbacks; rules are read from it as `$.<rule_name>`.
 * @param {boolean} allowProgram Whether `$.program` is one of the
 *   alternatives.
 *
 * @returns {ChoiceRule} A `choice(...)` over the selected items.
 */
function normalTopLevelItemInFortranFile($, allowProgram) {
  return choice(
    $.include_statement,
    // Declared in place instead of being spliced in at a numeric index, so
    // reordering the list can never move `program` to the wrong position.
    ...(allowProgram ? [$.program] : []),
    $.module,
    $.submodule,
    $.interface,
    $.subroutine,
    $.function,
    $.block_data,
    $.preproc_if,
    $.preproc_ifdef,
    $.preproc_include,
    $.preproc_def,
    $.preproc_function_def,
    $.preproc_call,
  );
}
2635+
25652636/**
25662637 * Common rule for procedures (function, subroutine, module procedure)
25672638 *
0 commit comments