@@ -104,8 +104,6 @@ module.exports = grammar({
104104 [ $ . rank_statement ] ,
105105 [ $ . stop_statement , $ . identifier ] ,
106106 [ $ . type_statement ] ,
107- [ $ . preproc_else_in_specification_part , $ . program ] ,
108- [ $ . preproc_if_in_specification_part , $ . program ] ,
109107 [ $ . _preproc_expression , $ . _expression ] ,
110108 [ $ . coarray_critical_statement , $ . identifier ] ,
111109 [ $ . format_statement , $ . identifier ] ,
@@ -124,26 +122,74 @@ module.exports = grammar({
124122 ] ,
125123
126124 rules : {
127- translation_unit : $ => seq (
128- repeat ( $ . _top_level_item ) ,
129- optional ( $ . program ) ,
125+ translation_unit : $ => choice (
126+ // A complete (normal) Fortran source file
127+ seq (
128+ repeat ( $ . _top_level_item_normal ) ,
129+ optional ( $ . program ) ,
130+ ) ,
131+ // A relaxed top-level parsing, allowing specification parts and
132+ // executable statements at top-level. This is particularly useful for
133+ // parsing include files, which typically contain fragments of code
134+ $ . _relaxed_translation_unit ,
135+ ) ,
136+
137+ _relaxed_translation_unit : $ => seq (
138+ // 0 or more
139+ repeat ( $ . _top_level_item_normal ) ,
140+ // At least one item not typically allowed at top-level
141+ $ . _top_level_item_floating ,
142+ // Ideally, we would now allow any number of `_top_level_item` nodes,
143+ // which include both `_top_level_item_normal` and
144+ // `_top_level_item_floating`. However, we need a workaround for
145+ // implicit programs (those without an explicit starting statement, and
146+ // that end abruptly); e.g.:
147+ //
148+ // return 1
149+ // end
150+ //
151+ // Because implicit programs lack a starting `program` statement,
152+ // tree-sitter initially parses the code as "floating". When it later
153+ // encounters the `end` that implies a program scope, tree-sitter
154+ // creates and immediately closes an empty program node. This isn't an
155+ // accurate representation of the source code, as all statements that
156+ // looked "floating" actually belong inside the program.
157+ //
158+ // I tried to adjust parsing priorities to steer tree-sitter down the
159+ // correct path in these scenarios, but every attempt ended up breaking
160+ // the handling of floating code in some way.
161+ //
162+ // Ultimately, the simplest workable compromise I found is to replace
163+ // `_top_level_item` with `_top_level_item_no_program` and allow an
164+ // optional `end_program_statement`. This approach preserves correct
165+ // parsing of floating code, while also allowing the parse tree to
166+ // represent implicit programs via their terminator, instead of
167+ // creating empty program nodes.
168+ repeat ( $ . _top_level_item_no_program ) ,
169+ optional ( $ . end_program_statement ) ,
130170 ) ,
131171
132172 _top_level_item : $ => prec ( 2 , choice (
133- $ . include_statement ,
134- $ . program ,
135- $ . module ,
136- $ . submodule ,
137- $ . interface ,
138- $ . subroutine ,
139- $ . function ,
140- $ . block_data ,
141- $ . preproc_if ,
142- $ . preproc_ifdef ,
143- $ . preproc_include ,
144- $ . preproc_def ,
145- $ . preproc_function_def ,
146- $ . preproc_call ,
173+ $ . _top_level_item_normal ,
174+ $ . _top_level_item_floating ,
175+ ) ) ,
176+
177+ _top_level_item_no_program : $ => prec ( 2 , choice (
178+ $ . _top_level_item_normal_no_program ,
179+ $ . _top_level_item_floating ,
180+ ) ) ,
181+
182+ _top_level_item_normal : $ => prec ( 4 ,
183+ normalTopLevelItemInFortranFile ( $ , /*allowProgram=*/ true )
184+ ) ,
185+
186+ _top_level_item_normal_no_program : $ => prec ( 4 ,
187+ normalTopLevelItemInFortranFile ( $ , /*allowProgram=*/ false )
188+ ) ,
189+
190+ _top_level_item_floating : $ => prec ( 3 , choice (
191+ $ . _specification_part ,
192+ $ . _statement ,
147193 ) ) ,
148194
149195 // Preprocessor
@@ -2562,6 +2608,36 @@ function preprocessor(command) {
25622608 return alias ( new RegExp ( '#[ \t]*' + command ) , '#' + command ) ;
25632609}
25642610
/**
 * Build the choice of top-level items accepted in a normal Fortran
 * translation unit.
 *
 * `program` is optional so that callers can request a variant of the list
 * that excludes it. When included, it is placed directly after
 * `include_statement`; the order of all other alternatives is fixed.
 *
 * @param {GrammarSymbols<string>} $
 * @param {boolean} allowProgram whether `$.program` is one of the alternatives
 *
 * @returns {ChoiceRule}
 */
function normalTopLevelItemInFortranFile($, allowProgram) {
  return choice(
    $.include_statement,
    // Conditional spread keeps `program` in its slot without relying on a
    // positional index into a mutable array.
    ...(allowProgram ? [$.program] : []),
    $.module,
    $.submodule,
    $.interface,
    $.subroutine,
    $.function,
    $.block_data,
    $.preproc_if,
    $.preproc_ifdef,
    $.preproc_include,
    $.preproc_def,
    $.preproc_function_def,
    $.preproc_call,
  );
}
2640+
25652641/**
25662642 * Common rule for procedures (function, subroutine, module procedure)
25672643 *
0 commit comments