From b7af04d69fa69ccbcc40fcb9e4ede455da208070 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Mon, 30 Sep 2024 21:30:07 +0200 Subject: [PATCH 01/42] First commit to add Scala Backend --- source/BNFC.cabal | 3 ++ source/main/Main.hs | 2 + source/src/BNFC/Backend/Scala.hs | 17 +++++++ source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs | 45 +++++++++++++++++++ source/src/BNFC/Options.hs | 3 +- 5 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 source/src/BNFC/Backend/Scala.hs create mode 100644 source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs diff --git a/source/BNFC.cabal b/source/BNFC.cabal index 7300a8d29..9174f3829 100644 --- a/source/BNFC.cabal +++ b/source/BNFC.cabal @@ -272,6 +272,9 @@ library -- Pygments backend BNFC.Backend.Pygments + -- Scala backend + BNFC.Backend.Scala + -- Agda backend BNFC.Backend.Agda diff --git a/source/main/Main.hs b/source/main/Main.hs index 754bf2684..8e4c27653 100644 --- a/source/main/Main.hs +++ b/source/main/Main.hs @@ -26,6 +26,7 @@ import BNFC.Backend.Latex import BNFC.Backend.OCaml import BNFC.Backend.Pygments import BNFC.Backend.TreeSitter +import BNFC.Backend.Scala import BNFC.CF (CF) import BNFC.GetCF import BNFC.Options hiding (make, Backend) @@ -83,3 +84,4 @@ maketarget = \case TargetPygments -> makePygments TargetCheck -> error "impossible" TargetTreeSitter -> makeTreeSitter + TargetScala -> makeScala diff --git a/source/src/BNFC/Backend/Scala.hs b/source/src/BNFC/Backend/Scala.hs new file mode 100644 index 000000000..52197e650 --- /dev/null +++ b/source/src/BNFC/Backend/Scala.hs @@ -0,0 +1,17 @@ +module BNFC.Backend.Scala where + +import Prelude hiding ((<>)) + +import BNFC.Backend.Base (mkfile, Backend) +import BNFC.CF +import BNFC.Options hiding (Backend) +import BNFC.PrettyPrint (vcat, Doc) + +-- | Entrypoint for the Scala backend. + +makeScala :: SharedOptions -> CF -> Backend +makeScala opts cf = do + mkfile "scala.lx" comment $ vcat [ "ScalaFirt try" ] + +comment :: String -> String +comment = ("// " ++) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs b/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs new file mode 100644 index 000000000..65b71c8d7 --- /dev/null +++ b/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs @@ -0,0 +1,45 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE OverloadedStrings #-} + +{- + BNF Converter: Scala Abstract syntax + Copyright (Scala) 2024 Author: Juan Pablo Poittevin + + Description : This module generates the Scala Abstract Syntax + tree classes. It generates both a Header file + and an Implementation file + + Author : Juan Pablo Poittevin + Created : 30 September, 2024 +-} + +module BNFC.Backend.C.CFtoCAbs (cf2CAbs) where + +import Prelude hiding ((<>)) + +import Control.Monad.State (State, gets, modify, evalState) + +import Data.Char ( toLower ) +import Data.Either ( lefts ) +import Data.Function ( on ) +import Data.List ( groupBy, intercalate, intersperse, nub, sort ) +import Data.Maybe ( mapMaybe ) +import Data.Set ( Set ) + +import qualified Data.Set as Set + +import BNFC.CF +import BNFC.PrettyPrint +import BNFC.Options ( RecordPositions(..) ) +import BNFC.Utils ( (+++), unless ) +import BNFC.Backend.Common.NamedVariables + + +-- | The result is two files (.H file, .C file) +cf2CAbs + :: RecordPositions + -> String -- ^ Ignored. + -> CF -- ^ Grammar. + -> (String, String) -- ^ @.H@ file, @.C@ file. +cf2CAbs rp _ cf = ["ScalaAbsFile"] diff --git a/source/src/BNFC/Options.hs b/source/src/BNFC/Options.hs index ac5fdbf65..d1b0cafd2 100644 --- a/source/src/BNFC/Options.hs +++ b/source/src/BNFC/Options.hs @@ -64,7 +64,7 @@ data Target = TargetC | TargetCpp | TargetCppNoStl | TargetHaskell | TargetHaskellGadt | TargetLatex | TargetJava | TargetOCaml | TargetPygments | TargetTreeSitter - | TargetCheck + | TargetCheck | TargetScala deriving (Eq, Bounded, Enum, Ord) -- | List of Haskell target. @@ -83,6 +83,7 @@ instance Show Target where show TargetPygments = "Pygments" show TargetTreeSitter = "Tree-sitter" show TargetCheck = "Check LBNF file" + show TargetScala = "Scala" -- | Which version of Alex is targeted? data AlexVersion = Alex3 From 15ebbed6535d04b9c6a0e77f9ce83e71f376a8bc Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Mon, 30 Sep 2024 21:51:02 +0200 Subject: [PATCH 02/42] Added Scala to command doc output --- source/src/BNFC/Options.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source/src/BNFC/Options.hs b/source/src/BNFC/Options.hs index d1b0cafd2..86b89b9d5 100644 --- a/source/src/BNFC/Options.hs +++ b/source/src/BNFC/Options.hs @@ -313,6 +313,8 @@ targetOptions = "Output OCaml code for use with ocamllex and menhir (short for --ocaml --menhir)" , Option "" ["pygments"] (NoArg (\o -> o {target = TargetPygments})) "Output a Python lexer for Pygments" + , Option "" ["scala"] (NoArg (\o -> o {target = TargetScala})) + "Output a Scala lexer" , Option "" ["tree-sitter"] (NoArg (\o -> o {target = TargetTreeSitter})) "Output grammar.js file for use with tree-sitter" , Option "" ["check"] (NoArg (\ o -> o{target = TargetCheck })) @@ -530,6 +532,7 @@ instance Maintained Target where TargetJava -> True TargetOCaml -> True TargetPygments -> True + TargetScala -> True TargetTreeSitter -> True TargetCheck -> True From ba6278a8ee3d51d757f31896c3efea84d5a7c611 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Wed, 2 Oct 2024 20:41:09 +0200 Subject: [PATCH 03/42] using CFtoScalaAbs.hs --- source/src/BNFC/Backend/Scala.hs | 6 ++++-- source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs | 12 +++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/source/src/BNFC/Backend/Scala.hs b/source/src/BNFC/Backend/Scala.hs index 52197e650..4ad4dfcfc 100644 --- a/source/src/BNFC/Backend/Scala.hs +++ b/source/src/BNFC/Backend/Scala.hs @@ -6,12 +6,14 @@ import BNFC.Backend.Base (mkfile, Backend) import BNFC.CF import BNFC.Options hiding (Backend) import BNFC.PrettyPrint (vcat, Doc) +import BNFC.Backend.Scala.CFtoScalaAbs (cf2ScalaAbs) -- | Entrypoint for the Scala backend. makeScala :: SharedOptions -> CF -> Backend -makeScala opts cf = do - mkfile "scala.lx" comment $ vcat [ "ScalaFirt try" ] +makeScala opts cf = do + mkfile (name ++ ".lx") comment (cf2ScalaAbs cf) + where name = lang opts comment :: String -> String comment = ("// " ++) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs b/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs index 65b71c8d7..6cf2b8e15 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs @@ -14,7 +14,7 @@ Created : 30 September, 2024 -} -module BNFC.Backend.C.CFtoCAbs (cf2CAbs) where +module BNFC.Backend.Scala.CFtoScalaAbs (cf2ScalaAbs) where import Prelude hiding ((<>)) @@ -37,9 +37,7 @@ import BNFC.Backend.Common.NamedVariables -- | The result is two files (.H file, .C file) -cf2CAbs - :: RecordPositions - -> String -- ^ Ignored. - -> CF -- ^ Grammar. - -> (String, String) -- ^ @.H@ file, @.C@ file. -cf2CAbs rp _ cf = ["ScalaAbsFile"] +cf2ScalaAbs + :: CF -- ^ Grammar. + -> Doc -- ^ @.H@ file, @.C@ file. +cf2ScalaAbs cf = vcat [ "ScalaAbsFile"] From 3fdf4d3606fc564fbb02e58e6a2e2f8a78a50b0f Mon Sep 17 00:00:00 2001 From: WilliamOlsen13 Date: Thu, 3 Oct 2024 02:36:33 -0300 Subject: [PATCH 04/42] Update BNFC.cabal --- source/BNFC.cabal | 1 + 1 file changed, 1 insertion(+) diff --git a/source/BNFC.cabal b/source/BNFC.cabal index 9174f3829..736d00e8a 100644 --- a/source/BNFC.cabal +++ b/source/BNFC.cabal @@ -274,6 +274,7 @@ library -- Scala backend BNFC.Backend.Scala + BNFC.Backend.Scala.CFtoScalaAbs -- Agda backend BNFC.Backend.Agda From 764b662dc29b5922d3d3261d6d8f8a96aeccbb02 Mon Sep 17 00:00:00 2001 From: WilliamOlsen13 Date: Thu, 3 Oct 2024 03:05:30 -0300 Subject: [PATCH 05/42] Update Options.hs --- source/src/BNFC/Options.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/src/BNFC/Options.hs b/source/src/BNFC/Options.hs index 86b89b9d5..573e6049e 100644 --- a/source/src/BNFC/Options.hs +++ b/source/src/BNFC/Options.hs @@ -532,7 +532,7 @@ instance Maintained Target where TargetJava -> True TargetOCaml -> True TargetPygments -> True - TargetScala -> True + TargetScala -> True TargetTreeSitter -> True TargetCheck -> True @@ -649,6 +649,7 @@ translateOldOptions = mapM $ \ o -> do , ("-csharp" , "--csharp") , ("-ocaml" , "--ocaml") , ("-haskell" , "--haskell") + , ("-scala" , "--scala") , ("-prof" , "--profile") , ("-gadt" , "--haskell-gadt") , ("-alex1" , "--alex1") From a21d53fd4e1db2f4c7d1ce56e2da8b5550e28e11 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Mon, 7 Oct 2024 21:02:40 +0200 Subject: [PATCH 06/42] Advance processing data object --- .gitignore | 2 + source/src/BNFC/Backend/Scala.hs | 2 +- source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs | 51 ++++++++++++------- 3 files changed, 37 insertions(+), 18 deletions(-) diff --git a/.gitignore b/.gitignore index 9839dd9ff..5e4f1208c 100644 --- a/.gitignore +++ b/.gitignore @@ -172,3 +172,5 @@ cabal.sandbox.config /testing/regression-tests/266_define/nostl-2.9.0/ /testing/regression-tests/comments/ocaml/ + +ignore-dir/ diff --git a/source/src/BNFC/Backend/Scala.hs b/source/src/BNFC/Backend/Scala.hs index 4ad4dfcfc..1a48db3da 100644 --- a/source/src/BNFC/Backend/Scala.hs +++ b/source/src/BNFC/Backend/Scala.hs @@ -12,7 +12,7 @@ import BNFC.Backend.Scala.CFtoScalaAbs (cf2ScalaAbs) makeScala :: SharedOptions -> CF -> Backend makeScala opts cf = do - mkfile (name ++ ".lx") comment (cf2ScalaAbs cf) + mkfile (name ++ ".lx") comment (cf2ScalaAbs opts cf) where name = lang opts comment :: String -> String diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs b/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs index 6cf2b8e15..be3b3f707 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs @@ -18,26 +18,43 @@ module BNFC.Backend.Scala.CFtoScalaAbs (cf2ScalaAbs) where import Prelude hiding ((<>)) -import Control.Monad.State (State, gets, modify, evalState) - -import Data.Char ( toLower ) -import Data.Either ( lefts ) -import Data.Function ( on ) -import Data.List ( groupBy, intercalate, intersperse, nub, sort ) -import Data.Maybe ( mapMaybe ) -import Data.Set ( Set ) - -import qualified Data.Set as Set - import BNFC.CF import BNFC.PrettyPrint -import BNFC.Options ( RecordPositions(..) ) -import BNFC.Utils ( (+++), unless ) -import BNFC.Backend.Common.NamedVariables - +import BNFC.Backend.Haskell.Utils +import Data.Either +import BNFC.Options -- | The result is two files (.H file, .C file) cf2ScalaAbs - :: CF -- ^ Grammar. + :: SharedOptions + -> CF -- ^ Grammar. -> Doc -- ^ @.H@ file, @.C@ file. -cf2ScalaAbs cf = vcat [ "ScalaAbsFile"] +cf2ScalaAbs Options{ lang, tokenText, generic, functor } cf = vsep . concat $ + [ + [] + , map text $ map dataToString datas + , map (prData functor) datas + , ["// amount of datas: ", text $ show $ length datas] + , ["// ScalaAbsFile", ""] + ] + where + datas = cf2data cf + + +dataToString :: (Cat, [(String, [Cat])]) -> String +dataToString (cat, _) = catToStr cat + + +prData :: Bool -> Data -> Doc +prData functor (cat,rules) = vcat $ concat + [ + [ hang ("Sclass" <+> dataType) 4 $ constructors rules ] + ] + where + prRule (fun, cats) = hsep $ concat [ [text fun], map prArg cats ] + dataType = pretty cat + prArg c = catToType id empty c + constructors [] = empty + -- I need to fix the next line, is adding the : at the haskell style and not Scala + constructors (h:t) = sep $ [prRule h <+> ":"] ++ map ((<+>":") . prRule) t -- TODO: this can be improved, there is no need for 2 maps + -- constructors (h:t) = sep $ [text h <+> ":"] \ No newline at end of file From 73101b09cf337e8269e89f618d90fb3d713f378f Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Thu, 10 Oct 2024 21:08:06 +0200 Subject: [PATCH 07/42] Update Abs to be more Scala ish --- source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs b/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs index be3b3f707..93ccc7249 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs @@ -23,6 +23,7 @@ import BNFC.PrettyPrint import BNFC.Backend.Haskell.Utils import Data.Either import BNFC.Options +import Data.List (intercalate) -- | The result is two files (.H file, .C file) cf2ScalaAbs @@ -44,17 +45,20 @@ cf2ScalaAbs Options{ lang, tokenText, generic, functor } cf = vsep . concat $ dataToString :: (Cat, [(String, [Cat])]) -> String dataToString (cat, _) = catToStr cat +mapBut1 :: (a -> a) -> [a] -> [a] +mapBut1 f (x:y:xs) = f x : mapBut1 f (y:xs) +mapBut1 _ other = other prData :: Bool -> Data -> Doc prData functor (cat,rules) = vcat $ concat [ - [ hang ("Sclass" <+> dataType) 4 $ constructors rules ] + [ hang ("Sclass" <+> dataType <+> ":") 4 $ constructors rules ] -- I have to find a way to add strings without space, <+> add stirngs but add also a space ] where - prRule (fun, cats) = hsep $ concat [ [text fun], map prArg cats ] + -- This "mapBut1" is used to add a comma between numerated parameters, but I think should be a better way + prRule (fun, cats) = hsep $ concat [ [text fun <+> "("], mapBut1 (<+> text ", ") (map prArg (indexedCats cats)) , [text ") {}"] ] dataType = pretty cat - prArg c = catToType id empty c + indexedCats c = zip [0..] c + prArg (n, c) = text $ catToStr c ++ " " ++ ("p" ++ show n) constructors [] = empty - -- I need to fix the next line, is adding the : at the haskell style and not Scala - constructors (h:t) = sep $ [prRule h <+> ":"] ++ map ((<+>":") . prRule) t -- TODO: this can be improved, there is no need for 2 maps - -- constructors (h:t) = sep $ [text h <+> ":"] \ No newline at end of file + constructors (h:t) = sep $ [prRule h] ++ map prRule t \ No newline at end of file From 5900317ffce607fae15408abb3551ed5ca35af96 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Mon, 14 Oct 2024 21:06:59 +0200 Subject: [PATCH 08/42] Change Abs to match Scala syntax example --- .gitignore | 2 + source/src/BNFC/Backend/Scala.hs | 2 +- source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs | 47 ++++++++++--------- 3 files changed, 27 insertions(+), 24 deletions(-) diff --git a/.gitignore b/.gitignore index 5e4f1208c..ce92f1390 100644 --- a/.gitignore +++ b/.gitignore @@ -174,3 +174,5 @@ cabal.sandbox.config /testing/regression-tests/comments/ocaml/ ignore-dir/ + +.DS_Store diff --git a/source/src/BNFC/Backend/Scala.hs b/source/src/BNFC/Backend/Scala.hs index 1a48db3da..74c543410 100644 --- a/source/src/BNFC/Backend/Scala.hs +++ b/source/src/BNFC/Backend/Scala.hs @@ -12,7 +12,7 @@ import BNFC.Backend.Scala.CFtoScalaAbs (cf2ScalaAbs) makeScala :: SharedOptions -> CF -> Backend makeScala opts cf = do - mkfile (name ++ ".lx") comment (cf2ScalaAbs opts cf) + mkfile (name ++ ".scala") comment (cf2ScalaAbs opts cf) where name = lang opts comment :: String -> String diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs b/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs index 93ccc7249..6764ae19c 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs @@ -20,45 +20,46 @@ import Prelude hiding ((<>)) import BNFC.CF import BNFC.PrettyPrint -import BNFC.Backend.Haskell.Utils -import Data.Either import BNFC.Options -import Data.List (intercalate) -- | The result is two files (.H file, .C file) cf2ScalaAbs :: SharedOptions -> CF -- ^ Grammar. -> Doc -- ^ @.H@ file, @.C@ file. -cf2ScalaAbs Options{ lang, tokenText, generic, functor } cf = vsep . concat $ +cf2ScalaAbs Options{ lang } cf = vsep . concat $ [ [] - , map text $ map dataToString datas - , map (prData functor) datas - , ["// amount of datas: ", text $ show $ length datas] - , ["// ScalaAbsFile", ""] + , ["package" <+> text lang] + , [hang classSign 4 $ functions] + , defaultFunction lang + , ["}"] ] where - datas = cf2data cf + datas = cf2data cf + functions = vcat (map prData datas) -dataToString :: (Cat, [(String, [Cat])]) -> String -dataToString (cat, _) = catToStr cat +-- dataToString :: (Cat, [(String, [Cat])]) -> String +-- dataToString (cat, _) = catToStr cat -mapBut1 :: (a -> a) -> [a] -> [a] -mapBut1 f (x:y:xs) = f x : mapBut1 f (y:xs) -mapBut1 _ other = other +classSign :: Doc +classSign = "abstract class AbstractVisitor[R, A] extends AllVisitor[R, A]{" -prData :: Bool -> Data -> Doc -prData functor (cat,rules) = vcat $ concat +prData :: Data -> Doc +prData (_, rules) = vcat $ concat [ - [ hang ("Sclass" <+> dataType <+> ":") 4 $ constructors rules ] -- I have to find a way to add strings without space, <+> add stirngs but add also a space + [constructors rules] ] where - -- This "mapBut1" is used to add a comma between numerated parameters, but I think should be a better way - prRule (fun, cats) = hsep $ concat [ [text fun <+> "("], mapBut1 (<+> text ", ") (map prArg (indexedCats cats)) , [text ") {}"] ] - dataType = pretty cat - indexedCats c = zip [0..] c - prArg (n, c) = text $ catToStr c ++ " " ++ ("p" ++ show n) + prRule (fun, _) = hsep $ concat [ ["def visit(p: " <+> text ("Absyn." ++ fun) <+> text ", arg: A): R = visitDefault(p, arg)"] ] constructors [] = empty - constructors (h:t) = sep $ [prRule h] ++ map prRule t \ No newline at end of file + constructors (h:t) = sep $ [prRule h] ++ map prRule t + +defaultFunction :: String -> [Doc] +defaultFunction lang = + [ + nest 4 $ text $ "def visitDefault(p: Absyn." ++ lang ++ ", arg: A): R = {" + , nest 8 "throw new IllegalArgumentException(this.getClass.getName + \": \" + p)" + , nest 4 "}" + ] \ No newline at end of file From ed639011748c1e09682df16ec893214a0348e2cc Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Wed, 30 Oct 2024 20:53:52 +0100 Subject: [PATCH 09/42] Add auxiliar functions to print Data in file This add a bunch of warnings and a commented Code, must be removed in the near feature, this functions are useful for the study of BNFC --- source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs b/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs index 6764ae19c..b7a38f5ae 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs @@ -21,6 +21,7 @@ import Prelude hiding ((<>)) import BNFC.CF import BNFC.PrettyPrint import BNFC.Options +import Data.List -- | The result is two files (.H file, .C file) cf2ScalaAbs @@ -34,14 +35,22 @@ cf2ScalaAbs Options{ lang } cf = vsep . concat $ , [hang classSign 4 $ functions] , defaultFunction lang , ["}"] + , [text $ concat $ map dataToString datas] ] where datas = cf2data cf functions = vcat (map prData datas) --- dataToString :: (Cat, [(String, [Cat])]) -> String --- dataToString (cat, _) = catToStr cat +dataToString :: (Cat, [(String, [Cat])]) -> String +dataToString (cat, []) = catToStr cat +dataToString (cat, cats) = "Main Cat:" ++ catToStr cat ++ " List of [(String, [Cat])]: " ++ strCatToString cats + + +strCatToString :: [(String, [Cat])] -> String +strCatToString ([]) = "" +strCatToString (ncat:[]) = " \n Sub Cat: " ++ fst ncat ++ " Is compose of: " ++ intercalate " " (map catToStr (snd ncat)) +strCatToString (ncat:cats) = " \n Sub Cat: " ++ fst ncat ++ " Is compose of: " ++ intercalate " " (map catToStr (snd ncat)) ++ strCatToString cats classSign :: Doc classSign = "abstract class AbstractVisitor[R, A] extends AllVisitor[R, A]{" From 3cfa7a2f737f30b1c2976d05ab49e745d9fab5f8 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Wed, 30 Oct 2024 20:54:11 +0100 Subject: [PATCH 10/42] comment unnecesary code in scala --- source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs b/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs index b7a38f5ae..8aaf753c0 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs @@ -35,7 +35,7 @@ cf2ScalaAbs Options{ lang } cf = vsep . concat $ , [hang classSign 4 $ functions] , defaultFunction lang , ["}"] - , [text $ concat $ map dataToString datas] + -- , [text $ concat $ map dataToString datas] ] where datas = cf2data cf From d8be28c5b7e4834bb768ce6f21fe253a487d5794 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Tue, 10 Dec 2024 21:54:28 +0100 Subject: [PATCH 11/42] Base of Scala Lexer --- source/BNFC.cabal | 2 + source/src/BNFC/Backend/Scala.hs | 6 +- source/src/BNFC/Backend/Scala/CFtoScalaLex.hs | 160 ++++++++++++++++++ .../BNFC/Backend/Scala/CFtoScalaLexToken.hs | 50 ++++++ 4 files changed, 217 insertions(+), 1 deletion(-) create mode 100644 source/src/BNFC/Backend/Scala/CFtoScalaLex.hs create mode 100644 source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs diff --git a/source/BNFC.cabal b/source/BNFC.cabal index 736d00e8a..235278389 100644 --- a/source/BNFC.cabal +++ b/source/BNFC.cabal @@ -275,6 +275,8 @@ library -- Scala backend BNFC.Backend.Scala BNFC.Backend.Scala.CFtoScalaAbs + BNFC.Backend.Scala.CFtoScalaLex + BNFC.Backend.Scala.CFtoScalaLexToken -- Agda backend BNFC.Backend.Agda diff --git a/source/src/BNFC/Backend/Scala.hs b/source/src/BNFC/Backend/Scala.hs index 74c543410..ca2d60221 100644 --- a/source/src/BNFC/Backend/Scala.hs +++ b/source/src/BNFC/Backend/Scala.hs @@ -7,12 +7,16 @@ import BNFC.CF import BNFC.Options hiding (Backend) import BNFC.PrettyPrint (vcat, Doc) import BNFC.Backend.Scala.CFtoScalaAbs (cf2ScalaAbs) +import BNFC.Backend.Scala.CFtoScalaLex (cf2ScalaLex) +import BNFC.Backend.Scala.CFtoScalaLexToken (cf2ScalaLexToken) -- | Entrypoint for the Scala backend. makeScala :: SharedOptions -> CF -> Backend makeScala opts cf = do - mkfile (name ++ ".scala") comment (cf2ScalaAbs opts cf) + mkfile (name ++ ".scala") comment $ cf2ScalaAbs opts cf + mkfile (name ++ "LexToken.scala") comment $ cf2ScalaLexToken opts cf + mkfile (name ++ "Lex.scala") comment $ cf2ScalaLex opts cf where name = lang opts comment :: String -> String diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs b/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs new file mode 100644 index 000000000..b64935653 --- /dev/null +++ b/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs @@ -0,0 +1,160 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE OverloadedStrings #-} + +{- + BNF Converter: Scala Lextract syntax + Copyright (Scala) 2024 Author: Juan Pablo Poittevin + + Description : This module generates the Scala Lextract Syntax + tree classes. It generates both a Header file + and an Implementation file + + Author : Juan Pablo Poittevin + Created : 30 September, 2024 +-} + +module BNFC.Backend.Scala.CFtoScalaLex (cf2ScalaLex) where + +import Prelude hiding ((<>)) + +import BNFC.Utils (symbolToName) +import BNFC.CF +import BNFC.PrettyPrint +import BNFC.Options +import BNFC.Backend.Common (unicodeAndSymbols) +import Data.List + +-- | The result is two files (.H file, .C file) +cf2ScalaLex + :: SharedOptions + -> CF -- ^ Grammar. + -> Doc -- ^ @.H@ file, @.C@ file. +cf2ScalaLex Options{ lang } cf = vsep . concat $ + [ + [] + , imports + , initWorkflowClass + , getApplyFunction + , getTokensFunction symbs + , getIdentifiersFunction -- This should be called only in case of having identifiers in the lenguage, which anyway will be the case in most of the lenguages. + , getLiteralsFunction + , getIndentationsFunction + , endWorkflowClass + ] + where + symbs = unicodeAndSymbols cf + + +imports :: [Doc] +imports = [ + "package co.enear.parsercombinators.lexer" + , "import co.enear.parsercombinators.compiler.{Location, WorkflowLexerError}" + , "import scala.util.parsing.combinator.RegexParsers" + ] + + +initWorkflowClass :: [Doc] +initWorkflowClass = [ + "object WorkflowLexer extends RegexParsers {" + , nest 4 "override def skipWhitespace = true" + -- In case the lenguage use indentation for block creation, we should remove \n from the list of whiteSpace. + , nest 4 $ text $ "override val whiteSpace = " ++ "\"" ++ "[\\t\\r\\f\\n]+\".r" + ] + +endWorkflowClass :: [Doc] +endWorkflowClass = [ + "}" + ] + +getApplyFunction :: [Doc] +getApplyFunction = [ + "def apply(code: String): Either[WorkflowLexerError, List[WorkflowToken]] = {" + , nest 4 "parse(tokens, code) match {" + , nest 8 "case NoSuccess(msg, next) => Left(WorkflowLexerError(Location(next.pos.line, next.pos.column), msg))" + , nest 8 "case Success(result, next) => Right(result)" + , nest 4"}" + , "}" + ] + + +getIdentifiersFunction :: [Doc] +getIdentifiersFunction = [ + "def identifier: Parser[IDENTIFIER] = {" + , nest 4 "\"[a-zA-Z_][a-zA-Z0-9_]*\".r ^^ { str => IDENTIFIER(str) }" + , "}" + ] + +getLiteralsFunction :: [Doc] +getLiteralsFunction = [ + "def literal: Parser[LITERAL] = {" + , nest 4 "\"\"\"\"[^\"]*\"\"\"\".r ^^ { str =>" + , nest 8 "val content = str.substring(1, str.length - 1)" + , nest 8 "LITERAL(content)" + , nest 4 "}" + , "}" + ] + +getIndentationsFunction :: [Doc] +getIndentationsFunction = [ + "def indentation: Parser[INDENTATION] = {" + , nest 4 "\"\\n[ ]*\".r ^^ { whitespace =>" + , nest 8 "val nSpaces = whitespace.length - 1" + , nest 8 "INDENTATION(nSpaces)" + , nest 4 "}" + , "}" + ] + + +getTokensFunction :: [String] -> [Doc] +getTokensFunction symbs = [ + "def tokens: Parser[List[WorkflowToken]] = {" + -- TODO: Are the symbs enough for the token lexing? Check if we need to add any other data + -- At the end of this list of symbs, should be identifiers (if the lenguage accept identifiers), + , nest 4 $ text $ "phrase(rep1(" ++ intercalate " | " (map (\s -> "\"" ++ s ++ "\"") symbs) ++ "))" + -- next line is needed in case of indentation use + -- , nest 4 $ text $ "phrase(rep1(" ++ intercalate " | " (map (\s -> "\"" ++ s ++ "\"") symbs) ++ ")) ^^ { rawTokens =>" + -- , nest 8 "processIndent(rawTokens)" + , "}" + ] + +-- getListSymNames :: [String] -> [String] +-- getListSymNames symbs = map ( . symbolToName) symbs + +getKeywordParser :: String -> [Doc] +getKeywordParser symb = [ + text $ "def" ++ symb ++ "= " ++ symb ++ "^^ (_ => EXIT)" + ] + + +-- following functino must be added only in case of acepting indentation as block separetion +-- getProcessIndentFunction :: [Doc] +-- getProcessIndentFunction = [ +-- "private def processIndent(tokens: List[WorkflowToken], indents: List[Int] = List(0)): List[WorkflowToken] = {" +-- nest 4 "tokens.headOption match {" + +-- case Some(INDENTATION(spaces)) if spaces > indents.head => +-- INDENT() :: processIndentations(tokens.tail, spaces :: indents) + +-- // if there is a decrease, we pop from the stack until we have matched the new level and +-- // we produce a DEDENT for each pop +-- case Some(INDENTATION(spaces)) if spaces < indents.head => +-- val (dropped, kept) = indents.partition(_ > spaces) +-- (dropped map (_ => DEDENT())) ::: processIndentations(tokens.tail, kept) + +-- // if the indentation level stays unchanged, no tokens are produced +-- case Some(INDENTATION(spaces)) if spaces == indents.head => +-- processIndentations(tokens.tail, indents) + +-- // other tokens are ignored +-- case Some(token) => +-- token :: processIndentations(tokens.tail, indents) + +-- // the final step is to produce a DEDENT for each indentation level still remaining, thus +-- // "closing" the remaining open INDENTS +-- case None => +-- indents.filter(_ > 0).map(_ => DEDENT()) + +-- } +-- } +-- ] \ No newline at end of file diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs b/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs new file mode 100644 index 000000000..1606e0552 --- /dev/null +++ b/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs @@ -0,0 +1,50 @@ + +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE OverloadedStrings #-} + +{- + BNF Converter: Scala Lextract syntax + Copyright (Scala) 2024 Author: Juan Pablo Poittevin + + Description : This module generates the Scala Lextract Syntax + tree classes. It generates both a Header file + and an Implementation file + + Author : Juan Pablo Poittevin + Created : 30 September, 2024 +-} + +module BNFC.Backend.Scala.CFtoScalaLexToken (cf2ScalaLexToken) where + +import Prelude hiding ((<>)) + +import BNFC.CF +import BNFC.PrettyPrint +import BNFC.Options +import BNFC.Backend.Common (unicodeAndSymbols) + +cf2ScalaLexToken + :: SharedOptions + -> CF + -> Doc +cf2ScalaLexToken Options{ lang } cf = vsep . concat $ + [ + [] + , headers + , [text $ concat $ map generateSymbClass symbs] + ] + where + datas = cf2data cf + symbs = unicodeAndSymbols cf + + +generateSymbClass :: String -> String +generateSymbClass symb = "case class " ++ symb ++ "() extends WorkflowToken \n" + +headers :: [Doc] +headers = [ + "package co.enear.parsercombinators.lexer" + , "import scala.util.parsing.input.Positional" + , "sealed trait WorkflowToken extends Positional" + ] From 25d7236f6ed49764c91004489ffe10ec9b2c6bd7 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Tue, 11 Feb 2025 19:25:16 +0100 Subject: [PATCH 12/42] working on Calc ScalaLex --- .gitignore | 2 + source/src/BNFC/Backend/Scala/CFtoScalaLex.hs | 108 +++++++++++++----- .../BNFC/Backend/Scala/CFtoScalaLexToken.hs | 30 +++-- 3 files changed, 106 insertions(+), 34 deletions(-) diff --git a/.gitignore b/.gitignore index ce92f1390..9cfdf04f5 100644 --- a/.gitignore +++ b/.gitignore @@ -176,3 +176,5 @@ cabal.sandbox.config ignore-dir/ .DS_Store +.vscode +.metals diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs b/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs index b64935653..b00abf827 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs @@ -24,6 +24,10 @@ import BNFC.PrettyPrint import BNFC.Options import BNFC.Backend.Common (unicodeAndSymbols) import Data.List +import Data.Char (toLower) + +toLowerString :: String -> String +toLowerString s = map toLower s -- | The result is two files (.H file, .C file) cf2ScalaLex @@ -33,26 +37,54 @@ cf2ScalaLex cf2ScalaLex Options{ lang } cf = vsep . concat $ [ [] - , imports + , imports lang + , addExtraClasses , initWorkflowClass - , getApplyFunction - , getTokensFunction symbs - , getIdentifiersFunction -- This should be called only in case of having identifiers in the lenguage, which anyway will be the case in most of the lenguages. - , getLiteralsFunction - , getIndentationsFunction + , map (nest 4) getApplyFunction + , map (nest 4) (getTokensFunction (symbs ++ liters)) + -- , map (nest 4) getIdentifiersFunction -- This should be called only in case of having identifiers in the lenguage, which anyway will be the case in most of the lenguages. + , map (nest 4) (addParserFunctions liters) + -- , map (nest 4) getLiteralsFunction + -- , getIndentationsFunction + , map (nest 4) (getKeywordParsers symbs) , endWorkflowClass ] where symbs = unicodeAndSymbols cf - - -imports :: [Doc] -imports = [ - "package co.enear.parsercombinators.lexer" - , "import co.enear.parsercombinators.compiler.{Location, WorkflowLexerError}" + liters = literals cf + +-- catString, catInteger, catDouble, catChar, catIdent :: TokenCat +-- catString = "String" +-- catInteger = "Integer" +-- catDouble = "Double" +-- catChar = "Char" +-- catIdent = "Ident" + +addParserFunctions :: [String] -> [Doc] +addParserFunctions liters = map addParserFunction liters + +addParserFunction :: String -> Doc +addParserFunction liter + | liter == catInteger = getIntegerFunction + | liter == catIdent = getIdentifiersFunction + | otherwise = "" + +imports :: String -> [Doc] +imports name = [ + text $ "package " ++ name ++ ".workflowtoken." ++ name ++ "Lex" , "import scala.util.parsing.combinator.RegexParsers" ] +addExtraClasses :: [Doc] +addExtraClasses = [ + "sealed trait WorkflowCompilationError" + ,"case class WorkflowLexerError(location: Location, msg: String) extends WorkflowCompilationError" + ,"case class WorkflowParserError(location: Location, msg: String) extends WorkflowCompilationError" + ,"case class Location(line: Int, column: Int) {" + ,nest 4 "override def toString = s\"$line:$column\"" + ,"}" + ] + initWorkflowClass :: [Doc] initWorkflowClass = [ @@ -71,26 +103,33 @@ getApplyFunction :: [Doc] getApplyFunction = [ "def apply(code: String): Either[WorkflowLexerError, List[WorkflowToken]] = {" , nest 4 "parse(tokens, code) match {" - , nest 8 "case NoSuccess(msg, next) => Left(WorkflowLexerError(Location(next.pos.line, next.pos.column), msg))" - , nest 8 "case Success(result, next) => Right(result)" + , nest 6 "case NoSuccess(msg, next) => Left(WorkflowLexerError(Location(next.pos.line, next.pos.column), msg))" + , nest 6 "case Success(result, next) => Right(result)" , nest 4"}" , "}" ] -getIdentifiersFunction :: [Doc] -getIdentifiersFunction = [ +getIdentifiersFunction :: Doc +getIdentifiersFunction = vcat [ "def identifier: Parser[IDENTIFIER] = {" , nest 4 "\"[a-zA-Z_][a-zA-Z0-9_]*\".r ^^ { str => IDENTIFIER(str) }" , "}" ] +getIntegerFunction :: Doc +getIntegerFunction = vcat [ + "def integer: Parser[Integer] = {" + , nest 4 "\"[0-9]+\".r ^^ {i => Integer(i)}" + , "}" + ] + getLiteralsFunction :: [Doc] getLiteralsFunction = [ "def literal: Parser[LITERAL] = {" , nest 4 "\"\"\"\"[^\"]*\"\"\"\".r ^^ { str =>" - , nest 8 "val content = str.substring(1, str.length - 1)" - , nest 8 "LITERAL(content)" + , nest 6 "val content = str.substring(1, str.length - 1)" + , nest 6 "LITERAL(content)" , nest 4 "}" , "}" ] @@ -99,33 +138,48 @@ getIndentationsFunction :: [Doc] getIndentationsFunction = [ "def indentation: Parser[INDENTATION] = {" , nest 4 "\"\\n[ ]*\".r ^^ { whitespace =>" - , nest 8 "val nSpaces = whitespace.length - 1" - , nest 8 "INDENTATION(nSpaces)" + , nest 6 "val nSpaces = whitespace.length - 1" + , nest 6 "INDENTATION(nSpaces)" , nest 4 "}" , "}" ] +getSymbFromName :: String -> String +getSymbFromName s = + case symbolToName s of + Just s -> s + _ -> s + + + getTokensFunction :: [String] -> [Doc] getTokensFunction symbs = [ "def tokens: Parser[List[WorkflowToken]] = {" -- TODO: Are the symbs enough for the token lexing? Check if we need to add any other data -- At the end of this list of symbs, should be identifiers (if the lenguage accept identifiers), - , nest 4 $ text $ "phrase(rep1(" ++ intercalate " | " (map (\s -> "\"" ++ s ++ "\"") symbs) ++ "))" + , nest 4 $ text $ "phrase(rep1( " ++ intercalate " | " (getListSymNames symbs) ++ "))" -- next line is needed in case of indentation use -- , nest 4 $ text $ "phrase(rep1(" ++ intercalate " | " (map (\s -> "\"" ++ s ++ "\"") symbs) ++ ")) ^^ { rawTokens =>" -- , nest 8 "processIndent(rawTokens)" , "}" ] --- getListSymNames :: [String] -> [String] --- getListSymNames symbs = map ( . symbolToName) symbs -getKeywordParser :: String -> [Doc] -getKeywordParser symb = [ - text $ "def" ++ symb ++ "= " ++ symb ++ "^^ (_ => EXIT)" - ] +getSymName :: String -> String +getSymName = toLowerString.getSymbFromName + +getListSymNames :: [String] -> [String] +getListSymNames symbs = map getSymName symbs + +getKeywordParsers :: [String] -> [Doc] +getKeywordParsers symbs = map getKeywordParser symbs + +-- def plus = positioned { "+" ^^ (_ => PLUS()) } +getKeywordParser :: String -> Doc +getKeywordParser symb = text $ "def " ++ getSymName symb ++ " = positioned { \"" ++ toLowerString symb ++ "\" ^^ (_ =>"++ getSymbFromName symb ++"()) }" +-- def+= +^^ (_ =>+()) -- following functino must be added only in case of acepting indentation as block separetion -- getProcessIndentFunction :: [Doc] diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs b/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs index 1606e0552..0494409d1 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs @@ -23,6 +23,7 @@ import BNFC.CF import BNFC.PrettyPrint import BNFC.Options import BNFC.Backend.Common (unicodeAndSymbols) +import BNFC.Utils (symbolToName) cf2ScalaLexToken :: SharedOptions @@ -30,21 +31,36 @@ cf2ScalaLexToken -> Doc cf2ScalaLexToken Options{ lang } cf = vsep . concat $ [ - [] - , headers + headers lang , [text $ concat $ map generateSymbClass symbs] + , [generateStringClasses liters] ] where - datas = cf2data cf + liters = literals cf symbs = unicodeAndSymbols cf generateSymbClass :: String -> String -generateSymbClass symb = "case class " ++ symb ++ "() extends WorkflowToken \n" +generateSymbClass symb = case symbolToName symb of + Just s -> "case class " ++ s ++ "() extends WorkflowToken \n" + Nothing -> "" -headers :: [Doc] -headers = [ - "package co.enear.parsercombinators.lexer" + +generateIntegerClasses :: [String] -> Doc +generateIntegerClasses params = text $ concat $ map generateIntegerClass params + +generateIntegerClass :: String -> String +generateIntegerClass param = "case class " ++ param ++ "(i: Int) extends WorkflowToken \n" + +generateStringClasses :: [String] -> Doc +generateStringClasses params = text $ concat $ map generateStringClass params + +generateStringClass :: String -> String +generateStringClass param = "case class " ++ param ++ "(str: String) extends WorkflowToken \n" + +headers :: String -> [Doc] +headers name = [ + text $ "package " ++ name ++ ".workflowtoken." ++ name ++ "Lex" , "import scala.util.parsing.input.Positional" , "sealed trait WorkflowToken extends Positional" ] From c22b42d6091c4391e7105570adfd584c08aa2420 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Fri, 21 Feb 2025 09:27:13 -0300 Subject: [PATCH 13/42] add scala lexer working --- source/src/BNFC/Backend/Scala/CFtoScalaLex.hs | 50 +++++++++++++------ .../BNFC/Backend/Scala/CFtoScalaLexToken.hs | 5 +- 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs b/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs index b00abf827..659c41ce0 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs @@ -66,7 +66,10 @@ addParserFunctions liters = map addParserFunction liters addParserFunction :: String -> Doc addParserFunction liter | liter == catInteger = getIntegerFunction + | liter == catDouble = getDoubleFunction | liter == catIdent = getIdentifiersFunction + | liter == catString = getLiteralsFunction + | liter == catChar = getLiteralFunction | otherwise = "" imports :: String -> [Doc] @@ -89,9 +92,10 @@ addExtraClasses = [ initWorkflowClass :: [Doc] initWorkflowClass = [ "object WorkflowLexer extends RegexParsers {" - , nest 4 "override def skipWhitespace = true" + -- TODO: I REMOVED THIS LINE TO MAKE IT WORK, BUT WILL FAIL WITH GRAMMARS WHICH USE TABS/SPACES AS BLOCKS DEFINITION, SHOULD WE WORK ON THIS? + -- , nest 4 "override def skipWhitespace = true" -- In case the lenguage use indentation for block creation, we should remove \n from the list of whiteSpace. - , nest 4 $ text $ "override val whiteSpace = " ++ "\"" ++ "[\\t\\r\\f\\n]+\".r" + -- , nest 4 $ text $ "override val whiteSpace = " ++ "\"" ++ "[\\t\\r\\f\\n]+\".r" ] endWorkflowClass :: [Doc] @@ -112,28 +116,48 @@ getApplyFunction = [ getIdentifiersFunction :: Doc getIdentifiersFunction = vcat [ - "def identifier: Parser[IDENTIFIER] = {" - , nest 4 "\"[a-zA-Z_][a-zA-Z0-9_]*\".r ^^ { str => IDENTIFIER(str) }" + "def ident: Parser[IDENT] = {" + , nest 4 "\"[a-zA-Z_][a-zA-Z0-9_]*\".r ^^ { str => IDENT(str) }" , "}" ] getIntegerFunction :: Doc getIntegerFunction = vcat [ - "def integer: Parser[Integer] = {" - , nest 4 "\"[0-9]+\".r ^^ {i => Integer(i)}" + "def integer: Parser[INTEGER] = {" + , nest 4 "\"[0-9]+\".r ^^ {i => INTEGER(i)}" , "}" ] -getLiteralsFunction :: [Doc] -getLiteralsFunction = [ - "def literal: Parser[LITERAL] = {" - , nest 4 "\"\"\"\"[^\"]*\"\"\"\".r ^^ { str =>" + +getDoubleFunction :: Doc +getDoubleFunction = vcat [ + "def double: Parser[Double] = {" + , nest 4 "\"[0-9]+.[0-9]+\".r ^^ {i => Double(i)}" + , "}" + ] + + +getLiteralsFunction :: Doc +getLiteralsFunction = vcat [ + "def string: Parser[STRING] = {" + , nest 4 "\"\\\"[^\\\"]*\\\"\".r ^^ { str =>" , nest 6 "val content = str.substring(1, str.length - 1)" - , nest 6 "LITERAL(content)" + , nest 6 "STRING(content)" , nest 4 "}" , "}" ] +getLiteralFunction :: Doc +getLiteralFunction = vcat [ + "def char: Parser[CHAR] = {" + , nest 4 "\"\\\'[^\\\']*\\\'\".r ^^ { str =>" + , nest 6 "val content = str.substring(1, str.length - 1)" + , nest 6 "CHAR(content)" + , nest 4 "}" + , "}" + ] + + getIndentationsFunction :: [Doc] getIndentationsFunction = [ "def indentation: Parser[INDENTATION] = {" @@ -155,9 +179,7 @@ getSymbFromName s = getTokensFunction :: [String] -> [Doc] getTokensFunction symbs = [ - "def tokens: Parser[List[WorkflowToken]] = {" - -- TODO: Are the symbs enough for the token lexing? Check if we need to add any other data - -- At the end of this list of symbs, should be identifiers (if the lenguage accept identifiers), + "def tokens: Parser[List[WorkflowToken]] = {" , nest 4 $ text $ "phrase(rep1( " ++ intercalate " | " (getListSymNames symbs) ++ "))" -- next line is needed in case of indentation use -- , nest 4 $ text $ "phrase(rep1(" ++ intercalate " | " (map (\s -> "\"" ++ s ++ "\"") symbs) ++ ")) ^^ { rawTokens =>" diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs b/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs index 0494409d1..2688d4e54 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs @@ -24,6 +24,7 @@ import BNFC.PrettyPrint import BNFC.Options import BNFC.Backend.Common (unicodeAndSymbols) import BNFC.Utils (symbolToName) +import Data.Char (toUpper) cf2ScalaLexToken :: SharedOptions @@ -56,7 +57,7 @@ generateStringClasses :: [String] -> Doc generateStringClasses params = text $ concat $ map generateStringClass params generateStringClass :: String -> String -generateStringClass param = "case class " ++ param ++ "(str: String) extends WorkflowToken \n" +generateStringClass param = "case class " ++ (map toUpper param) ++ "(str: String) extends WorkflowToken \n" headers :: String -> [Doc] headers name = [ @@ -64,3 +65,5 @@ headers name = [ , "import scala.util.parsing.input.Positional" , "sealed trait WorkflowToken extends Positional" ] + + From 202578cde17fe0b0ade22f5c3deb3de85cfc9af6 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Sun, 16 Mar 2025 17:16:13 +0100 Subject: [PATCH 14/42] primera version de parser --- source/BNFC.cabal | 2 + source/src/BNFC/Backend/Scala.hs | 4 + source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs | 11 - source/src/BNFC/Backend/Scala/CFtoScalaLex.hs | 9 +- .../src/BNFC/Backend/Scala/CFtoScalaParser.hs | 361 ++++++++++++++++++ .../BNFC/Backend/Scala/CFtoScalaParserAST.hs | 69 ++++ 6 files changed, 439 insertions(+), 17 deletions(-) create mode 100644 source/src/BNFC/Backend/Scala/CFtoScalaParser.hs create mode 100644 source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs diff --git a/source/BNFC.cabal b/source/BNFC.cabal index 235278389..ae505a53e 100644 --- a/source/BNFC.cabal +++ b/source/BNFC.cabal @@ -277,6 +277,8 @@ library BNFC.Backend.Scala.CFtoScalaAbs BNFC.Backend.Scala.CFtoScalaLex BNFC.Backend.Scala.CFtoScalaLexToken + BNFC.Backend.Scala.CFtoScalaParser + BNFC.Backend.Scala.CFtoScalaParserAST -- Agda backend BNFC.Backend.Agda diff --git a/source/src/BNFC/Backend/Scala.hs b/source/src/BNFC/Backend/Scala.hs index ca2d60221..6da857846 100644 --- a/source/src/BNFC/Backend/Scala.hs +++ b/source/src/BNFC/Backend/Scala.hs @@ -9,6 +9,8 @@ import BNFC.PrettyPrint (vcat, Doc) import BNFC.Backend.Scala.CFtoScalaAbs (cf2ScalaAbs) import BNFC.Backend.Scala.CFtoScalaLex (cf2ScalaLex) import BNFC.Backend.Scala.CFtoScalaLexToken (cf2ScalaLexToken) +import BNFC.Backend.Scala.CFtoScalaParser (cf2ScalaParser) +import BNFC.Backend.Scala.CFtoScalaParserAST (cf2ScalaParserAST) -- | Entrypoint for the Scala backend. @@ -17,6 +19,8 @@ makeScala opts cf = do mkfile (name ++ ".scala") comment $ cf2ScalaAbs opts cf mkfile (name ++ "LexToken.scala") comment $ cf2ScalaLexToken opts cf mkfile (name ++ "Lex.scala") comment $ cf2ScalaLex opts cf + mkfile (name ++ "Parser.scala") comment $ cf2ScalaParser opts cf + mkfile (name ++ "ParserAST.scala") comment $ cf2ScalaParserAST opts cf where name = lang opts comment :: String -> String diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs b/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs index 8aaf753c0..0d7bf5094 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs @@ -41,17 +41,6 @@ cf2ScalaAbs Options{ lang } cf = vsep . concat $ datas = cf2data cf functions = vcat (map prData datas) - -dataToString :: (Cat, [(String, [Cat])]) -> String -dataToString (cat, []) = catToStr cat -dataToString (cat, cats) = "Main Cat:" ++ catToStr cat ++ " List of [(String, [Cat])]: " ++ strCatToString cats - - -strCatToString :: [(String, [Cat])] -> String -strCatToString ([]) = "" -strCatToString (ncat:[]) = " \n Sub Cat: " ++ fst ncat ++ " Is compose of: " ++ intercalate " " (map catToStr (snd ncat)) -strCatToString (ncat:cats) = " \n Sub Cat: " ++ fst ncat ++ " Is compose of: " ++ intercalate " " (map catToStr (snd ncat)) ++ strCatToString cats - classSign :: Doc classSign = "abstract class AbstractVisitor[R, A] extends AllVisitor[R, A]{" diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs b/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs index 659c41ce0..d4308c49b 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs @@ -7,9 +7,7 @@ Copyright (Scala) 2024 Author: Juan Pablo Poittevin Description : This module generates the Scala Lextract Syntax - tree classes. It generates both a Header file - and an Implementation file - + tree classes. Using Scala Parser Combinator Author : Juan Pablo Poittevin Created : 30 September, 2024 -} @@ -29,11 +27,10 @@ import Data.Char (toLower) toLowerString :: String -> String toLowerString s = map toLower s --- | The result is two files (.H file, .C file) cf2ScalaLex :: SharedOptions - -> CF -- ^ Grammar. - -> Doc -- ^ @.H@ file, @.C@ file. + -> CF -- Grammar. + -> Doc cf2ScalaLex Options{ lang } cf = vsep . concat $ [ [] diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs new file mode 100644 index 000000000..a590cdbe1 --- /dev/null +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -0,0 +1,361 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE OverloadedStrings #-} + +{- + BNF Converter: Scala Parser syntax + Copyright (Scala) 2024 Author: Juan Pablo Poittevin + + Description : This module generates the Scala Parser Syntax + Using Scala Parser Combinator + Author : Juan Pablo Poittevin + Created : 30 September, 2024 +-} + +module BNFC.Backend.Scala.CFtoScalaParser (cf2ScalaParser) where + +import Prelude hiding ((<>)) + +import qualified Data.Foldable as DF (toList) +import BNFC.Utils (symbolToName, unless) +import BNFC.CF +import BNFC.PrettyPrint +import BNFC.Options +import BNFC.Backend.Common (unicodeAndSymbols) +import Data.List +import BNFC.Backend.Common.OOAbstract (cf2cabs, absclasses) +import Data.Map (Map, toList) +import BNFC.Backend.Common.NamedVariables (firstLowerCase, numVars) +import BNFC.Backend.Common.StrUtils (renderCharOrString) +import BNFC.Backend.Haskell.Utils (catToType) + +cf2ScalaParser + :: SharedOptions + -> CF -- Grammar. + -> Doc +cf2ScalaParser Options{ lang } cf = vsep . concat $ + [ + [] + , imports lang + , initWorkflowClass + , map (nest 4) addExtraClasses + , map (nest 4) getApplyFunction + , map (nest 4) (getProgramFunction cf) + , map (nest 4) getBlockFunction + -- , map (nest 4) (addParserFunctions liters) + -- , map (nest 4) (getKeywordParsers symbs) + , strRules + , endWorkflowClass + ] + where + -- absc = absclasses cabs + -- cabs = cf2cabs cf + -- symbs = unicodeAndSymbols cf + -- rules = ruleGroupsInternals cf + -- cfKeywords = reservedWords cf + -- cfgSign = signatureToString (cfgSignature cf) + -- astData = specialData cf + -- astData = getAbstractSyntax cf + -- datasToPrint = getDatas astData + -- parserCats = allParserCats cf + strRules = ruleGroupsCFToString (ruleGroups cf) + +-- Función para convertir Signature en String sin position +-- signatureToString :: Map String (WithPosition Type) -> String +-- signatureToString sig = +-- intercalate "\n" [key ++ " -> " ++ show (wpThing wp) | (key, wp) <- toList sig] + +-- dataToString :: Data -> String +-- dataToString (cat, []) = catToStr cat +-- dataToString (cat, cats) = "Main Cat: " ++ catToStr cat ++ " : " ++ strCatToString cats + +-- strCatToString :: [(String, [Cat])] -> String +-- strCatToString ([]) = "" +-- strCatToString (ncat:[]) = " \n Sub Cat: " ++ fst ncat ++ " Is compose of: " ++ intercalate " " (map catToStr (snd ncat)) +-- strCatToString (ncat:cats) = " \n Sub Cat: " ++ fst ncat ++ " Is compose of: " ++ intercalate " " (map catToStr (snd ncat)) ++ strCatToString cats + +-- getDatas :: [Data] -> [Doc] +-- getDatas datas = map (text . dataToString) datas + +-- ruleToString :: Rule fun -> String +-- ruleToString (funRule) = "" +-- ruleToString (valRCat) = "" +-- ruleToString (rhsRule) = "" +-- ruleToString (internal) = "" + +getSymbFromName :: String -> String +getSymbFromName s = + case symbolToName s of + Just s -> s + _ -> s + +-- renderX :: String -> Doc +-- renderX sep' = "render" <> char sc <> parens (text sep) +-- where (sc, sep) = renderCharOrString sep' + +-- basicFunName :: TokenCat -> String +-- basicFunName k +-- | k `elem` baseTokenCatNames = k +-- | otherwise = "Ident" + +-- prPrintItem :: String -> Either (Cat, Doc) String -> String +-- prPrintItem pre = \case +-- Right t -> getSymbFromName t +-- Left (cat, nt) -> concat +-- [ "case " +-- -- Esta linea obtiene el TokenCat, y le aplica basicFunName, si no es un TokenCat, le aplica el dentCat $ normCat, es decir que lo maneja como un ListCat +-- -- esta obviando el CoercCat (Exp1, Exp2, etc) +-- , maybe (identCat $ normCat cat) basicFunName $ maybeTokenCat cat +-- , "(", pre, render nt, ", ", show (precCat cat), ");" +-- ] + + +catToStrings :: [Either Cat String] -> [String] +catToStrings = map (\case + Left c -> show c + Right s -> s + ) + + +prPrintRule_ :: IsFun a => String -> Rul a -> [String] +prPrintRule_ _ (Rule _ _ items _) = map ((++ "()") . getSymbFromName) $ catToStrings items + + +prCoerciveRule :: Rule -> [String] +prCoerciveRule r@(Rule _ _ _ _) = concat [ + [render type'] + ] + where + type' = catToType id empty $ valCat r + + +prSubRule :: Rule -> [String] +prSubRule r@(Rule fun cat _ _) + -- | isCoercion fun = prCoerciveRule r + | isCoercion fun = [""] + | otherwise = + [ + -- [ "def " ++ pre ++ ": Parser[" ++ fnm ++ "] = {"] + "case " ++ intercalate " ~ " (prPrintRule_ pre r) ++ " => " + ++ fnm ++ "(" ++ intercalate " , " (prPrintRule_ pre r) ++ ")" + -- , [ "case " ++ show p ++ ") renderC(_R_PAREN);" + -- , "}" + -- ] + -- , ["}"] + ] + where + fnm = funName fun + pre = firstLowerCase fnm + + +-- prRules :: Bool -> Rul a -> Doc +-- prRules functor = vsep . map prOne +-- where +-- prOne (_ , [] ) = empty -- nt has only internal use +-- prOne (nt, (p,a):ls) = vcat +-- [ hsep [ nt', "::", "{", if functor then functorType' nt else type' nt, "}" ] +-- , hang nt' 2 $ sep (pr ":" (p, a) : map (pr "|") ls) +-- ] +-- where +-- nt' = text (identCat nt) +-- pr pre (p,a) = hsep [pre, text p, "{", text a , "}"] +-- type' = catToType qualify empty +-- functorType' nt = hcat ["(", qualify posType, ", ", type' nt, ")"] +-- qualify +-- | null absM = id +-- | otherwise = ((text absM <> ".") <>) + +coerCatDefSign :: Cat -> Doc +coerCatDefSign cat = + text $ "def " ++ pre ++ ": Parser[" ++ sCat ++ "]" + where + sCat = render $ catToType id empty cat + pre = firstLowerCase sCat + +prSubRuleDoc :: Rule -> [Doc] +prSubRuleDoc r = map text (prSubRule r) + +rulesToString :: [Rule] -> [Doc] +rulesToString ([]) = [""] +rulesToString (r:[]) = prSubRuleDoc r +rulesToString (r:rs) = prSubRuleDoc r ++ rulesToString rs + +ruleGroupsCFToString :: [(Cat,[Rule])] -> [Doc] +ruleGroupsCFToString ([]) = [""] +ruleGroupsCFToString ((c, r):[] ) = [coerCatDefSign c] ++ [codeblock 4 (rulesToString r)] +ruleGroupsCFToString ((c, r):crs) = [coerCatDefSign c] ++ [codeblock 4 (rulesToString r)] ++ (ruleGroupsCFToString crs) + + +imports :: String -> [Doc] +imports name = [ + text $ "package " ++ name ++ ".workflowtoken." ++ name ++ "Parser" + , "import co.enear.parsercombinators.compiler.{Location, WorkflowParserError}" + , "import co.enear.parsercombinators.lexer._" + , "import scala.util.parsing.combinator.Parsers" + , "import scala.util.parsing.input.{NoPosition, Position, Reader}" + ] + + +addExtraClasses :: [Doc] +addExtraClasses = [ + "class WorkflowTokenReader(tokens: Seq[WorkflowToken]) extends Reader[WorkflowToken] {" + , nest 4 "override def first: WorkflowToken = tokens.head" + , nest 4 "override def atEnd: Boolean = tokens.isEmpty" + , nest 4 "override def pos: Position = tokens.headOption.map(_.pos).getOrElse(NoPosition)" + , nest 4 "override def rest: Reader[WorkflowToken] = new WorkflowTokenReader(tokens.tail)" + , "}" + ] + + +initWorkflowClass :: [Doc] +initWorkflowClass = [ + "object WorkflowParser extends Parsers {" + , nest 4 "override type Elem = WorkflowToken" + -- TODO: I REMOVED THIS LINE TO MAKE IT WORK, BUT WILL FAIL WITH GRAMMARS WHICH USE TABS/SPACES AS BLOCKS DEFINITION, SHOULD WE WORK ON THIS? + -- , nest 4 "override def skipWhitespace = true" + -- In case the lenguage use indentation for block creation, we should remove \n from the list of whiteSpace. + -- , nest 4 $ text $ "override val whiteSpace = " ++ "\"" ++ "[\\t\\r\\f\\n]+\".r" + ] + +endWorkflowClass :: [Doc] +endWorkflowClass = [ + "}" + ] + +getApplyFunction :: [Doc] +getApplyFunction = [ + "def apply(tokens: Seq[WorkflowToken]): Either[WorkflowParserError, WorkflowAST] = {" + , nest 4 "val reader = new WorkflowTokenReader(tokens)" + , nest 4 "program(reader) match {" + , nest 4 "case NoSuccess(msg, next) => Left(WorkflowParserError(Location(next.pos.line, next.pos.column), msg))" + , nest 4 "case Success(result, next) => Right(result)" + , nest 4 "}" + , "}" + ] + + + +getProgramFunction :: CF -> [Doc] +getProgramFunction cf = [ + "def program: Parser[WorkflowAST] = positioned {" + , nest 4 $ text $ "phrase(" ++ (intercalate " | " epsStr) ++ ")" + ,"}" + ] + where + epsStr = map show eps + eps = DF.toList (allEntryPoints cf) + + +getBlockFunction :: [Doc] +getBlockFunction = [ + "def block: Parser[WorkflowAST] = positioned {" + , nest 4 "rep1(statement) ^^ { case stmtList => stmtList reduceRight AndThen }" + , "}" + ] + + +getDoubleFunction :: Doc +getDoubleFunction = vcat [ + "def double: Parser[Double] = {" + , nest 4 "\"[0-9]+.[0-9]+\".r ^^ {i => Double(i)}" + , "}" + ] + + +getLiteralsFunction :: Doc +getLiteralsFunction = vcat [ + "def string: Parser[STRING] = {" + , nest 4 "\"\\\"[^\\\"]*\\\"\".r ^^ { str =>" + , nest 6 "val content = str.substring(1, str.length - 1)" + , nest 6 "STRING(content)" + , nest 4 "}" + , "}" + ] + +getLiteralFunction :: Doc +getLiteralFunction = vcat [ + "def char: Parser[CHAR] = {" + , nest 4 "\"\\\'[^\\\']*\\\'\".r ^^ { str =>" + , nest 6 "val content = str.substring(1, str.length - 1)" + , nest 6 "CHAR(content)" + , nest 4 "}" + , "}" + ] + + +getIndentationsFunction :: [Doc] +getIndentationsFunction = [ + "def indentation: Parser[INDENTATION] = {" + , nest 4 "\"\\n[ ]*\".r ^^ { whitespace =>" + , nest 6 "val nSpaces = whitespace.length - 1" + , nest 6 "INDENTATION(nSpaces)" + , nest 4 "}" + , "}" + ] + + +-- getSymbFromName :: String -> String +-- getSymbFromName s = +-- case symbolToName s of +-- Just s -> s +-- _ -> s + + + +-- getTokensFunction :: [String] -> [Doc] +-- getTokensFunction symbs = [ +-- "def tokens: Parser[List[WorkflowToken]] = {" +-- , nest 4 $ text $ "phrase(rep1( " ++ intercalate " | " (getListSymNames symbs) ++ "))" +-- -- next line is needed in case of indentation use +-- -- , nest 4 $ text $ "phrase(rep1(" ++ intercalate " | " (map (\s -> "\"" ++ s ++ "\"") symbs) ++ ")) ^^ { rawTokens =>" +-- -- , nest 8 "processIndent(rawTokens)" +-- , "}" +-- ] + + +-- getSymName :: String -> String +-- getSymName = toLowerString.getSymbFromName + +-- getListSymNames :: [String] -> [String] +-- getListSymNames symbs = map getSymName symbs + +-- getKeywordParsers :: [String] -> [Doc] +-- getKeywordParsers symbs = map getKeywordParser symbs + + +-- def plus = positioned { "+" ^^ (_ => PLUS()) } +-- getKeywordParser :: String -> Doc +-- getKeywordParser symb = text $ "def " ++ getSymName symb ++ " = positioned { \"" ++ toLowerString symb ++ "\" ^^ (_ =>"++ getSymbFromName symb ++"()) }" +-- def+= +^^ (_ =>+()) + +-- following functino must be added only in case of acepting indentation as block separetion +-- getProcessIndentFunction :: [Doc] +-- getProcessIndentFunction = [ +-- "private def processIndent(tokens: List[WorkflowToken], indents: List[Int] = List(0)): List[WorkflowToken] = {" +-- nest 4 "tokens.headOption match {" + +-- case Some(INDENTATION(spaces)) if spaces > indents.head => +-- INDENT() :: processIndentations(tokens.tail, spaces :: indents) + +-- // if there is a decrease, we pop from the stack until we have matched the new level and +-- // we produce a DEDENT for each pop +-- case Some(INDENTATION(spaces)) if spaces < indents.head => +-- val (dropped, kept) = indents.partition(_ > spaces) +-- (dropped map (_ => DEDENT())) ::: processIndentations(tokens.tail, kept) + +-- // if the indentation level stays unchanged, no tokens are produced +-- case Some(INDENTATION(spaces)) if spaces == indents.head => +-- processIndentations(tokens.tail, indents) + +-- // other tokens are ignored +-- case Some(token) => +-- token :: processIndentations(tokens.tail, indents) + +-- // the final step is to produce a DEDENT for each indentation level still remaining, thus +-- // "closing" the remaining open INDENTS +-- case None => +-- indents.filter(_ > 0).map(_ => DEDENT()) + +-- } +-- } +-- ] \ No newline at end of file diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs new file mode 100644 index 000000000..3300f59b8 --- /dev/null +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs @@ -0,0 +1,69 @@ + +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE OverloadedStrings #-} + +{- + BNF Converter: Scala Lextract syntax + Copyright (Scala) 2024 Author: Juan Pablo Poittevin + + Description : This module generates the Scala Lextract Syntax + tree classes. It generates both a Header file + and an Implementation file + + Author : Juan Pablo Poittevin + Created : 30 September, 2024 +-} + +module BNFC.Backend.Scala.CFtoScalaParserAST (cf2ScalaParserAST) where + +import Prelude hiding ((<>)) + +import BNFC.CF +import BNFC.PrettyPrint +import BNFC.Options +import BNFC.Backend.Common (unicodeAndSymbols) +import BNFC.Utils (symbolToName) +import Data.Char (toUpper) + +cf2ScalaParserAST + :: SharedOptions + -> CF + -> Doc +cf2ScalaParserAST Options{ lang } cf = vsep . concat $ + [ + headers lang + , [text $ concat $ map generateSymbClass symbs] + , [generateStringClasses liters] + ] + where + liters = literals cf + symbs = unicodeAndSymbols cf + + +generateSymbClass :: String -> String +generateSymbClass symb = case symbolToName symb of + Just s -> "case class " ++ s ++ "() extends WorkflowAST \n" + Nothing -> "" + + +generateIntegerClasses :: [String] -> Doc +generateIntegerClasses params = text $ concat $ map generateIntegerClass params + +generateIntegerClass :: String -> String +generateIntegerClass param = "case class " ++ param ++ "(i: Int) extends WorkflowAST \n" + +generateStringClasses :: [String] -> Doc +generateStringClasses params = text $ concat $ map generateStringClass params + +generateStringClass :: String -> String +generateStringClass param = "case class " ++ (map toUpper param) ++ "(str: String) extends WorkflowAST \n" + +headers :: String -> [Doc] +headers name = [ + text $ "package " ++ name ++ ".WorkflowAST." ++ name ++ "ParserAST" + , "import scala.util.parsing.input.Positional" + , "sealed trait WorkflowAST extends Positional" + ] + + From f9553187dd65f17e63fe64c9f5c16ef12b5c9936 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Thu, 20 Mar 2025 21:12:36 +0100 Subject: [PATCH 15/42] improved parser --- .../src/BNFC/Backend/Scala/CFtoScalaParser.hs | 73 ++++++++++--------- .../BNFC/Backend/Scala/CFtoScalaParserAST.hs | 55 ++++++++++++-- 2 files changed, 87 insertions(+), 41 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs index a590cdbe1..2b716ab5f 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -17,7 +17,7 @@ module BNFC.Backend.Scala.CFtoScalaParser (cf2ScalaParser) where import Prelude hiding ((<>)) import qualified Data.Foldable as DF (toList) -import BNFC.Utils (symbolToName, unless) +import BNFC.Utils (symbolToName, mkNames, NameStyle (..)) import BNFC.CF import BNFC.PrettyPrint import BNFC.Options @@ -28,6 +28,7 @@ import Data.Map (Map, toList) import BNFC.Backend.Common.NamedVariables (firstLowerCase, numVars) import BNFC.Backend.Common.StrUtils (renderCharOrString) import BNFC.Backend.Haskell.Utils (catToType) +import Data.Maybe (isNothing) cf2ScalaParser :: SharedOptions @@ -83,12 +84,6 @@ cf2ScalaParser Options{ lang } cf = vsep . concat $ -- ruleToString (rhsRule) = "" -- ruleToString (internal) = "" -getSymbFromName :: String -> String -getSymbFromName s = - case symbolToName s of - Just s -> s - _ -> s - -- renderX :: String -> Doc -- renderX sep' = "render" <> char sc <> parens (text sep) -- where (sc, sep) = renderCharOrString sep' @@ -116,9 +111,14 @@ catToStrings = map (\case Right s -> s ) +getSymbFromName :: String -> String +getSymbFromName s = + case symbolToName s of + Just s -> s ++ "()" + _ -> s prPrintRule_ :: IsFun a => String -> Rul a -> [String] -prPrintRule_ _ (Rule _ _ items _) = map ((++ "()") . getSymbFromName) $ catToStrings items +prPrintRule_ _ (Rule _ _ items _) = map getSymbFromName $ catToStrings items prCoerciveRule :: Rule -> [String] @@ -135,42 +135,33 @@ prSubRule r@(Rule fun cat _ _) | isCoercion fun = [""] | otherwise = [ - -- [ "def " ++ pre ++ ": Parser[" ++ fnm ++ "] = {"] - "case " ++ intercalate " ~ " (prPrintRule_ pre r) ++ " => " - ++ fnm ++ "(" ++ intercalate " , " (prPrintRule_ pre r) ++ ")" - -- , [ "case " ++ show p ++ ") renderC(_R_PAREN);" - -- , "}" - -- ] - -- , ["}"] + "case (" ++ intercalate " ~ " vars ++ ") => " + ++ fnm ++ "(" ++ intercalate " , " varsNoSymbs ++ ")" + ] where + varsNoSymbs = filter (\x -> isNothing (symbolToName x)) vars + -- varsNoSymbs = [pa | Nothing <- map symbolToName vars] + vars = prPrintRule_ pre r fnm = funName fun pre = firstLowerCase fnm --- prRules :: Bool -> Rul a -> Doc --- prRules functor = vsep . map prOne --- where --- prOne (_ , [] ) = empty -- nt has only internal use --- prOne (nt, (p,a):ls) = vcat --- [ hsep [ nt', "::", "{", if functor then functorType' nt else type' nt, "}" ] --- , hang nt' 2 $ sep (pr ":" (p, a) : map (pr "|") ls) --- ] --- where --- nt' = text (identCat nt) --- pr pre (p,a) = hsep [pre, text p, "{", text a , "}"] --- type' = catToType qualify empty --- functorType' nt = hcat ["(", qualify posType, ", ", type' nt, ")"] --- qualify --- | null absM = id --- | otherwise = ((text absM <> ".") <>) +-- def exp: Parser[WorkflowAST] = positioned { +-- exp1 ~ rep((PLUS() | MINUS()) ~ exp1) ^^ { +-- case exp1 ~ list => list.foldLeft(exp1) { +-- case (e1, PLUS() ~ e2) => EAdd(e1, e2) +-- case (e1, MINUS() ~ e2) => ESub(e1, e2) +-- } +-- } +-- } coerCatDefSign :: Cat -> Doc coerCatDefSign cat = - text $ "def " ++ pre ++ ": Parser[" ++ sCat ++ "]" + text $ "def " ++ pre ++ ": Parser[WorkflowAST] = positioned {" where sCat = render $ catToType id empty cat - pre = firstLowerCase sCat + pre = firstLowerCase $ show cat prSubRuleDoc :: Rule -> [Doc] prSubRuleDoc r = map text (prSubRule r) @@ -180,6 +171,17 @@ rulesToString ([]) = [""] rulesToString (r:[]) = prSubRuleDoc r rulesToString (r:rs) = prSubRuleDoc r ++ rulesToString rs + +-- generateDefCat :: (Cat,[Rule]) -> [Doc] +-- generateDefCat (c, rs) = [coerCatDefSign c] ++ ( +-- [ + +-- ] +-- ) +-- where + + + ruleGroupsCFToString :: [(Cat,[Rule])] -> [Doc] ruleGroupsCFToString ([]) = [""] ruleGroupsCFToString ((c, r):[] ) = [coerCatDefSign c] ++ [codeblock 4 (rulesToString r)] @@ -238,12 +240,11 @@ getApplyFunction = [ getProgramFunction :: CF -> [Doc] getProgramFunction cf = [ "def program: Parser[WorkflowAST] = positioned {" - , nest 4 $ text $ "phrase(" ++ (intercalate " | " epsStr) ++ ")" + , nest 4 $ text $ "phrase(" ++ eps ++ ")" ,"}" ] where - epsStr = map show eps - eps = DF.toList (allEntryPoints cf) + eps = firstLowerCase $ show $ head $ map normCat $ DF.toList $ allEntryPoints cf getBlockFunction :: [Doc] diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs index 3300f59b8..39e7bf1b4 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs @@ -26,6 +26,9 @@ import BNFC.Backend.Common (unicodeAndSymbols) import BNFC.Utils (symbolToName) import Data.Char (toUpper) +import BNFC.Backend.Common.NamedVariables (firstLowerCase) +import Data.List (intercalate) + cf2ScalaParserAST :: SharedOptions -> CF @@ -33,12 +36,18 @@ cf2ScalaParserAST cf2ScalaParserAST Options{ lang } cf = vsep . concat $ [ headers lang - , [text $ concat $ map generateSymbClass symbs] - , [generateStringClasses liters] + -- , [text $ concat $ map generateSymbClass symbs] + -- , [generateStringClasses liters] + -- , map (text.show) parserCats + , strRules ] where - liters = literals cf - symbs = unicodeAndSymbols cf + liters = literals cf + symbs = unicodeAndSymbols cf + parserCats = allParserCats cf + strRules = prRulesNames rules + rules = ruleGroups cf + generateSymbClass :: String -> String @@ -59,9 +68,45 @@ generateStringClasses params = text $ concat $ map generateStringClass params generateStringClass :: String -> String generateStringClass param = "case class " ++ (map toUpper param) ++ "(str: String) extends WorkflowAST \n" + +-- case class EAdd(exp: WorkflowAST, exp1: WorkflowAST) extends WorkflowAST + +catFilterToStrings :: [Either Cat String] -> [String] +catFilterToStrings = map (\case + Left c -> show c + Right s -> "" + ) + + +prRuleName :: Rule -> [Doc] +prRuleName r@(Rule fun cat rhs _) + -- | isCoercion fun = prCoerciveRule r + | isCoercion fun = [""] + | otherwise = [ + text $ "case class " ++ fnm ++ "(" + , text $ intercalate ", " $ map (++ ": WorkflowAST") parsNames + -- , intersperse (text ", ") (filter (not . null) [text (param ++ ": WorkflowAST") | param <- parsNames, not (null param)]) + , " ) extends WorkflowAST" + ] + where + fnm = funName fun + parsNames = filter (not . null) $ map firstLowerCase (catFilterToStrings rhs) + +ruleIterPr :: [Rule] -> [Doc] +ruleIterPr [] = [""] +ruleIterPr (r:[]) = prRuleName r +ruleIterPr (r:rs) = prRuleName r ++ ruleIterPr rs + + +prRulesNames :: [(Cat,[Rule])] -> [Doc] +prRulesNames ( [] ) = [""] +prRulesNames ((_, r):[] ) = ruleIterPr r +prRulesNames ((_, r):crs ) = ruleIterPr r ++ prRulesNames crs + + headers :: String -> [Doc] headers name = [ - text $ "package " ++ name ++ ".WorkflowAST." ++ name ++ "ParserAST" + text $ "package " ++ name ++ ".workflowtoken." ++ name ++ "Parser" , "import scala.util.parsing.input.Positional" , "sealed trait WorkflowAST extends Positional" ] From c057531c7f6af64edfcf5cc6c70955d6dc21752a Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Thu, 20 Mar 2025 23:14:38 +0100 Subject: [PATCH 16/42] working version of parser with calc.cf --- .../src/BNFC/Backend/Scala/CFtoScalaParser.hs | 219 ++++++------------ 1 file changed, 67 insertions(+), 152 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs index 2b716ab5f..736a1efb0 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -29,6 +29,7 @@ import BNFC.Backend.Common.NamedVariables (firstLowerCase, numVars) import BNFC.Backend.Common.StrUtils (renderCharOrString) import BNFC.Backend.Haskell.Utils (catToType) import Data.Maybe (isNothing) +import Data.Char (toLower) cf2ScalaParser :: SharedOptions @@ -42,11 +43,12 @@ cf2ScalaParser Options{ lang } cf = vsep . concat $ , map (nest 4) addExtraClasses , map (nest 4) getApplyFunction , map (nest 4) (getProgramFunction cf) - , map (nest 4) getBlockFunction + -- , map (nest 4) getBlockFunction -- , map (nest 4) (addParserFunctions liters) -- , map (nest 4) (getKeywordParsers symbs) - , strRules + , map (nest 4) strRules , endWorkflowClass + , addWorkflowCompiler ] where -- absc = absclasses cabs @@ -61,48 +63,6 @@ cf2ScalaParser Options{ lang } cf = vsep . concat $ -- parserCats = allParserCats cf strRules = ruleGroupsCFToString (ruleGroups cf) --- Función para convertir Signature en String sin position --- signatureToString :: Map String (WithPosition Type) -> String --- signatureToString sig = --- intercalate "\n" [key ++ " -> " ++ show (wpThing wp) | (key, wp) <- toList sig] - --- dataToString :: Data -> String --- dataToString (cat, []) = catToStr cat --- dataToString (cat, cats) = "Main Cat: " ++ catToStr cat ++ " : " ++ strCatToString cats - --- strCatToString :: [(String, [Cat])] -> String --- strCatToString ([]) = "" --- strCatToString (ncat:[]) = " \n Sub Cat: " ++ fst ncat ++ " Is compose of: " ++ intercalate " " (map catToStr (snd ncat)) --- strCatToString (ncat:cats) = " \n Sub Cat: " ++ fst ncat ++ " Is compose of: " ++ intercalate " " (map catToStr (snd ncat)) ++ strCatToString cats - --- getDatas :: [Data] -> [Doc] --- getDatas datas = map (text . dataToString) datas - --- ruleToString :: Rule fun -> String --- ruleToString (funRule) = "" --- ruleToString (valRCat) = "" --- ruleToString (rhsRule) = "" --- ruleToString (internal) = "" - --- renderX :: String -> Doc --- renderX sep' = "render" <> char sc <> parens (text sep) --- where (sc, sep) = renderCharOrString sep' - --- basicFunName :: TokenCat -> String --- basicFunName k --- | k `elem` baseTokenCatNames = k --- | otherwise = "Ident" - --- prPrintItem :: String -> Either (Cat, Doc) String -> String --- prPrintItem pre = \case --- Right t -> getSymbFromName t --- Left (cat, nt) -> concat --- [ "case " --- -- Esta linea obtiene el TokenCat, y le aplica basicFunName, si no es un TokenCat, le aplica el dentCat $ normCat, es decir que lo maneja como un ListCat --- -- esta obviando el CoercCat (Exp1, Exp2, etc) --- , maybe (identCat $ normCat cat) basicFunName $ maybeTokenCat cat --- , "(", pre, render nt, ", ", show (precCat cat), ");" --- ] catToStrings :: [Either Cat String] -> [String] @@ -117,8 +77,8 @@ getSymbFromName s = Just s -> s ++ "()" _ -> s -prPrintRule_ :: IsFun a => String -> Rul a -> [String] -prPrintRule_ _ (Rule _ _ items _) = map getSymbFromName $ catToStrings items +prPrintRule_ :: Rule -> [String] +prPrintRule_ (Rule _ _ items _) = map getSymbFromName $ catToStrings items prCoerciveRule :: Rule -> [String] @@ -128,34 +88,25 @@ prCoerciveRule r@(Rule _ _ _ _) = concat [ where type' = catToType id empty $ valCat r +safeTail :: [a] -> [a] +safeTail [] = [] -- Si la lista está vacía, devuelve una lista vacía +safeTail (_:xs) = xs -prSubRule :: Rule -> [String] -prSubRule r@(Rule fun cat _ _) - -- | isCoercion fun = prCoerciveRule r - | isCoercion fun = [""] - | otherwise = - [ - "case (" ++ intercalate " ~ " vars ++ ") => " - ++ fnm ++ "(" ++ intercalate " , " varsNoSymbs ++ ")" +filterSymbs = filter (not . isSuffixOf "()") +onlySymbs = filter (isSuffixOf "()") - ] +prSubRule :: Rule -> [String] +prSubRule r@(Rule fun _ _ _) + | isCoercion fun = [] + | otherwise = + ["case (" ++ head vars ++ ", " ++ intercalate " ~ " (safeTail vars) ++ ") => " + ++ fnm ++ "(" ++ intercalate ", " (filterSymbs vars) ++ ")"] where - varsNoSymbs = filter (\x -> isNothing (symbolToName x)) vars - -- varsNoSymbs = [pa | Nothing <- map symbolToName vars] - vars = prPrintRule_ pre r + vars = disambiguateNames $ map modifyVars (prPrintRule_ r) fnm = funName fun - pre = firstLowerCase fnm - - --- def exp: Parser[WorkflowAST] = positioned { --- exp1 ~ rep((PLUS() | MINUS()) ~ exp1) ^^ { --- case exp1 ~ list => list.foldLeft(exp1) { --- case (e1, PLUS() ~ e2) => EAdd(e1, e2) --- case (e1, MINUS() ~ e2) => ESub(e1, e2) --- } --- } --- } - + modifyVars str = if "()" `isSuffixOf` str then str else [toLower (head str)] + + coerCatDefSign :: Cat -> Doc coerCatDefSign cat = text $ "def " ++ pre ++ ": Parser[WorkflowAST] = positioned {" @@ -166,33 +117,43 @@ coerCatDefSign cat = prSubRuleDoc :: Rule -> [Doc] prSubRuleDoc r = map text (prSubRule r) -rulesToString :: [Rule] -> [Doc] -rulesToString ([]) = [""] -rulesToString (r:[]) = prSubRuleDoc r -rulesToString (r:rs) = prSubRuleDoc r ++ rulesToString rs - --- generateDefCat :: (Cat,[Rule]) -> [Doc] --- generateDefCat (c, rs) = [coerCatDefSign c] ++ ( --- [ - --- ] --- ) --- where +filterNotEqual :: Eq a => a -> [a] -> [a] +filterNotEqual element list = filter (/= element) list + +rulesToString :: [Rule] -> [Doc] +rulesToString [] = [""] +rulesToString rules@(Rule fun cat _ _ : _) = + let + -- Filtramos las reglas que pertenecen a la misma categoría `cat` + (sameCat, rest) = span (\(Rule _ c _ _) -> c == cat) rules + + -- Obtenemos los símbolos de las reglas filtradas + vars = concatMap prPrintRule_ sameCat + fnm = funName fun + exitCatName = firstLowerCase $ head $ filterNotEqual (show (wpThing cat)) $ filterSymbs vars + + -- Generamos la cabecera única con todas las alternativas dentro de `rep(...)` + header = text $ exitCatName ++ " ~ rep((" ++ intercalate " | " (onlySymbs (safeTail vars)) ++ ") ~ " ++ exitCatName ++ ") ^^ {" + subHeader = text $ "case " ++ exitCatName ++ " ~ list => list.foldLeft(" ++ exitCatName ++ ") {" + + -- Generamos los `case` correspondientes a las reglas de `sameCat` + rulesDocs = concatMap prSubRuleDoc sameCat + in [header] ++ map (nest 4) [subHeader] ++ map (nest 8) rulesDocs ++ [nest 4 "}"] ++ ["}"] ++ rulesToString rest ruleGroupsCFToString :: [(Cat,[Rule])] -> [Doc] -ruleGroupsCFToString ([]) = [""] -ruleGroupsCFToString ((c, r):[] ) = [coerCatDefSign c] ++ [codeblock 4 (rulesToString r)] -ruleGroupsCFToString ((c, r):crs) = [coerCatDefSign c] ++ [codeblock 4 (rulesToString r)] ++ (ruleGroupsCFToString crs) +ruleGroupsCFToString [] = [""] +ruleGroupsCFToString ((c, r):crs) = + [coerCatDefSign c] ++ map (nest 4) (rulesToString r) ++ ["}"] ++ ruleGroupsCFToString crs + imports :: String -> [Doc] imports name = [ text $ "package " ++ name ++ ".workflowtoken." ++ name ++ "Parser" - , "import co.enear.parsercombinators.compiler.{Location, WorkflowParserError}" - , "import co.enear.parsercombinators.lexer._" + , text $ "import " ++ name ++ ".workflowtoken." ++ name ++ "Lex._" , "import scala.util.parsing.combinator.Parsers" , "import scala.util.parsing.input.{NoPosition, Position, Reader}" ] @@ -208,6 +169,17 @@ addExtraClasses = [ , "}" ] +addWorkflowCompiler :: [Doc] +addWorkflowCompiler = [ + "object WorkflowCompiler {" + , nest 4 "def apply(code: String): Either[WorkflowCompilationError, WorkflowAST] = {" + , nest 8 "for {" + , nest 12 "tokens <- WorkflowLexer(code).right" + , nest 12 "ast <- WorkflowParser(tokens).right" + , nest 8 "} yield ast" + , nest 4 "}" + , "}" + ] initWorkflowClass :: [Doc] initWorkflowClass = [ @@ -295,68 +267,11 @@ getIndentationsFunction = [ ] --- getSymbFromName :: String -> String --- getSymbFromName s = --- case symbolToName s of --- Just s -> s --- _ -> s - - - --- getTokensFunction :: [String] -> [Doc] --- getTokensFunction symbs = [ --- "def tokens: Parser[List[WorkflowToken]] = {" --- , nest 4 $ text $ "phrase(rep1( " ++ intercalate " | " (getListSymNames symbs) ++ "))" --- -- next line is needed in case of indentation use --- -- , nest 4 $ text $ "phrase(rep1(" ++ intercalate " | " (map (\s -> "\"" ++ s ++ "\"") symbs) ++ ")) ^^ { rawTokens =>" --- -- , nest 8 "processIndent(rawTokens)" --- , "}" --- ] - - --- getSymName :: String -> String --- getSymName = toLowerString.getSymbFromName - --- getListSymNames :: [String] -> [String] --- getListSymNames symbs = map getSymName symbs - --- getKeywordParsers :: [String] -> [Doc] --- getKeywordParsers symbs = map getKeywordParser symbs - - --- def plus = positioned { "+" ^^ (_ => PLUS()) } --- getKeywordParser :: String -> Doc --- getKeywordParser symb = text $ "def " ++ getSymName symb ++ " = positioned { \"" ++ toLowerString symb ++ "\" ^^ (_ =>"++ getSymbFromName symb ++"()) }" --- def+= +^^ (_ =>+()) - --- following functino must be added only in case of acepting indentation as block separetion --- getProcessIndentFunction :: [Doc] --- getProcessIndentFunction = [ --- "private def processIndent(tokens: List[WorkflowToken], indents: List[Int] = List(0)): List[WorkflowToken] = {" --- nest 4 "tokens.headOption match {" - --- case Some(INDENTATION(spaces)) if spaces > indents.head => --- INDENT() :: processIndentations(tokens.tail, spaces :: indents) - --- // if there is a decrease, we pop from the stack until we have matched the new level and --- // we produce a DEDENT for each pop --- case Some(INDENTATION(spaces)) if spaces < indents.head => --- val (dropped, kept) = indents.partition(_ > spaces) --- (dropped map (_ => DEDENT())) ::: processIndentations(tokens.tail, kept) - --- // if the indentation level stays unchanged, no tokens are produced --- case Some(INDENTATION(spaces)) if spaces == indents.head => --- processIndentations(tokens.tail, indents) - --- // other tokens are ignored --- case Some(token) => --- token :: processIndentations(tokens.tail, indents) - --- // the final step is to produce a DEDENT for each indentation level still remaining, thus --- // "closing" the remaining open INDENTS --- case None => --- indents.filter(_ > 0).map(_ => DEDENT()) - --- } --- } --- ] \ No newline at end of file +disambiguateNames :: [String] -> [String] +disambiguateNames = disamb [] + where + disamb ns1 (n:ns2) + | n `elem` (ns1 ++ ns2) = let i = length (filter (==n) ns1) + 1 + in (n ++ show i) : disamb (n:ns1) ns2 + | otherwise = n : disamb (n:ns1) ns2 + disamb _ [] = [] \ No newline at end of file From ce75ef8173d4ecb0d3fcb9fd7798a92c104e2b89 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Thu, 20 Mar 2025 23:36:07 +0100 Subject: [PATCH 17/42] add integer and removed unused function --- source/src/BNFC/Backend/Scala/CFtoScalaParser.hs | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs index 736a1efb0..0bf708578 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -234,6 +234,13 @@ getDoubleFunction = vcat [ , "}" ] +getIntegerFunction :: Doc +getIntegerFunction = vcat [ + "def integer: Parser[EInt] = positioned {" + , nest 4 "accept(\"integer\", { case INTEGER(i) => EInt(i.toInt) })" + ,"}" + ] + getLiteralsFunction :: Doc getLiteralsFunction = vcat [ @@ -256,15 +263,6 @@ getLiteralFunction = vcat [ ] -getIndentationsFunction :: [Doc] -getIndentationsFunction = [ - "def indentation: Parser[INDENTATION] = {" - , nest 4 "\"\\n[ ]*\".r ^^ { whitespace =>" - , nest 6 "val nSpaces = whitespace.length - 1" - , nest 6 "INDENTATION(nSpaces)" - , nest 4 "}" - , "}" - ] disambiguateNames :: [String] -> [String] From b897701fb41ebe968076abbb3d07e55a3c0ccd53 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Fri, 21 Mar 2025 00:39:49 +0100 Subject: [PATCH 18/42] version with simple rules working better --- .../src/BNFC/Backend/Scala/CFtoScalaParser.hs | 35 +++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs index 0bf708578..6a60d8f2b 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -121,10 +121,10 @@ prSubRuleDoc r = map text (prSubRule r) filterNotEqual :: Eq a => a -> [a] -> [a] filterNotEqual element list = filter (/= element) list - + -- Función para generar una regla simple cuando la RHS es "integer" rulesToString :: [Rule] -> [Doc] rulesToString [] = [""] -rulesToString rules@(Rule fun cat _ _ : _) = +rulesToString rules@(Rule fun cat rhs internal : _) = let -- Filtramos las reglas que pertenecen a la misma categoría `cat` (sameCat, rest) = span (\(Rule _ c _ _) -> c == cat) rules @@ -134,19 +134,42 @@ rulesToString rules@(Rule fun cat _ _ : _) = fnm = funName fun exitCatName = firstLowerCase $ head $ filterNotEqual (show (wpThing cat)) $ filterSymbs vars - -- Generamos la cabecera única con todas las alternativas dentro de `rep(...)` + -- Verificamos si la RHS es "integer" + intRule = case rhs of + (Left a : _) -> show a + _ -> "" + + isIntegerRule = isTokenCat $ strToCat intRule -- Verificamos si es "integer" + + -- Si es una regla "integer", generamos una definición simple + simpleRule = if isIntegerRule + then [text $ "def " ++ exitCatName ++ ": Parser[EInt] = positioned {\n" ++ + "accept(\"integer\", { case INTEGER(i) => EInt(i.toInt) })\n" ++ + "}"] + else [] + + -- Si no es "integer", generamos la cabecera única con las alternativas en `rep(...)` header = text $ exitCatName ++ " ~ rep((" ++ intercalate " | " (onlySymbs (safeTail vars)) ++ ") ~ " ++ exitCatName ++ ") ^^ {" subHeader = text $ "case " ++ exitCatName ++ " ~ list => list.foldLeft(" ++ exitCatName ++ ") {" -- Generamos los `case` correspondientes a las reglas de `sameCat` rulesDocs = concatMap prSubRuleDoc sameCat - in [header] ++ map (nest 4) [subHeader] ++ map (nest 8) rulesDocs ++ [nest 4 "}"] ++ ["}"] ++ rulesToString rest + in if isIntegerRule + then simpleRule -- Solo generamos la regla simple para "integer" + else [header] ++ map (nest 4) [subHeader] ++ map (nest 8) rulesDocs ++ [nest 4 "}"] ++ ["}"] ++ rulesToString rest ruleGroupsCFToString :: [(Cat,[Rule])] -> [Doc] ruleGroupsCFToString [] = [""] -ruleGroupsCFToString ((c, r):crs) = - [coerCatDefSign c] ++ map (nest 4) (rulesToString r) ++ ["}"] ++ ruleGroupsCFToString crs +ruleGroupsCFToString ((c, r):crs) = [coerCatDefSign c] ++ map (nest 4) (rulesToString r) ++ ["}"] ++ ruleGroupsCFToString crs + + +-- ruleGroupsCFToString :: [(Cat,[Rule])] -> [Doc] +-- ruleGroupsCFToString [] = [""] +-- ruleGroupsCFToString ((c, r):crs) +-- | shw (rhsRule r) `elem` baseTokenCatNames = ["estoy en built in"] +-- | otherwise = [coerCatDefSign c] ++ map (nest 4) (rulesToString r) ++ ["}"] ++ ruleGroupsCFToString crs + From 4b4a2e810046bef6337c8e24b1f5479e6f7bdd00 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Mon, 24 Mar 2025 23:26:47 +0100 Subject: [PATCH 19/42] fix base function as extra def in parser --- .../src/BNFC/Backend/Scala/CFtoScalaParser.hs | 112 +++++++++++++----- 1 file changed, 82 insertions(+), 30 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs index 6a60d8f2b..ac77da13c 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -30,6 +30,7 @@ import BNFC.Backend.Common.StrUtils (renderCharOrString) import BNFC.Backend.Haskell.Utils (catToType) import Data.Maybe (isNothing) import Data.Char (toLower) +import qualified Data.Map as Map cf2ScalaParser :: SharedOptions @@ -51,8 +52,6 @@ cf2ScalaParser Options{ lang } cf = vsep . concat $ , addWorkflowCompiler ] where - -- absc = absclasses cabs - -- cabs = cf2cabs cf -- symbs = unicodeAndSymbols cf -- rules = ruleGroupsInternals cf -- cfKeywords = reservedWords cf @@ -115,16 +114,17 @@ coerCatDefSign cat = pre = firstLowerCase $ show cat prSubRuleDoc :: Rule -> [Doc] -prSubRuleDoc r = map text (prSubRule r) +prSubRuleDoc r = map text (prSubRule r) filterNotEqual :: Eq a => a -> [a] -> [a] filterNotEqual element list = filter (/= element) list + -- Función para generar una regla simple cuando la RHS es "integer" rulesToString :: [Rule] -> [Doc] rulesToString [] = [""] -rulesToString rules@(Rule fun cat rhs internal : _) = +rulesToString rules@(Rule fun cat rhs _ : _) = let -- Filtramos las reglas que pertenecen a la misma categoría `cat` (sameCat, rest) = span (\(Rule _ c _ _) -> c == cat) rules @@ -134,42 +134,75 @@ rulesToString rules@(Rule fun cat rhs internal : _) = fnm = funName fun exitCatName = firstLowerCase $ head $ filterNotEqual (show (wpThing cat)) $ filterSymbs vars - -- Verificamos si la RHS es "integer" - intRule = case rhs of + -- Obtenemos el nombre de la regla [Left Exp] -> Exp + ruleName = case rhs of (Left a : _) -> show a _ -> "" - - isIntegerRule = isTokenCat $ strToCat intRule -- Verificamos si es "integer" - - -- Si es una regla "integer", generamos una definición simple - simpleRule = if isIntegerRule - then [text $ "def " ++ exitCatName ++ ": Parser[EInt] = positioned {\n" ++ - "accept(\"integer\", { case INTEGER(i) => EInt(i.toInt) })\n" ++ - "}"] - else [] -- Si no es "integer", generamos la cabecera única con las alternativas en `rep(...)` - header = text $ exitCatName ++ " ~ rep((" ++ intercalate " | " (onlySymbs (safeTail vars)) ++ ") ~ " ++ exitCatName ++ ") ^^ {" - subHeader = text $ "case " ++ exitCatName ++ " ~ list => list.foldLeft(" ++ exitCatName ++ ") {" + header = [text $ exitCatName ++ " ~ rep((" ++ intercalate " | " (onlySymbs (safeTail vars)) ++ ") ~ " ++ exitCatName ++ ") ^^ {"] + subHeader = [nest 4 $ text $ "case " ++ exitCatName ++ " ~ list => list.foldLeft(" ++ exitCatName ++ ") {"] -- Generamos los `case` correspondientes a las reglas de `sameCat` - rulesDocs = concatMap prSubRuleDoc sameCat - in if isIntegerRule - then simpleRule -- Solo generamos la regla simple para "integer" - else [header] ++ map (nest 4) [subHeader] ++ map (nest 8) rulesDocs ++ [nest 4 "}"] ++ ["}"] ++ rulesToString rest + rulesDocs = map (nest 8) $ concatMap prSubRuleDoc sameCat + + in header ++ subHeader ++ rulesDocs ++ [nest 4 "}", "}"] ++ rulesToString rest ++ ["}"] + + +extractCategories :: Rule -> [Cat] +extractCategories (Rule _ _ rhs _) = [c | Left c <- rhs] + +filterBaseTokenCats :: [Rule] -> [Cat] +filterBaseTokenCats rules = + (nub (concatMap extractCategories rules)) `intersect` (map TokenCat specialCatsP) + + +ruleGroupsCFToString :: [(Cat, [Rule])] -> [Doc] +ruleGroupsCFToString catsAndRules = + let + -- Process each group to generate its rules + processGroup (c, r) = [coerCatDefSign c] ++ map (nest 4) (rulesToString r) + + -- Function to extract the function name from a rule + getFunName :: Rule -> String + getFunName (Rule fun _ _ _) = (wpThing fun) + -- Helper to check if a rule contains a TokenCat in RHS + hasTokenCat :: TokenCat -> Rule -> Bool + hasTokenCat token (Rule _ _ rhs _) = TokenCat token `elem` [c | Left c <- rhs] -ruleGroupsCFToString :: [(Cat,[Rule])] -> [Doc] -ruleGroupsCFToString [] = [""] -ruleGroupsCFToString ((c, r):crs) = [coerCatDefSign c] ++ map (nest 4) (rulesToString r) ++ ["}"] ++ ruleGroupsCFToString crs + -- Find the first rule where `TokenCat catInteger` appears + integerRuleData = find (\(_, rules) -> any (hasTokenCat catInteger) rules) catsAndRules + stringRuleData = find (\(_, rules) -> any (hasTokenCat catString) rules) catsAndRules + -- Generate the integer rule only if needed + integerRule = case integerRuleData of + Just (_, rules) -> case find (hasTokenCat catInteger) rules of + Just rule -> + let funName = getFunName rule in + [ text $ "def integer: Parser[" ++ funName ++ "] = positioned {" + , nest 4 $ text $ "accept(\"integer\", { case INTEGER(i) => " ++ funName ++ "(i.toInt) })" + , "}" + ] + Nothing -> [] + Nothing -> [] --- ruleGroupsCFToString :: [(Cat,[Rule])] -> [Doc] --- ruleGroupsCFToString [] = [""] --- ruleGroupsCFToString ((c, r):crs) --- | shw (rhsRule r) `elem` baseTokenCatNames = ["estoy en built in"] --- | otherwise = [coerCatDefSign c] ++ map (nest 4) (rulesToString r) ++ ["}"] ++ ruleGroupsCFToString crs + -- Generate the string rule only if needed + stringRule = case stringRuleData of + Just (_, rules) -> case find (hasTokenCat catString) rules of + Just rule -> + let funName = getFunName rule in + [ text $ "def string: Parser[" ++ funName ++ "] = positioned {" + , nest 4 $ text $ "accept(\"string\", { case STRING(s) => " ++ funName ++ "(s) })" + , "}" + ] + Nothing -> [] + Nothing -> [] + -- Generate all main rules + mainGeneratedRules = concatMap processGroup catsAndRules + + in mainGeneratedRules ++ integerRule ++ stringRule @@ -295,4 +328,23 @@ disambiguateNames = disamb [] | n `elem` (ns1 ++ ns2) = let i = length (filter (==n) ns1) + 1 in (n ++ show i) : disamb (n:ns1) ns2 | otherwise = n : disamb (n:ns1) ns2 - disamb _ [] = [] \ No newline at end of file + disamb _ [] = [] + + + + + +baseTypeToScalaType :: String -> Maybe String +baseTypeToScalaType = (`Map.lookup` baseTypeMap) + +-- | Map from base LBNF Type to scala Type. + +baseTypeMap :: Map String String +baseTypeMap = Map.fromList scalaTypesMap + +scalaTypesMap :: [(String, String)] +scalaTypesMap = + [ ("Integer" , "Int") + , ("String" , "String") + , ("Double" , "Double") + ] \ No newline at end of file From fd7c876a7c385e1d5109582be02799a9320bdc2a Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Tue, 25 Mar 2025 22:52:36 +0100 Subject: [PATCH 20/42] fix integer rule creation --- .../src/BNFC/Backend/Scala/CFtoScalaParser.hs | 71 ++++++++++++------- 1 file changed, 47 insertions(+), 24 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs index ac77da13c..394d9f567 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -17,7 +17,7 @@ module BNFC.Backend.Scala.CFtoScalaParser (cf2ScalaParser) where import Prelude hiding ((<>)) import qualified Data.Foldable as DF (toList) -import BNFC.Utils (symbolToName, mkNames, NameStyle (..)) +import BNFC.Utils (symbolToName, mkNames, NameStyle (..), lowerCase) import BNFC.CF import BNFC.PrettyPrint import BNFC.Options @@ -79,6 +79,13 @@ getSymbFromName s = prPrintRule_ :: Rule -> [String] prPrintRule_ (Rule _ _ items _) = map getSymbFromName $ catToStrings items +-- Function to extract the function name from a rule +getFunName :: Rule -> String +getFunName (Rule fun _ _ _) = (wpThing fun) + +-- Helper to check if a rule contains a TokenCat in RHS +hasTokenCat :: TokenCat -> Rule -> Bool +hasTokenCat token (Rule _ _ rhs _) = TokenCat token `elem` [c | Left c <- rhs] prCoerciveRule :: Rule -> [String] prCoerciveRule r@(Rule _ _ _ _) = concat [ @@ -119,9 +126,9 @@ prSubRuleDoc r = map text (prSubRule r) filterNotEqual :: Eq a => a -> [a] -> [a] filterNotEqual element list = filter (/= element) list - - -- Función para generar una regla simple cuando la RHS es "integer" + + rulesToString :: [Rule] -> [Doc] rulesToString [] = [""] rulesToString rules@(Rule fun cat rhs _ : _) = @@ -131,14 +138,12 @@ rulesToString rules@(Rule fun cat rhs _ : _) = -- Obtenemos los símbolos de las reglas filtradas vars = concatMap prPrintRule_ sameCat - fnm = funName fun exitCatName = firstLowerCase $ head $ filterNotEqual (show (wpThing cat)) $ filterSymbs vars - - -- Obtenemos el nombre de la regla [Left Exp] -> Exp - ruleName = case rhs of - (Left a : _) -> show a - _ -> "" + integerRuleData = case sameCat of + (fRule:_) -> hasTokenCat catInteger fRule + [] -> False + -- Si no es "integer", generamos la cabecera única con las alternativas en `rep(...)` header = [text $ exitCatName ++ " ~ rep((" ++ intercalate " | " (onlySymbs (safeTail vars)) ++ ") ~ " ++ exitCatName ++ ") ^^ {"] subHeader = [nest 4 $ text $ "case " ++ exitCatName ++ " ~ list => list.foldLeft(" ++ exitCatName ++ ") {"] @@ -146,7 +151,38 @@ rulesToString rules@(Rule fun cat rhs _ : _) = -- Generamos los `case` correspondientes a las reglas de `sameCat` rulesDocs = map (nest 8) $ concatMap prSubRuleDoc sameCat - in header ++ subHeader ++ rulesDocs ++ [nest 4 "}", "}"] ++ rulesToString rest ++ ["}"] + -- Si integerRuleData es True, evitamos agregar header y subHeader + mainBlock = if integerRuleData + then [text $ firstLowerCase catInteger ] -- TODO: change this to a map between cat and function name example (CatInteger -> integer) + else header ++ subHeader ++ rulesDocs ++ [nest 4 "}", "}"] + + in mainBlock ++ rulesToString rest + + +-- Función para generar una regla simple cuando la RHS es "integer" +-- rulesToString :: [Rule] -> [Doc] +-- rulesToString [] = [""] +-- rulesToString rules@(Rule fun cat rhs _ : _) = +-- let +-- -- Filtramos las reglas que pertenecen a la misma categoría `cat` +-- (sameCat, rest) = span (\(Rule _ c _ _) -> c == cat) rules + +-- -- Obtenemos los símbolos de las reglas filtradas +-- vars = concatMap prPrintRule_ sameCat +-- exitCatName = firstLowerCase $ head $ filterNotEqual (show (wpThing cat)) $ filterSymbs vars + +-- integerRuleData = case sameCat of +-- (fRule:_) -> hasTokenCat catInteger fRule +-- [] -> False + +-- -- Si no es "integer", generamos la cabecera única con las alternativas en `rep(...)` +-- header = [text $ exitCatName ++ " ~ rep((" ++ intercalate " | " (onlySymbs (safeTail vars)) ++ ") ~ " ++ exitCatName ++ ") ^^ {"] +-- subHeader = [nest 4 $ text $ "case " ++ exitCatName ++ " ~ list => list.foldLeft(" ++ exitCatName ++ ") {"] + +-- -- Generamos los `case` correspondientes a las reglas de `sameCat` +-- rulesDocs = map (nest 8) $ concatMap prSubRuleDoc sameCat + +-- in header ++ subHeader ++ rulesDocs ++ [nest 4 "}", "}"] ++ rulesToString rest ++ ["}"] extractCategories :: Rule -> [Cat] @@ -161,15 +197,7 @@ ruleGroupsCFToString :: [(Cat, [Rule])] -> [Doc] ruleGroupsCFToString catsAndRules = let -- Process each group to generate its rules - processGroup (c, r) = [coerCatDefSign c] ++ map (nest 4) (rulesToString r) - - -- Function to extract the function name from a rule - getFunName :: Rule -> String - getFunName (Rule fun _ _ _) = (wpThing fun) - - -- Helper to check if a rule contains a TokenCat in RHS - hasTokenCat :: TokenCat -> Rule -> Bool - hasTokenCat token (Rule _ _ rhs _) = TokenCat token `elem` [c | Left c <- rhs] + processGroup (c, r) = [coerCatDefSign c] ++ map (nest 4) (rulesToString r) ++ ["}"] -- Find the first rule where `TokenCat catInteger` appears integerRuleData = find (\(_, rules) -> any (hasTokenCat catInteger) rules) catsAndRules @@ -319,8 +347,6 @@ getLiteralFunction = vcat [ ] - - disambiguateNames :: [String] -> [String] disambiguateNames = disamb [] where @@ -331,9 +357,6 @@ disambiguateNames = disamb [] disamb _ [] = [] - - - baseTypeToScalaType :: String -> Maybe String baseTypeToScalaType = (`Map.lookup` baseTypeMap) From 7c299aa3dcb2d8e27a6601b7c41c84cb48be097c Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Tue, 25 Mar 2025 23:29:59 +0100 Subject: [PATCH 21/42] fix AST, remove unused code --- source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs | 1 - source/src/BNFC/Backend/Scala/CFtoScalaLex.hs | 51 +------ .../BNFC/Backend/Scala/CFtoScalaLexToken.hs | 6 - .../src/BNFC/Backend/Scala/CFtoScalaParser.hs | 137 +----------------- .../BNFC/Backend/Scala/CFtoScalaParserAST.hs | 73 ++++++---- 5 files changed, 49 insertions(+), 219 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs b/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs index 0d7bf5094..e8ecc53a8 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs @@ -21,7 +21,6 @@ import Prelude hiding ((<>)) import BNFC.CF import BNFC.PrettyPrint import BNFC.Options -import Data.List -- | The result is two files (.H file, .C file) cf2ScalaAbs diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs b/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs index d4308c49b..03f6e9dbf 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs @@ -21,7 +21,7 @@ import BNFC.CF import BNFC.PrettyPrint import BNFC.Options import BNFC.Backend.Common (unicodeAndSymbols) -import Data.List +import Data.List ( intercalate ) import Data.Char (toLower) toLowerString :: String -> String @@ -155,17 +155,6 @@ getLiteralFunction = vcat [ ] -getIndentationsFunction :: [Doc] -getIndentationsFunction = [ - "def indentation: Parser[INDENTATION] = {" - , nest 4 "\"\\n[ ]*\".r ^^ { whitespace =>" - , nest 6 "val nSpaces = whitespace.length - 1" - , nest 6 "INDENTATION(nSpaces)" - , nest 4 "}" - , "}" - ] - - getSymbFromName :: String -> String getSymbFromName s = case symbolToName s of @@ -173,14 +162,10 @@ getSymbFromName s = _ -> s - getTokensFunction :: [String] -> [Doc] getTokensFunction symbs = [ "def tokens: Parser[List[WorkflowToken]] = {" , nest 4 $ text $ "phrase(rep1( " ++ intercalate " | " (getListSymNames symbs) ++ "))" - -- next line is needed in case of indentation use - -- , nest 4 $ text $ "phrase(rep1(" ++ intercalate " | " (map (\s -> "\"" ++ s ++ "\"") symbs) ++ ")) ^^ { rawTokens =>" - -- , nest 8 "processIndent(rawTokens)" , "}" ] @@ -195,39 +180,5 @@ getKeywordParsers :: [String] -> [Doc] getKeywordParsers symbs = map getKeywordParser symbs --- def plus = positioned { "+" ^^ (_ => PLUS()) } getKeywordParser :: String -> Doc getKeywordParser symb = text $ "def " ++ getSymName symb ++ " = positioned { \"" ++ toLowerString symb ++ "\" ^^ (_ =>"++ getSymbFromName symb ++"()) }" --- def+= +^^ (_ =>+()) - --- following functino must be added only in case of acepting indentation as block separetion --- getProcessIndentFunction :: [Doc] --- getProcessIndentFunction = [ --- "private def processIndent(tokens: List[WorkflowToken], indents: List[Int] = List(0)): List[WorkflowToken] = {" --- nest 4 "tokens.headOption match {" - --- case Some(INDENTATION(spaces)) if spaces > indents.head => --- INDENT() :: processIndentations(tokens.tail, spaces :: indents) - --- // if there is a decrease, we pop from the stack until we have matched the new level and --- // we produce a DEDENT for each pop --- case Some(INDENTATION(spaces)) if spaces < indents.head => --- val (dropped, kept) = indents.partition(_ > spaces) --- (dropped map (_ => DEDENT())) ::: processIndentations(tokens.tail, kept) - --- // if the indentation level stays unchanged, no tokens are produced --- case Some(INDENTATION(spaces)) if spaces == indents.head => --- processIndentations(tokens.tail, indents) - --- // other tokens are ignored --- case Some(token) => --- token :: processIndentations(tokens.tail, indents) - --- // the final step is to produce a DEDENT for each indentation level still remaining, thus --- // "closing" the remaining open INDENTS --- case None => --- indents.filter(_ > 0).map(_ => DEDENT()) - --- } --- } --- ] \ No newline at end of file diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs b/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs index 2688d4e54..4d8e56333 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs @@ -47,12 +47,6 @@ generateSymbClass symb = case symbolToName symb of Nothing -> "" -generateIntegerClasses :: [String] -> Doc -generateIntegerClasses params = text $ concat $ map generateIntegerClass params - -generateIntegerClass :: String -> String -generateIntegerClass param = "case class " ++ param ++ "(i: Int) extends WorkflowToken \n" - generateStringClasses :: [String] -> Doc generateStringClasses params = text $ concat $ map generateStringClass params diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs index 394d9f567..7e13f16e7 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -17,20 +17,13 @@ module BNFC.Backend.Scala.CFtoScalaParser (cf2ScalaParser) where import Prelude hiding ((<>)) import qualified Data.Foldable as DF (toList) -import BNFC.Utils (symbolToName, mkNames, NameStyle (..), lowerCase) +import BNFC.Utils (symbolToName) import BNFC.CF import BNFC.PrettyPrint import BNFC.Options -import BNFC.Backend.Common (unicodeAndSymbols) -import Data.List -import BNFC.Backend.Common.OOAbstract (cf2cabs, absclasses) -import Data.Map (Map, toList) -import BNFC.Backend.Common.NamedVariables (firstLowerCase, numVars) -import BNFC.Backend.Common.StrUtils (renderCharOrString) -import BNFC.Backend.Haskell.Utils (catToType) -import Data.Maybe (isNothing) +import Data.List (find, intercalate, isSuffixOf ) +import BNFC.Backend.Common.NamedVariables (firstLowerCase) import Data.Char (toLower) -import qualified Data.Map as Map cf2ScalaParser :: SharedOptions @@ -44,26 +37,13 @@ cf2ScalaParser Options{ lang } cf = vsep . concat $ , map (nest 4) addExtraClasses , map (nest 4) getApplyFunction , map (nest 4) (getProgramFunction cf) - -- , map (nest 4) getBlockFunction - -- , map (nest 4) (addParserFunctions liters) - -- , map (nest 4) (getKeywordParsers symbs) , map (nest 4) strRules , endWorkflowClass , addWorkflowCompiler ] where - -- symbs = unicodeAndSymbols cf - -- rules = ruleGroupsInternals cf - -- cfKeywords = reservedWords cf - -- cfgSign = signatureToString (cfgSignature cf) - -- astData = specialData cf - -- astData = getAbstractSyntax cf - -- datasToPrint = getDatas astData - -- parserCats = allParserCats cf strRules = ruleGroupsCFToString (ruleGroups cf) - - catToStrings :: [Either Cat String] -> [String] catToStrings = map (\case Left c -> show c @@ -87,18 +67,14 @@ getFunName (Rule fun _ _ _) = (wpThing fun) hasTokenCat :: TokenCat -> Rule -> Bool hasTokenCat token (Rule _ _ rhs _) = TokenCat token `elem` [c | Left c <- rhs] -prCoerciveRule :: Rule -> [String] -prCoerciveRule r@(Rule _ _ _ _) = concat [ - [render type'] - ] - where - type' = catToType id empty $ valCat r - safeTail :: [a] -> [a] safeTail [] = [] -- Si la lista está vacía, devuelve una lista vacía safeTail (_:xs) = xs +filterSymbs :: [[Char]] -> [[Char]] filterSymbs = filter (not . isSuffixOf "()") + +onlySymbs :: [[Char]] -> [[Char]] onlySymbs = filter (isSuffixOf "()") prSubRule :: Rule -> [String] @@ -117,21 +93,17 @@ coerCatDefSign :: Cat -> Doc coerCatDefSign cat = text $ "def " ++ pre ++ ": Parser[WorkflowAST] = positioned {" where - sCat = render $ catToType id empty cat pre = firstLowerCase $ show cat prSubRuleDoc :: Rule -> [Doc] prSubRuleDoc r = map text (prSubRule r) - filterNotEqual :: Eq a => a -> [a] -> [a] filterNotEqual element list = filter (/= element) list - - rulesToString :: [Rule] -> [Doc] rulesToString [] = [""] -rulesToString rules@(Rule fun cat rhs _ : _) = +rulesToString rules@(Rule _ cat _ _ : _) = let -- Filtramos las reglas que pertenecen a la misma categoría `cat` (sameCat, rest) = span (\(Rule _ c _ _) -> c == cat) rules @@ -159,40 +131,6 @@ rulesToString rules@(Rule fun cat rhs _ : _) = in mainBlock ++ rulesToString rest --- Función para generar una regla simple cuando la RHS es "integer" --- rulesToString :: [Rule] -> [Doc] --- rulesToString [] = [""] --- rulesToString rules@(Rule fun cat rhs _ : _) = --- let --- -- Filtramos las reglas que pertenecen a la misma categoría `cat` --- (sameCat, rest) = span (\(Rule _ c _ _) -> c == cat) rules - --- -- Obtenemos los símbolos de las reglas filtradas --- vars = concatMap prPrintRule_ sameCat --- exitCatName = firstLowerCase $ head $ filterNotEqual (show (wpThing cat)) $ filterSymbs vars - --- integerRuleData = case sameCat of --- (fRule:_) -> hasTokenCat catInteger fRule --- [] -> False - --- -- Si no es "integer", generamos la cabecera única con las alternativas en `rep(...)` --- header = [text $ exitCatName ++ " ~ rep((" ++ intercalate " | " (onlySymbs (safeTail vars)) ++ ") ~ " ++ exitCatName ++ ") ^^ {"] --- subHeader = [nest 4 $ text $ "case " ++ exitCatName ++ " ~ list => list.foldLeft(" ++ exitCatName ++ ") {"] - --- -- Generamos los `case` correspondientes a las reglas de `sameCat` --- rulesDocs = map (nest 8) $ concatMap prSubRuleDoc sameCat - --- in header ++ subHeader ++ rulesDocs ++ [nest 4 "}", "}"] ++ rulesToString rest ++ ["}"] - - -extractCategories :: Rule -> [Cat] -extractCategories (Rule _ _ rhs _) = [c | Left c <- rhs] - -filterBaseTokenCats :: [Rule] -> [Cat] -filterBaseTokenCats rules = - (nub (concatMap extractCategories rules)) `intersect` (map TokenCat specialCatsP) - - ruleGroupsCFToString :: [(Cat, [Rule])] -> [Doc] ruleGroupsCFToString catsAndRules = let @@ -233,7 +171,6 @@ ruleGroupsCFToString catsAndRules = in mainGeneratedRules ++ integerRule ++ stringRule - imports :: String -> [Doc] imports name = [ text $ "package " ++ name ++ ".workflowtoken." ++ name ++ "Parser" @@ -303,50 +240,6 @@ getProgramFunction cf = [ eps = firstLowerCase $ show $ head $ map normCat $ DF.toList $ allEntryPoints cf -getBlockFunction :: [Doc] -getBlockFunction = [ - "def block: Parser[WorkflowAST] = positioned {" - , nest 4 "rep1(statement) ^^ { case stmtList => stmtList reduceRight AndThen }" - , "}" - ] - - -getDoubleFunction :: Doc -getDoubleFunction = vcat [ - "def double: Parser[Double] = {" - , nest 4 "\"[0-9]+.[0-9]+\".r ^^ {i => Double(i)}" - , "}" - ] - -getIntegerFunction :: Doc -getIntegerFunction = vcat [ - "def integer: Parser[EInt] = positioned {" - , nest 4 "accept(\"integer\", { case INTEGER(i) => EInt(i.toInt) })" - ,"}" - ] - - -getLiteralsFunction :: Doc -getLiteralsFunction = vcat [ - "def string: Parser[STRING] = {" - , nest 4 "\"\\\"[^\\\"]*\\\"\".r ^^ { str =>" - , nest 6 "val content = str.substring(1, str.length - 1)" - , nest 6 "STRING(content)" - , nest 4 "}" - , "}" - ] - -getLiteralFunction :: Doc -getLiteralFunction = vcat [ - "def char: Parser[CHAR] = {" - , nest 4 "\"\\\'[^\\\']*\\\'\".r ^^ { str =>" - , nest 6 "val content = str.substring(1, str.length - 1)" - , nest 6 "CHAR(content)" - , nest 4 "}" - , "}" - ] - - disambiguateNames :: [String] -> [String] disambiguateNames = disamb [] where @@ -355,19 +248,3 @@ disambiguateNames = disamb [] in (n ++ show i) : disamb (n:ns1) ns2 | otherwise = n : disamb (n:ns1) ns2 disamb _ [] = [] - - -baseTypeToScalaType :: String -> Maybe String -baseTypeToScalaType = (`Map.lookup` baseTypeMap) - --- | Map from base LBNF Type to scala Type. - -baseTypeMap :: Map String String -baseTypeMap = Map.fromList scalaTypesMap - -scalaTypesMap :: [(String, String)] -scalaTypesMap = - [ ("Integer" , "Int") - , ("String" , "String") - , ("Double" , "Double") - ] \ No newline at end of file diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs index 39e7bf1b4..7252cd5bc 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs @@ -23,11 +23,12 @@ import BNFC.CF import BNFC.PrettyPrint import BNFC.Options import BNFC.Backend.Common (unicodeAndSymbols) -import BNFC.Utils (symbolToName) +import BNFC.Utils (symbolToName, camelCase) import Data.Char (toUpper) import BNFC.Backend.Common.NamedVariables (firstLowerCase) import Data.List (intercalate) +import Data.Map (Map, fromList, lookup) cf2ScalaParserAST :: SharedOptions @@ -36,37 +37,11 @@ cf2ScalaParserAST cf2ScalaParserAST Options{ lang } cf = vsep . concat $ [ headers lang - -- , [text $ concat $ map generateSymbClass symbs] - -- , [generateStringClasses liters] - -- , map (text.show) parserCats , strRules ] where - liters = literals cf - symbs = unicodeAndSymbols cf - parserCats = allParserCats cf strRules = prRulesNames rules rules = ruleGroups cf - - - -generateSymbClass :: String -> String -generateSymbClass symb = case symbolToName symb of - Just s -> "case class " ++ s ++ "() extends WorkflowAST \n" - Nothing -> "" - - -generateIntegerClasses :: [String] -> Doc -generateIntegerClasses params = text $ concat $ map generateIntegerClass params - -generateIntegerClass :: String -> String -generateIntegerClass param = "case class " ++ param ++ "(i: Int) extends WorkflowAST \n" - -generateStringClasses :: [String] -> Doc -generateStringClasses params = text $ concat $ map generateStringClass params - -generateStringClass :: String -> String -generateStringClass param = "case class " ++ (map toUpper param) ++ "(str: String) extends WorkflowAST \n" -- case class EAdd(exp: WorkflowAST, exp1: WorkflowAST) extends WorkflowAST @@ -74,24 +49,44 @@ generateStringClass param = "case class " ++ (map toUpper param) ++ "(str: Strin catFilterToStrings :: [Either Cat String] -> [String] catFilterToStrings = map (\case Left c -> show c - Right s -> "" + Right _ -> "" ) prRuleName :: Rule -> [Doc] -prRuleName r@(Rule fun cat rhs _) - -- | isCoercion fun = prCoerciveRule r +prRuleName (Rule fun _ rhs _) | isCoercion fun = [""] | otherwise = [ text $ "case class " ++ fnm ++ "(" - , text $ intercalate ", " $ map (++ ": WorkflowAST") parsNames - -- , intersperse (text ", ") (filter (not . null) [text (param ++ ": WorkflowAST") | param <- parsNames, not (null param)]) + , text $ intercalate ", " $ map formatParam parsNames , " ) extends WorkflowAST" ] where fnm = funName fun parsNames = filter (not . null) $ map firstLowerCase (catFilterToStrings rhs) + -- Si el parámetro es "integer", lo tipamos como Int; si no, como WorkflowAST + formatParam param + | (show (camelCase param)) `elem` baseTokenCatNames = param ++ ": " ++ case baseTypeToScalaType param of -- convertir a camelCase el param, seguramente no sea le mejor forma de detemrinar el tipo, otra? + Just s -> s + _ -> "Int" -- TODO: Int no deberia ser el default, deberia fallar, creo + | otherwise = param ++ ": WorkflowAST" + + +-- prRuleName :: Rule -> [Doc] +-- prRuleName r@(Rule fun _ rhs _) +-- -- | isCoercion fun = prCoerciveRule r +-- | isCoercion fun = [""] +-- | otherwise = [ +-- text $ "case class " ++ fnm ++ "(" +-- , text $ intercalate ", " $ map (++ ": WorkflowAST") parsNames +-- -- , intersperse (text ", ") (filter (not . null) [text (param ++ ": WorkflowAST") | param <- parsNames, not (null param)]) +-- , " ) extends WorkflowAST" +-- ] +-- where +-- fnm = funName fun +-- parsNames = filter (not . null) $ map firstLowerCase (catFilterToStrings rhs) + ruleIterPr :: [Rule] -> [Doc] ruleIterPr [] = [""] ruleIterPr (r:[]) = prRuleName r @@ -112,3 +107,17 @@ headers name = [ ] +baseTypeToScalaType :: String -> Maybe String +baseTypeToScalaType = (`Data.Map.lookup` baseTypeMap) + +-- | Map from base LBNF Type to scala Type. + +baseTypeMap :: Map String String +baseTypeMap = fromList scalaTypesMap + +scalaTypesMap :: [(String, String)] +scalaTypesMap = + [ ("Integer" , "Int") + , ("String" , "String") + , ("Double" , "Double") + ] \ No newline at end of file From 55b5378e512798bb074a4954721a32c8d08e2411 Mon Sep 17 00:00:00 2001 From: WilliamOlsen13 <50978646+WilliamOlsen13@users.noreply.github.com> Date: Thu, 27 Mar 2025 00:21:34 -0300 Subject: [PATCH 22/42] Fix AST formatting Code refactoring to replace document fragmentation with a more functional approach for cleaner code and better formatting. --- .../BNFC/Backend/Scala/CFtoScalaParserAST.hs | 119 ++++++++---------- 1 file changed, 51 insertions(+), 68 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs index 7252cd5bc..e4101d3bc 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs @@ -1,17 +1,16 @@ - {-# LANGUAGE LambdaCase #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE OverloadedStrings #-} {- BNF Converter: Scala Lextract syntax - Copyright (Scala) 2024 Author: Juan Pablo Poittevin + Copyright (Scala) 2024 Author: Juan Pablo Poittevin, Guillermo Poladura Description : This module generates the Scala Lextract Syntax tree classes. It generates both a Header file and an Implementation file - Author : Juan Pablo Poittevin + Author : Juan Pablo Poittevin, Guillermo Poladura Created : 30 September, 2024 -} @@ -30,91 +29,75 @@ import BNFC.Backend.Common.NamedVariables (firstLowerCase) import Data.List (intercalate) import Data.Map (Map, fromList, lookup) -cf2ScalaParserAST - :: SharedOptions - -> CF - -> Doc -cf2ScalaParserAST Options{ lang } cf = vsep . concat $ - [ - headers lang - , strRules - ] +-- | Main function that generates the AST code +cf2ScalaParserAST :: SharedOptions -> CF -> Doc +cf2ScalaParserAST Options{ lang } cf = vcat $ + -- Generate headers + headers lang ++ + -- Add an empty line after the trait definition + [text ""] ++ + -- Generate case class definitions + generateRuleDefs rules where - strRules = prRulesNames rules - rules = ruleGroups cf + rules = ruleGroups cf + +-- | Generate all case class definitions +generateRuleDefs :: [(Cat, [Rule])] -> [Doc] +generateRuleDefs [] = [] +generateRuleDefs rules = concatMap processRuleGroup rules +-- | Process a single rule group and generate case classes +processRuleGroup :: (Cat, [Rule]) -> [Doc] +processRuleGroup (_, rules) = map createCaseClass (filter (not . isCoercionRule) rules) --- case class EAdd(exp: WorkflowAST, exp1: WorkflowAST) extends WorkflowAST +-- | Check if a rule is a coercion rule +isCoercionRule :: Rule -> Bool +isCoercionRule (Rule fun _ _ _) = isCoercion fun +-- | Create a single case class definition +createCaseClass :: Rule -> Doc +createCaseClass (Rule fun _ rhs _) = + text $ "case class " ++ className ++ "(" ++ params ++ ") extends WorkflowAST" + where + className = funName fun + paramNames = filter (not . null) $ map firstLowerCase (catFilterToStrings rhs) + params = intercalate ", " $ map formatParam paramNames + +-- | Format a parameter with its type +formatParam :: String -> String +formatParam param + | (show (camelCase param)) `elem` BNFC.CF.baseTokenCatNames = + param ++ ": " ++ case baseTypeToScalaType param of + Just s -> s + _ -> "Int" -- Default to Int + | otherwise = param ++ ": WorkflowAST" + +-- | Extract category strings from rule RHS catFilterToStrings :: [Either Cat String] -> [String] catFilterToStrings = map (\case Left c -> show c Right _ -> "" ) - -prRuleName :: Rule -> [Doc] -prRuleName (Rule fun _ rhs _) - | isCoercion fun = [""] - | otherwise = [ - text $ "case class " ++ fnm ++ "(" - , text $ intercalate ", " $ map formatParam parsNames - , " ) extends WorkflowAST" - ] - where - fnm = funName fun - parsNames = filter (not . null) $ map firstLowerCase (catFilterToStrings rhs) - - -- Si el parámetro es "integer", lo tipamos como Int; si no, como WorkflowAST - formatParam param - | (show (camelCase param)) `elem` baseTokenCatNames = param ++ ": " ++ case baseTypeToScalaType param of -- convertir a camelCase el param, seguramente no sea le mejor forma de detemrinar el tipo, otra? - Just s -> s - _ -> "Int" -- TODO: Int no deberia ser el default, deberia fallar, creo - | otherwise = param ++ ": WorkflowAST" - - --- prRuleName :: Rule -> [Doc] --- prRuleName r@(Rule fun _ rhs _) --- -- | isCoercion fun = prCoerciveRule r --- | isCoercion fun = [""] --- | otherwise = [ --- text $ "case class " ++ fnm ++ "(" --- , text $ intercalate ", " $ map (++ ": WorkflowAST") parsNames --- -- , intersperse (text ", ") (filter (not . null) [text (param ++ ": WorkflowAST") | param <- parsNames, not (null param)]) --- , " ) extends WorkflowAST" --- ] --- where --- fnm = funName fun --- parsNames = filter (not . null) $ map firstLowerCase (catFilterToStrings rhs) - -ruleIterPr :: [Rule] -> [Doc] -ruleIterPr [] = [""] -ruleIterPr (r:[]) = prRuleName r -ruleIterPr (r:rs) = prRuleName r ++ ruleIterPr rs - - -prRulesNames :: [(Cat,[Rule])] -> [Doc] -prRulesNames ( [] ) = [""] -prRulesNames ((_, r):[] ) = ruleIterPr r -prRulesNames ((_, r):crs ) = ruleIterPr r ++ prRulesNames crs - - +-- | Generate the header part of the file headers :: String -> [Doc] headers name = [ - text $ "package " ++ name ++ ".workflowtoken." ++ name ++ "Parser" - , "import scala.util.parsing.input.Positional" - , "sealed trait WorkflowAST extends Positional" + text $ "package " ++ name ++ ".workflowtoken." ++ name ++ "Parser", + text "", + text "import scala.util.parsing.input.Positional", + text "", + text "sealed trait WorkflowAST extends Positional" ] - +-- | Convert base LBNF type to Scala type baseTypeToScalaType :: String -> Maybe String baseTypeToScalaType = (`Data.Map.lookup` baseTypeMap) --- | Map from base LBNF Type to scala Type. - +-- | Map from base LBNF Type to scala Type baseTypeMap :: Map String String baseTypeMap = fromList scalaTypesMap +-- | Scala types mapping scalaTypesMap :: [(String, String)] scalaTypesMap = [ ("Integer" , "Int") From 286eccb0c05224db961ad9a24311fd209f80cbb5 Mon Sep 17 00:00:00 2001 From: WilliamOlsen13 <50978646+WilliamOlsen13@users.noreply.github.com> Date: Thu, 27 Mar 2025 00:45:32 -0300 Subject: [PATCH 23/42] Fix Parser formatting Code refactoring to replace document fragmentation with a more functional approach for cleaner code and better formatting. --- .../src/BNFC/Backend/Scala/CFtoScalaParser.hs | 347 +++++++++--------- 1 file changed, 175 insertions(+), 172 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs index 7e13f16e7..b0a7b2b19 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -4,11 +4,12 @@ {- BNF Converter: Scala Parser syntax - Copyright (Scala) 2024 Author: Juan Pablo Poittevin + Copyright (Scala) 2024 Author: Juan Pablo Poittevin, Guillermo Poladura Description : This module generates the Scala Parser Syntax Using Scala Parser Combinator - Author : Juan Pablo Poittevin + + Author : Juan Pablo Poittevin, Guillermo Poladura Created : 30 September, 2024 -} @@ -21,230 +22,232 @@ import BNFC.Utils (symbolToName) import BNFC.CF import BNFC.PrettyPrint import BNFC.Options -import Data.List (find, intercalate, isSuffixOf ) +import Data.List (find, intercalate, isSuffixOf) import BNFC.Backend.Common.NamedVariables (firstLowerCase) import Data.Char (toLower) -cf2ScalaParser - :: SharedOptions - -> CF -- Grammar. - -> Doc -cf2ScalaParser Options{ lang } cf = vsep . concat $ - [ - [] - , imports lang - , initWorkflowClass - , map (nest 4) addExtraClasses - , map (nest 4) getApplyFunction - , map (nest 4) (getProgramFunction cf) - , map (nest 4) strRules - , endWorkflowClass - , addWorkflowCompiler - ] +-- | Main function that generates the Scala parser code +cf2ScalaParser :: SharedOptions -> CF -> Doc +cf2ScalaParser Options{ lang } cf = vcat $ + -- Generate header and imports + imports lang ++ + -- Start the WorkflowParser object + initWorkflowClass ++ + -- Indent the class contents + [nest 4 $ vcat $ + -- Add extra classes + addExtraClasses ++ + -- Add apply function + getApplyFunction ++ + -- Add program function + getProgramFunction cf ++ + -- Add parser rules + generateAllRules (ruleGroups cf) + ] ++ + -- End the WorkflowParser object + endWorkflowClass ++ + -- Add the WorkflowCompiler object + addWorkflowCompiler + +-- | Generate all parser rules from rule groups +generateAllRules :: [(Cat, [Rule])] -> [Doc] +generateAllRules catsAndRules = + let + -- Generate regular rules + mainRules = concatMap generateRuleGroup catsAndRules + + -- Generate special rules for integer and string if needed + integerRule = generateSpecialRule catInteger "integer" "INTEGER" "toInt" catsAndRules + stringRule = generateSpecialRule catString "string" "STRING" "" catsAndRules + + in mainRules ++ integerRule ++ stringRule + +-- | Generate a rule group (definition for a single category) +generateRuleGroup :: (Cat, [Rule]) -> [Doc] +generateRuleGroup (cat, rules) = + [text $ "def " ++ catName ++ ": Parser[WorkflowAST] = positioned {"] ++ + [nest 4 $ generateRuleBody rules] ++ + [text "}"] where - strRules = ruleGroupsCFToString (ruleGroups cf) + catName = firstLowerCase $ show cat + +-- | Generate the body of a rule +generateRuleBody :: [Rule] -> Doc +generateRuleBody rules@(Rule _ cat _ _ : _) = + if any (hasTokenCat catInteger) rules + then text $ firstLowerCase catInteger + else + let + vars = concatMap prPrintRule_ rules + exitCatName = firstLowerCase $ head $ filterNotEqual (show (wpThing cat)) $ filterSymbs vars + + headerText = exitCatName ++ " ~ rep((" ++ intercalate " | " (onlySymbs (safeTail vars)) ++ ") ~ " ++ exitCatName ++ ") ^^ {" + subHeaderText = "case " ++ exitCatName ++ " ~ list => list.foldLeft(" ++ exitCatName ++ ") {" + + caseStatements = map generateCaseStatement rules + caseText = intercalate "\n" $ filter (not . null) caseStatements + in + text headerText $+$ + nest 4 (text subHeaderText) $+$ + nest 8 (vcat $ map text $ filter (not . null) caseStatements) $+$ + nest 4 (text "}") $+$ + text "}" +generateRuleBody [] = empty + +-- | Generate a case statement for a rule +generateCaseStatement :: Rule -> String +generateCaseStatement r@(Rule fun _ _ _) + | isCoercion fun = "" + | otherwise = + "case (" ++ head vars ++ ", " ++ intercalate " ~ " (safeTail vars) ++ ") => " + ++ fnm ++ "(" ++ intercalate ", " (filterSymbs vars) ++ ")" + where + vars = disambiguateNames $ map modifyVars (prPrintRule_ r) + fnm = funName fun + modifyVars str = if "()" `isSuffixOf` str then str else [toLower (head str)] + +-- | Generate a special rule for tokens like Integer or String +generateSpecialRule :: TokenCat -> String -> String -> String -> [(Cat, [Rule])] -> [Doc] +generateSpecialRule tokenCat ruleName tokenName conversion catsAndRules = + case find (\(_, rules) -> any (hasTokenCat tokenCat) rules) catsAndRules of + Just (_, rules) -> case find (hasTokenCat tokenCat) rules of + Just rule -> + let + funName = getFunName rule + conversionPart = if null conversion then "" else "." ++ conversion + in + [ text $ "def " ++ ruleName ++ ": Parser[" ++ funName ++ "] = positioned {" + , nest 4 $ text $ "accept(\"" ++ ruleName ++ "\", { case " ++ tokenName ++ "(i) => " ++ funName ++ "(i" ++ conversionPart ++ ") })" + , text "}" + ] + Nothing -> [] + Nothing -> [] + +-- | Extract terminal and non-terminal symbols from a rule +prPrintRule_ :: Rule -> [String] +prPrintRule_ (Rule _ _ items _) = map getSymbFromName $ catToStrings items +-- | Convert a category or string to its string representation catToStrings :: [Either Cat String] -> [String] catToStrings = map (\case - Left c -> show c - Right s -> s - ) + Left c -> show c + Right s -> s + ) +-- | Get a symbol name from a string getSymbFromName :: String -> String getSymbFromName s = case symbolToName s of Just s -> s ++ "()" _ -> s -prPrintRule_ :: Rule -> [String] -prPrintRule_ (Rule _ _ items _) = map getSymbFromName $ catToStrings items - --- Function to extract the function name from a rule +-- | Get the function name for a rule getFunName :: Rule -> String -getFunName (Rule fun _ _ _) = (wpThing fun) +getFunName (Rule fun _ _ _) = wpThing fun --- Helper to check if a rule contains a TokenCat in RHS +-- | Check if a rule contains a specific token category hasTokenCat :: TokenCat -> Rule -> Bool hasTokenCat token (Rule _ _ rhs _) = TokenCat token `elem` [c | Left c <- rhs] +-- | Safe version of tail that returns an empty list for an empty list safeTail :: [a] -> [a] -safeTail [] = [] -- Si la lista está vacía, devuelve una lista vacía +safeTail [] = [] safeTail (_:xs) = xs -filterSymbs :: [[Char]] -> [[Char]] +-- | Filter out strings ending with "()" +filterSymbs :: [String] -> [String] filterSymbs = filter (not . isSuffixOf "()") -onlySymbs :: [[Char]] -> [[Char]] +-- | Keep only strings ending with "()" +onlySymbs :: [String] -> [String] onlySymbs = filter (isSuffixOf "()") -prSubRule :: Rule -> [String] -prSubRule r@(Rule fun _ _ _) - | isCoercion fun = [] - | otherwise = - ["case (" ++ head vars ++ ", " ++ intercalate " ~ " (safeTail vars) ++ ") => " - ++ fnm ++ "(" ++ intercalate ", " (filterSymbs vars) ++ ")"] - where - vars = disambiguateNames $ map modifyVars (prPrintRule_ r) - fnm = funName fun - modifyVars str = if "()" `isSuffixOf` str then str else [toLower (head str)] - - -coerCatDefSign :: Cat -> Doc -coerCatDefSign cat = - text $ "def " ++ pre ++ ": Parser[WorkflowAST] = positioned {" - where - pre = firstLowerCase $ show cat - -prSubRuleDoc :: Rule -> [Doc] -prSubRuleDoc r = map text (prSubRule r) - +-- | Remove an element from a list filterNotEqual :: Eq a => a -> [a] -> [a] filterNotEqual element list = filter (/= element) list -rulesToString :: [Rule] -> [Doc] -rulesToString [] = [""] -rulesToString rules@(Rule _ cat _ _ : _) = - let - -- Filtramos las reglas que pertenecen a la misma categoría `cat` - (sameCat, rest) = span (\(Rule _ c _ _) -> c == cat) rules - - -- Obtenemos los símbolos de las reglas filtradas - vars = concatMap prPrintRule_ sameCat - exitCatName = firstLowerCase $ head $ filterNotEqual (show (wpThing cat)) $ filterSymbs vars - - integerRuleData = case sameCat of - (fRule:_) -> hasTokenCat catInteger fRule - [] -> False - - -- Si no es "integer", generamos la cabecera única con las alternativas en `rep(...)` - header = [text $ exitCatName ++ " ~ rep((" ++ intercalate " | " (onlySymbs (safeTail vars)) ++ ") ~ " ++ exitCatName ++ ") ^^ {"] - subHeader = [nest 4 $ text $ "case " ++ exitCatName ++ " ~ list => list.foldLeft(" ++ exitCatName ++ ") {"] - - -- Generamos los `case` correspondientes a las reglas de `sameCat` - rulesDocs = map (nest 8) $ concatMap prSubRuleDoc sameCat - - -- Si integerRuleData es True, evitamos agregar header y subHeader - mainBlock = if integerRuleData - then [text $ firstLowerCase catInteger ] -- TODO: change this to a map between cat and function name example (CatInteger -> integer) - else header ++ subHeader ++ rulesDocs ++ [nest 4 "}", "}"] - - in mainBlock ++ rulesToString rest - - -ruleGroupsCFToString :: [(Cat, [Rule])] -> [Doc] -ruleGroupsCFToString catsAndRules = - let - -- Process each group to generate its rules - processGroup (c, r) = [coerCatDefSign c] ++ map (nest 4) (rulesToString r) ++ ["}"] - - -- Find the first rule where `TokenCat catInteger` appears - integerRuleData = find (\(_, rules) -> any (hasTokenCat catInteger) rules) catsAndRules - stringRuleData = find (\(_, rules) -> any (hasTokenCat catString) rules) catsAndRules - - -- Generate the integer rule only if needed - integerRule = case integerRuleData of - Just (_, rules) -> case find (hasTokenCat catInteger) rules of - Just rule -> - let funName = getFunName rule in - [ text $ "def integer: Parser[" ++ funName ++ "] = positioned {" - , nest 4 $ text $ "accept(\"integer\", { case INTEGER(i) => " ++ funName ++ "(i.toInt) })" - , "}" - ] - Nothing -> [] - Nothing -> [] - - -- Generate the string rule only if needed - stringRule = case stringRuleData of - Just (_, rules) -> case find (hasTokenCat catString) rules of - Just rule -> - let funName = getFunName rule in - [ text $ "def string: Parser[" ++ funName ++ "] = positioned {" - , nest 4 $ text $ "accept(\"string\", { case STRING(s) => " ++ funName ++ "(s) })" - , "}" - ] - Nothing -> [] - Nothing -> [] - - -- Generate all main rules - mainGeneratedRules = concatMap processGroup catsAndRules - - in mainGeneratedRules ++ integerRule ++ stringRule - +-- | Make variable names unique by adding numbers to duplicates +disambiguateNames :: [String] -> [String] +disambiguateNames = disamb [] + where + disamb ns1 (n:ns2) + | n `elem` (ns1 ++ ns2) = let i = length (filter (==n) ns1) + 1 + in (n ++ show i) : disamb (n:ns1) ns2 + | otherwise = n : disamb (n:ns1) ns2 + disamb _ [] = [] +-- | Generate the imports section imports :: String -> [Doc] -imports name = [ - text $ "package " ++ name ++ ".workflowtoken." ++ name ++ "Parser" - , text $ "import " ++ name ++ ".workflowtoken." ++ name ++ "Lex._" - , "import scala.util.parsing.combinator.Parsers" - , "import scala.util.parsing.input.{NoPosition, Position, Reader}" +imports name = [ + text $ "package " ++ name ++ ".workflowtoken." ++ name ++ "Parser", + text "", + text $ "import " ++ name ++ ".workflowtoken." ++ name ++ "Lex._", + text "", + text "import scala.util.parsing.combinator.Parsers", + text "", + text "import scala.util.parsing.input.{NoPosition, Position, Reader}" ] - +-- | Generate the extra classes section addExtraClasses :: [Doc] addExtraClasses = [ - "class WorkflowTokenReader(tokens: Seq[WorkflowToken]) extends Reader[WorkflowToken] {" - , nest 4 "override def first: WorkflowToken = tokens.head" - , nest 4 "override def atEnd: Boolean = tokens.isEmpty" - , nest 4 "override def pos: Position = tokens.headOption.map(_.pos).getOrElse(NoPosition)" - , nest 4 "override def rest: Reader[WorkflowToken] = new WorkflowTokenReader(tokens.tail)" - , "}" + text "override type Elem = WorkflowToken", + text "", + text "class WorkflowTokenReader(tokens: Seq[WorkflowToken]) extends Reader[WorkflowToken] {", + nest 4 $ text "override def first: WorkflowToken = tokens.head", + nest 4 $ text "override def atEnd: Boolean = tokens.isEmpty", + nest 4 $ text "override def pos: Position = tokens.headOption.map(_.pos).getOrElse(NoPosition)", + nest 4 $ text "override def rest: Reader[WorkflowToken] = new WorkflowTokenReader(tokens.tail)", + text "}" ] +-- | Generate the WorkflowCompiler object addWorkflowCompiler :: [Doc] addWorkflowCompiler = [ - "object WorkflowCompiler {" - , nest 4 "def apply(code: String): Either[WorkflowCompilationError, WorkflowAST] = {" - , nest 8 "for {" - , nest 12 "tokens <- WorkflowLexer(code).right" - , nest 12 "ast <- WorkflowParser(tokens).right" - , nest 8 "} yield ast" - , nest 4 "}" - , "}" + text "", + text "object WorkflowCompiler {", + nest 4 $ text "def apply(code: String): Either[WorkflowCompilationError, WorkflowAST] = {", + nest 8 $ text "for {", + nest 12 $ text "tokens <- WorkflowLexer(code).right", + nest 12 $ text "ast <- WorkflowParser(tokens).right", + nest 8 $ text "} yield ast", + nest 4 $ text "}", + text "}" ] +-- | Start the WorkflowParser object initWorkflowClass :: [Doc] initWorkflowClass = [ - "object WorkflowParser extends Parsers {" - , nest 4 "override type Elem = WorkflowToken" - -- TODO: I REMOVED THIS LINE TO MAKE IT WORK, BUT WILL FAIL WITH GRAMMARS WHICH USE TABS/SPACES AS BLOCKS DEFINITION, SHOULD WE WORK ON THIS? - -- , nest 4 "override def skipWhitespace = true" - -- In case the lenguage use indentation for block creation, we should remove \n from the list of whiteSpace. - -- , nest 4 $ text $ "override val whiteSpace = " ++ "\"" ++ "[\\t\\r\\f\\n]+\".r" + text "", + text "object WorkflowParser extends Parsers {" ] +-- | End the WorkflowParser object endWorkflowClass :: [Doc] endWorkflowClass = [ - "}" + text "}" ] +-- | Generate the apply function getApplyFunction :: [Doc] getApplyFunction = [ - "def apply(tokens: Seq[WorkflowToken]): Either[WorkflowParserError, WorkflowAST] = {" - , nest 4 "val reader = new WorkflowTokenReader(tokens)" - , nest 4 "program(reader) match {" - , nest 4 "case NoSuccess(msg, next) => Left(WorkflowParserError(Location(next.pos.line, next.pos.column), msg))" - , nest 4 "case Success(result, next) => Right(result)" - , nest 4 "}" - , "}" + text "", + text "def apply(tokens: Seq[WorkflowToken]): Either[WorkflowParserError, WorkflowAST] = {", + nest 4 $ text "val reader = new WorkflowTokenReader(tokens)", + nest 4 $ text "program(reader) match {", + nest 8 $ text "case NoSuccess(msg, next) => Left(WorkflowParserError(Location(next.pos.line, next.pos.column), msg))", + nest 8 $ text "case Success(result, next) => Right(result)", + nest 4 $ text "}", + text "}" ] - - +-- | Generate the program function getProgramFunction :: CF -> [Doc] getProgramFunction cf = [ - "def program: Parser[WorkflowAST] = positioned {" - , nest 4 $ text $ "phrase(" ++ eps ++ ")" - ,"}" + text "", + text "def program: Parser[WorkflowAST] = positioned {", + nest 4 $ text $ "phrase(" ++ entryPoint ++ ")", + text "}" ] where - eps = firstLowerCase $ show $ head $ map normCat $ DF.toList $ allEntryPoints cf - - -disambiguateNames :: [String] -> [String] -disambiguateNames = disamb [] - where - disamb ns1 (n:ns2) - | n `elem` (ns1 ++ ns2) = let i = length (filter (==n) ns1) + 1 - in (n ++ show i) : disamb (n:ns1) ns2 - | otherwise = n : disamb (n:ns1) ns2 - disamb _ [] = [] + entryPoint = firstLowerCase $ show $ head $ map normCat $ DF.toList $ allEntryPoints cf \ No newline at end of file From 0488a925d93b94c19913738648399968b588eac0 Mon Sep 17 00:00:00 2001 From: WilliamOlsen13 <50978646+WilliamOlsen13@users.noreply.github.com> Date: Thu, 27 Mar 2025 00:57:49 -0300 Subject: [PATCH 24/42] Fix Lexer formatting Code refactoring to replace document fragmentation for cleaner code and better formatting. --- source/src/BNFC/Backend/Scala/CFtoScalaLex.hs | 212 +++++++++--------- 1 file changed, 110 insertions(+), 102 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs b/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs index 03f6e9dbf..e788239cb 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs @@ -4,11 +4,12 @@ {- BNF Converter: Scala Lextract syntax - Copyright (Scala) 2024 Author: Juan Pablo Poittevin + Copyright (Scala) 2024 Author: Juan Pablo Poittevin, Guillermo Poladura Description : This module generates the Scala Lextract Syntax tree classes. Using Scala Parser Combinator - Author : Juan Pablo Poittevin + + Author : Juan Pablo Poittevin, Guillermo Poladura Created : 30 September, 2024 -} @@ -21,164 +22,171 @@ import BNFC.CF import BNFC.PrettyPrint import BNFC.Options import BNFC.Backend.Common (unicodeAndSymbols) -import Data.List ( intercalate ) +import Data.List (intercalate) import Data.Char (toLower) +-- | Converts a string to lowercase toLowerString :: String -> String toLowerString s = map toLower s -cf2ScalaLex - :: SharedOptions - -> CF -- Grammar. - -> Doc -cf2ScalaLex Options{ lang } cf = vsep . concat $ - [ - [] - , imports lang - , addExtraClasses - , initWorkflowClass - , map (nest 4) getApplyFunction - , map (nest 4) (getTokensFunction (symbs ++ liters)) - -- , map (nest 4) getIdentifiersFunction -- This should be called only in case of having identifiers in the lenguage, which anyway will be the case in most of the lenguages. - , map (nest 4) (addParserFunctions liters) - -- , map (nest 4) getLiteralsFunction - -- , getIndentationsFunction - , map (nest 4) (getKeywordParsers symbs) - , endWorkflowClass - ] +-- | Main function that generates the Scala lexer code +cf2ScalaLex :: SharedOptions -> CF -> Doc +cf2ScalaLex Options{ lang } cf = vcat $ + -- Generate header and imports + imports lang ++ + -- Generate error and location classes + addExtraClasses ++ + -- Start the WorkflowLexer object + initWorkflowClass ++ + -- Indent the class contents + [nest 4 $ vcat $ + -- Add apply function + getApplyFunction ++ + -- Add tokens function + getTokensFunction (symbs ++ liters) ++ + -- Add parser functions for literals + map addParserFunction liters ++ + -- Add keyword parsers + map getKeywordParser symbs + ] ++ + -- End the WorkflowLexer object + endWorkflowClass where - symbs = unicodeAndSymbols cf - liters = literals cf - --- catString, catInteger, catDouble, catChar, catIdent :: TokenCat --- catString = "String" --- catInteger = "Integer" --- catDouble = "Double" --- catChar = "Char" --- catIdent = "Ident" - -addParserFunctions :: [String] -> [Doc] -addParserFunctions liters = map addParserFunction liters + symbs = unicodeAndSymbols cf + liters = literals cf +-- | Generate a parser function for a specific literal type addParserFunction :: String -> Doc addParserFunction liter - | liter == catInteger = getIntegerFunction - | liter == catDouble = getDoubleFunction - | liter == catIdent = getIdentifiersFunction - | liter == catString = getLiteralsFunction - | liter == catChar = getLiteralFunction - | otherwise = "" - + | liter == catInteger = getIntegerFunction + | liter == catDouble = getDoubleFunction + | liter == catIdent = getIdentifiersFunction + | liter == catString = getLiteralsFunction + | liter == catChar = getLiteralFunction + | otherwise = empty + +-- | Generate the imports section imports :: String -> [Doc] -imports name = [ - text $ "package " ++ name ++ ".workflowtoken." ++ name ++ "Lex" - , "import scala.util.parsing.combinator.RegexParsers" +imports name = [ + text $ "package " ++ name ++ ".workflowtoken." ++ name ++ "Lex", + text "", + text "import scala.util.parsing.combinator.RegexParsers" ] +-- | Generate error and location classes addExtraClasses :: [Doc] addExtraClasses = [ - "sealed trait WorkflowCompilationError" - ,"case class WorkflowLexerError(location: Location, msg: String) extends WorkflowCompilationError" - ,"case class WorkflowParserError(location: Location, msg: String) extends WorkflowCompilationError" - ,"case class Location(line: Int, column: Int) {" - ,nest 4 "override def toString = s\"$line:$column\"" - ,"}" + text "", + text "sealed trait WorkflowCompilationError", + text "case class WorkflowLexerError(location: Location, msg: String) extends WorkflowCompilationError", + text "case class WorkflowParserError(location: Location, msg: String) extends WorkflowCompilationError", + text "", + text "case class Location(line: Int, column: Int) {", + nest 4 $ text "override def toString = s\"$line:$column\"", + text "}" ] - +-- | Start the WorkflowLexer object initWorkflowClass :: [Doc] initWorkflowClass = [ - "object WorkflowLexer extends RegexParsers {" - -- TODO: I REMOVED THIS LINE TO MAKE IT WORK, BUT WILL FAIL WITH GRAMMARS WHICH USE TABS/SPACES AS BLOCKS DEFINITION, SHOULD WE WORK ON THIS? - -- , nest 4 "override def skipWhitespace = true" - -- In case the lenguage use indentation for block creation, we should remove \n from the list of whiteSpace. - -- , nest 4 $ text $ "override val whiteSpace = " ++ "\"" ++ "[\\t\\r\\f\\n]+\".r" + text "", + text "object WorkflowLexer extends RegexParsers {" ] +-- | End the WorkflowLexer object endWorkflowClass :: [Doc] endWorkflowClass = [ - "}" + text "}" ] +-- | Generate the apply function getApplyFunction :: [Doc] getApplyFunction = [ - "def apply(code: String): Either[WorkflowLexerError, List[WorkflowToken]] = {" - , nest 4 "parse(tokens, code) match {" - , nest 6 "case NoSuccess(msg, next) => Left(WorkflowLexerError(Location(next.pos.line, next.pos.column), msg))" - , nest 6 "case Success(result, next) => Right(result)" - , nest 4"}" - , "}" + text "def apply(code: String): Either[WorkflowLexerError, List[WorkflowToken]] = {", + nest 4 $ text "parse(tokens, code) match {", + nest 8 $ text "case NoSuccess(msg, next) => Left(WorkflowLexerError(Location(next.pos.line, next.pos.column), msg))", + nest 8 $ text "case Success(result, next) => Right(result)", + nest 4 $ text "}", + text "}" ] - +-- | Generate the function for parsing identifiers getIdentifiersFunction :: Doc getIdentifiersFunction = vcat [ - "def ident: Parser[IDENT] = {" - , nest 4 "\"[a-zA-Z_][a-zA-Z0-9_]*\".r ^^ { str => IDENT(str) }" - , "}" + text "", + text "def ident: Parser[IDENT] = {", + nest 4 $ text "\"[a-zA-Z_][a-zA-Z0-9_]*\".r ^^ { str => IDENT(str) }", + text "}" ] +-- | Generate the function for parsing integers getIntegerFunction :: Doc getIntegerFunction = vcat [ - "def integer: Parser[INTEGER] = {" - , nest 4 "\"[0-9]+\".r ^^ {i => INTEGER(i)}" - , "}" + text "", + text "def integer: Parser[INTEGER] = {", + nest 4 $ text "\"[0-9]+\".r ^^ {i => INTEGER(i)}", + text "}" ] - +-- | Generate the function for parsing double values getDoubleFunction :: Doc getDoubleFunction = vcat [ - "def double: Parser[Double] = {" - , nest 4 "\"[0-9]+.[0-9]+\".r ^^ {i => Double(i)}" - , "}" + text "", + text "def double: Parser[Double] = {", + nest 4 $ text "\"[0-9]+.[0-9]+\".r ^^ {i => Double(i)}", + text "}" ] - +-- | Generate the function for parsing string literals getLiteralsFunction :: Doc getLiteralsFunction = vcat [ - "def string: Parser[STRING] = {" - , nest 4 "\"\\\"[^\\\"]*\\\"\".r ^^ { str =>" - , nest 6 "val content = str.substring(1, str.length - 1)" - , nest 6 "STRING(content)" - , nest 4 "}" - , "}" + text "", + text "def string: Parser[STRING] = {", + nest 4 $ text "\"\\\"[^\\\"]*\\\"\".r ^^ { str =>", + nest 8 $ text "val content = str.substring(1, str.length - 1)", + nest 8 $ text "STRING(content)", + nest 4 $ text "}", + text "}" ] +-- | Generate the function for parsing character literals getLiteralFunction :: Doc getLiteralFunction = vcat [ - "def char: Parser[CHAR] = {" - , nest 4 "\"\\\'[^\\\']*\\\'\".r ^^ { str =>" - , nest 6 "val content = str.substring(1, str.length - 1)" - , nest 6 "CHAR(content)" - , nest 4 "}" - , "}" + text "", + text "def char: Parser[CHAR] = {", + nest 4 $ text "\"\\\'[^\\\']*\\\'\".r ^^ { str =>", + nest 8 $ text "val content = str.substring(1, str.length - 1)", + nest 8 $ text "CHAR(content)", + nest 4 $ text "}", + text "}" ] - +-- | Get a symbol name from a string getSymbFromName :: String -> String getSymbFromName s = case symbolToName s of - Just s -> s - _ -> s - + Just s -> s + _ -> s +-- | Generate the tokens function getTokensFunction :: [String] -> [Doc] getTokensFunction symbs = [ - "def tokens: Parser[List[WorkflowToken]] = {" - , nest 4 $ text $ "phrase(rep1( " ++ intercalate " | " (getListSymNames symbs) ++ "))" - , "}" + text "", + text "def tokens: Parser[List[WorkflowToken]] = {", + nest 4 $ text $ "phrase(rep1( " ++ intercalate " | " (getListSymNames symbs) ++ "))", + text "}" ] - +-- | Get the lowercase symbol name getSymName :: String -> String -getSymName = toLowerString.getSymbFromName +getSymName = toLowerString . getSymbFromName +-- | Get a list of symbol names getListSymNames :: [String] -> [String] -getListSymNames symbs = map getSymName symbs - -getKeywordParsers :: [String] -> [Doc] -getKeywordParsers symbs = map getKeywordParser symbs - +getListSymNames = map getSymName +-- | Generate a keyword parser for a symbol getKeywordParser :: String -> Doc -getKeywordParser symb = text $ "def " ++ getSymName symb ++ " = positioned { \"" ++ toLowerString symb ++ "\" ^^ (_ =>"++ getSymbFromName symb ++"()) }" +getKeywordParser symb = + text "" $+$ + text ("def " ++ getSymName symb ++ " = positioned { \"" ++ toLowerString symb ++ "\" ^^ (_ => " ++ getSymbFromName symb ++ "()) }") \ No newline at end of file From cac7fe17cda3b1c94270d8c58f0e820670cadf81 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Fri, 28 Mar 2025 00:07:42 +0100 Subject: [PATCH 25/42] change in parser AST to work with LBNF example --- source/src/BNFC/Backend/Scala.hs | 5 +- .../BNFC/Backend/Scala/CFtoScalaParserAST.hs | 48 +++++++++++++------ 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/source/src/BNFC/Backend/Scala.hs b/source/src/BNFC/Backend/Scala.hs index 6da857846..1498aacb5 100644 --- a/source/src/BNFC/Backend/Scala.hs +++ b/source/src/BNFC/Backend/Scala.hs @@ -5,8 +5,7 @@ import Prelude hiding ((<>)) import BNFC.Backend.Base (mkfile, Backend) import BNFC.CF import BNFC.Options hiding (Backend) -import BNFC.PrettyPrint (vcat, Doc) -import BNFC.Backend.Scala.CFtoScalaAbs (cf2ScalaAbs) +-- import BNFC.Backend.Scala.CFtoScalaAbs (cf2ScalaAbs) import BNFC.Backend.Scala.CFtoScalaLex (cf2ScalaLex) import BNFC.Backend.Scala.CFtoScalaLexToken (cf2ScalaLexToken) import BNFC.Backend.Scala.CFtoScalaParser (cf2ScalaParser) @@ -16,7 +15,7 @@ import BNFC.Backend.Scala.CFtoScalaParserAST (cf2ScalaParserAST) makeScala :: SharedOptions -> CF -> Backend makeScala opts cf = do - mkfile (name ++ ".scala") comment $ cf2ScalaAbs opts cf + -- mkfile (name ++ ".scala") comment $ cf2ScalaAbs opts cf mkfile (name ++ "LexToken.scala") comment $ cf2ScalaLexToken opts cf mkfile (name ++ "Lex.scala") comment $ cf2ScalaLex opts cf mkfile (name ++ "Parser.scala") comment $ cf2ScalaParser opts cf diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs index e4101d3bc..85b2ca917 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs @@ -21,11 +21,6 @@ import Prelude hiding ((<>)) import BNFC.CF import BNFC.PrettyPrint import BNFC.Options -import BNFC.Backend.Common (unicodeAndSymbols) -import BNFC.Utils (symbolToName, camelCase) -import Data.Char (toUpper) - -import BNFC.Backend.Common.NamedVariables (firstLowerCase) import Data.List (intercalate) import Data.Map (Map, fromList, lookup) @@ -60,22 +55,45 @@ createCaseClass (Rule fun _ rhs _) = text $ "case class " ++ className ++ "(" ++ params ++ ") extends WorkflowAST" where className = funName fun - paramNames = filter (not . null) $ map firstLowerCase (catFilterToStrings rhs) - params = intercalate ", " $ map formatParam paramNames + + -- Función para formatear parámetros según sean Cat o String + catParams :: Either Cat String -> String + catParams (Left c) = formatParam c + catParams (Right s) = s + + -- Aplicamos `catParams` a cada elemento de `rhs` + params = intercalate ", " $ map catParams rhs + + +unwrapListCat :: Cat -> TokenCat +unwrapListCat (TokenCat c) = c +unwrapListCat (ListCat lc) = unwrapListCat lc +unwrapListCat (CoercCat cc _) = cc +unwrapListCat (Cat s) = s + -- | Format a parameter with its type -formatParam :: String -> String -formatParam param - | (show (camelCase param)) `elem` BNFC.CF.baseTokenCatNames = - param ++ ": " ++ case baseTypeToScalaType param of - Just s -> s - _ -> "Int" -- Default to Int - | otherwise = param ++ ": WorkflowAST" +formatParam :: Cat -> String +formatParam cat = + let baseCat = unwrapListCat cat -- Extraemos el TokenCat base + baseCatStr = show baseCat + in if baseCat `elem` BNFC.CF.baseTokenCatNames + then case baseTypeToScalaType baseCatStr of + Just s -> wrapList s + Nothing -> wrapList ": Int" -- Default a "Int" + else wrapList "WorkflowAST" -- Si no es baseTokenCat, usar WorkflowAST + + where + -- Si es una lista, lo envolvemos en brackets + wrapList s = case cat of + ListCat _ -> "[" ++ s ++ "]" + _ -> ":" ++ s + -- | Extract category strings from rule RHS catFilterToStrings :: [Either Cat String] -> [String] catFilterToStrings = map (\case - Left c -> show c + Left c -> show $ catOfList c Right _ -> "" ) From 41b17c37d9cd68e6ea96775607fb7d339c8feae7 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Tue, 1 Apr 2025 23:22:50 +0200 Subject: [PATCH 26/42] fixes on parser scala generator --- .../src/BNFC/Backend/Scala/CFtoScalaParser.hs | 48 ++++++++++++++++--- .../BNFC/Backend/Scala/CFtoScalaParserAST.hs | 33 ++++++------- 2 files changed, 57 insertions(+), 24 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs index b0a7b2b19..6f124f0f9 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -25,6 +25,9 @@ import BNFC.Options import Data.List (find, intercalate, isSuffixOf) import BNFC.Backend.Common.NamedVariables (firstLowerCase) import Data.Char (toLower) +import Data.Maybe (listToMaybe) +import System.Directory.Internal.Prelude (fromMaybe) +import GHC.Unicode (isAlphaNum) -- | Main function that generates the Scala parser code cf2ScalaParser :: SharedOptions -> CF -> Doc @@ -79,13 +82,12 @@ generateRuleBody rules@(Rule _ cat _ _ : _) = else let vars = concatMap prPrintRule_ rules - exitCatName = firstLowerCase $ head $ filterNotEqual (show (wpThing cat)) $ filterSymbs vars + exitCatName = firstLowerCase $ fromMaybe (error "Empty list encountered") $ listToMaybe $ filterNotEqual (show (wpThing cat)) $ filterSymbs vars headerText = exitCatName ++ " ~ rep((" ++ intercalate " | " (onlySymbs (safeTail vars)) ++ ") ~ " ++ exitCatName ++ ") ^^ {" subHeaderText = "case " ++ exitCatName ++ " ~ list => list.foldLeft(" ++ exitCatName ++ ") {" caseStatements = map generateCaseStatement rules - caseText = intercalate "\n" $ filter (not . null) caseStatements in text headerText $+$ nest 4 (text subHeaderText) $+$ @@ -94,17 +96,49 @@ generateRuleBody rules@(Rule _ cat _ _ : _) = text "}" generateRuleBody [] = empty --- | Generate a case statement for a rule + + +-- generateCaseStatement :: Rule -> String +-- generateCaseStatement r@(Rule fun _ _ _) +-- | isCoercion fun = "" +-- | null vars = fnm ++ "()" -- Special case for an empty list +-- | otherwise = +-- "case (" ++ head vars ++ concatMap (" ~ " ++) (safeTail vars) ++ ") => " +-- ++ fnm ++ "(" ++ intercalate ", " (filterSymbs vars) ++ ")" +-- where +-- vars = disambiguateNames $ map modifyVars (prPrintRule_ r) +-- fnm = funName fun +-- modifyVars str +-- | "()" `isSuffixOf` str = str +-- | all isAlphaNum str = case str of +-- (x:_) -> [toLower x] +-- [] -> error "Empty string encountered in modifyVars" +-- | otherwise = "sym" ++ show (length str) -- Replace invalid symbols with placeholders + + +-- -- | Generate a case statement for a rule generateCaseStatement :: Rule -> String generateCaseStatement r@(Rule fun _ _ _) | isCoercion fun = "" + | null vars = fnm ++ "()" -- Caso especial para lista vacía | otherwise = - "case (" ++ head vars ++ ", " ++ intercalate " ~ " (safeTail vars) ++ ") => " + "case (" ++ head vars ++ concatMap (" ~ " ++) (safeTail vars) ++ ") => " ++ fnm ++ "(" ++ intercalate ", " (filterSymbs vars) ++ ")" where vars = disambiguateNames $ map modifyVars (prPrintRule_ r) fnm = funName fun - modifyVars str = if "()" `isSuffixOf` str then str else [toLower (head str)] + -- modifyVars str = if "()" `isSuffixOf` str + -- then getSymbFromName str + -- else case str of + -- (x:_) -> [toLower x] + -- [] -> error "Empty string encountered in modifyVars" + modifyVars str + | "()" `isSuffixOf` str = str + | all isAlphaNum str = case str of + (x:_) -> [toLower x] + [] -> error "Empty string encountered in modifyVars" + | otherwise = getSymbFromName str -- Replace invalid symbols with placeho + -- | Generate a special rule for tokens like Integer or String generateSpecialRule :: TokenCat -> String -> String -> String -> [(Cat, [Rule])] -> [Doc] @@ -250,4 +284,6 @@ getProgramFunction cf = [ text "}" ] where - entryPoint = firstLowerCase $ show $ head $ map normCat $ DF.toList $ allEntryPoints cf \ No newline at end of file + entryPoint = case listToMaybe (map normCat $ DF.toList $ allEntryPoints cf) of + Just ep -> firstLowerCase $ show ep + Nothing -> error "No entry points found in the context-free grammar." diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs index 85b2ca917..5ab861f67 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs @@ -23,6 +23,8 @@ import BNFC.PrettyPrint import BNFC.Options import Data.List (intercalate) import Data.Map (Map, fromList, lookup) +import BNFC.Utils ((+++)) +import Debug.Trace (trace) -- | Main function that generates the AST code cf2ScalaParserAST :: SharedOptions -> CF -> Doc @@ -58,11 +60,14 @@ createCaseClass (Rule fun _ rhs _) = -- Función para formatear parámetros según sean Cat o String catParams :: Either Cat String -> String - catParams (Left c) = formatParam c - catParams (Right s) = s + catParams (Left c) = formatParamType c + catParams (Right _) = "WorflowAST" -- Aplicamos `catParams` a cada elemento de `rhs` - params = intercalate ", " $ map catParams rhs + params = intercalate ", " $ zipWith (\x y -> x ++ ":" +++ y) (generateStringList rhs) (map catParams rhs) + +generateStringList :: [a] -> [String] +generateStringList xs = zipWith (\_ i -> "var" ++ show i) xs [1..] unwrapListCat :: Cat -> TokenCat @@ -73,29 +78,21 @@ unwrapListCat (Cat s) = s -- | Format a parameter with its type -formatParam :: Cat -> String -formatParam cat = +formatParamType :: Cat -> String +formatParamType cat = let baseCat = unwrapListCat cat -- Extraemos el TokenCat base - baseCatStr = show baseCat in if baseCat `elem` BNFC.CF.baseTokenCatNames - then case baseTypeToScalaType baseCatStr of + then case baseTypeToScalaType baseCat of Just s -> wrapList s - Nothing -> wrapList ": Int" -- Default a "Int" + Nothing -> "String" -- Default a "String" else wrapList "WorkflowAST" -- Si no es baseTokenCat, usar WorkflowAST + where -- Si es una lista, lo envolvemos en brackets wrapList s = case cat of - ListCat _ -> "[" ++ s ++ "]" - _ -> ":" ++ s - - --- | Extract category strings from rule RHS -catFilterToStrings :: [Either Cat String] -> [String] -catFilterToStrings = map (\case - Left c -> show $ catOfList c - Right _ -> "" - ) + ListCat _ -> "List[" ++ s ++ "]" + _ -> s -- | Generate the header part of the file headers :: String -> [Doc] From 9f414f77700f47c37bbad44c3375a0203b710c5e Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Thu, 3 Apr 2025 16:15:22 +0200 Subject: [PATCH 27/42] fix minor problems and add utils file --- source/BNFC.cabal | 1 + .../src/BNFC/Backend/Scala/CFtoScalaParser.hs | 43 +---- .../BNFC/Backend/Scala/CFtoScalaParserAST.hs | 62 ++---- source/src/BNFC/Backend/Scala/Utils.hs | 178 ++++++++++++++++++ 4 files changed, 208 insertions(+), 76 deletions(-) create mode 100644 source/src/BNFC/Backend/Scala/Utils.hs diff --git a/source/BNFC.cabal b/source/BNFC.cabal index ae505a53e..999b062c6 100644 --- a/source/BNFC.cabal +++ b/source/BNFC.cabal @@ -274,6 +274,7 @@ library -- Scala backend BNFC.Backend.Scala + BNFC.Backend.Scala.Utils BNFC.Backend.Scala.CFtoScalaAbs BNFC.Backend.Scala.CFtoScalaLex BNFC.Backend.Scala.CFtoScalaLexToken diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs index 6f124f0f9..14b836cd1 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -15,19 +15,20 @@ module BNFC.Backend.Scala.CFtoScalaParser (cf2ScalaParser) where +import qualified Data.Foldable as DF (toList) import Prelude hiding ((<>)) +import GHC.Unicode (isAlphaNum) -import qualified Data.Foldable as DF (toList) -import BNFC.Utils (symbolToName) +import BNFC.Backend.Scala.Utils (safeTail, safeCatName, getSymbFromName, hasTokenCat, catToStrings, getFunName, inspectListRulesByCategory) import BNFC.CF import BNFC.PrettyPrint -import BNFC.Options -import Data.List (find, intercalate, isSuffixOf) +import BNFC.Options ( SharedOptions(lang, Options) ) import BNFC.Backend.Common.NamedVariables (firstLowerCase) + +import Data.List (find, intercalate, isSuffixOf) import Data.Char (toLower) import Data.Maybe (listToMaybe) import System.Directory.Internal.Prelude (fromMaybe) -import GHC.Unicode (isAlphaNum) -- | Main function that generates the Scala parser code cf2ScalaParser :: SharedOptions -> CF -> Doc @@ -46,6 +47,8 @@ cf2ScalaParser Options{ lang } cf = vcat $ getProgramFunction cf ++ -- Add parser rules generateAllRules (ruleGroups cf) + -- inspect rules, only for debugging + -- ++ inspectListRulesByCategory (ruleGroups cf) ] ++ -- End the WorkflowParser object endWorkflowClass ++ @@ -72,7 +75,7 @@ generateRuleGroup (cat, rules) = [nest 4 $ generateRuleBody rules] ++ [text "}"] where - catName = firstLowerCase $ show cat + catName = safeCatName cat -- | Generate the body of a rule generateRuleBody :: [Rule] -> Doc @@ -122,7 +125,7 @@ generateCaseStatement r@(Rule fun _ _ _) | isCoercion fun = "" | null vars = fnm ++ "()" -- Caso especial para lista vacía | otherwise = - "case (" ++ head vars ++ concatMap (" ~ " ++) (safeTail vars) ++ ") => " + "case (" ++ head vars ++ ", " ++ intercalate " ~ " (safeTail vars) ++ ") => " ++ fnm ++ "(" ++ intercalate ", " (filterSymbs vars) ++ ")" where vars = disambiguateNames $ map modifyVars (prPrintRule_ r) @@ -161,32 +164,6 @@ generateSpecialRule tokenCat ruleName tokenName conversion catsAndRules = prPrintRule_ :: Rule -> [String] prPrintRule_ (Rule _ _ items _) = map getSymbFromName $ catToStrings items --- | Convert a category or string to its string representation -catToStrings :: [Either Cat String] -> [String] -catToStrings = map (\case - Left c -> show c - Right s -> s - ) - --- | Get a symbol name from a string -getSymbFromName :: String -> String -getSymbFromName s = - case symbolToName s of - Just s -> s ++ "()" - _ -> s - --- | Get the function name for a rule -getFunName :: Rule -> String -getFunName (Rule fun _ _ _) = wpThing fun - --- | Check if a rule contains a specific token category -hasTokenCat :: TokenCat -> Rule -> Bool -hasTokenCat token (Rule _ _ rhs _) = TokenCat token `elem` [c | Left c <- rhs] - --- | Safe version of tail that returns an empty list for an empty list -safeTail :: [a] -> [a] -safeTail [] = [] -safeTail (_:xs) = xs -- | Filter out strings ending with "()" filterSymbs :: [String] -> [String] diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs index 5ab861f67..8d6cee54c 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs @@ -19,12 +19,19 @@ module BNFC.Backend.Scala.CFtoScalaParserAST (cf2ScalaParserAST) where import Prelude hiding ((<>)) import BNFC.CF -import BNFC.PrettyPrint -import BNFC.Options + ( baseTokenCatNames, + ruleGroups, + CF, + Cat(ListCat), + IsFun(funName, isCoercion), + Rul(Rule), + Rule, + WithPosition(wpThing) ) +import BNFC.PrettyPrint ( text, vcat, Doc ) +import BNFC.Options ( SharedOptions(lang, Options) ) +import BNFC.Backend.Scala.Utils (generateVarsList, unwrapListCat, baseTypeToScalaType, wrapList, isLeft) import Data.List (intercalate) -import Data.Map (Map, fromList, lookup) import BNFC.Utils ((+++)) -import Debug.Trace (trace) -- | Main function that generates the AST code cf2ScalaParserAST :: SharedOptions -> CF -> Doc @@ -53,29 +60,20 @@ isCoercionRule (Rule fun _ _ _) = isCoercion fun -- | Create a single case class definition createCaseClass :: Rule -> Doc -createCaseClass (Rule fun _ rhs _) = - text $ "case class " ++ className ++ "(" ++ params ++ ") extends WorkflowAST" +createCaseClass (Rule fun cat rhs _) + | ListCat _ <- (wpThing cat) = "" -- TODO: here we should process the lsit + | otherwise = text $ "case class " ++ className ++ "(" ++ params ++ ") extends WorkflowAST" where className = funName fun -- Función para formatear parámetros según sean Cat o String catParams :: Either Cat String -> String catParams (Left c) = formatParamType c - catParams (Right _) = "WorflowAST" + catParams (Right _) = "WorkflowAST" -- Aplicamos `catParams` a cada elemento de `rhs` - params = intercalate ", " $ zipWith (\x y -> x ++ ":" +++ y) (generateStringList rhs) (map catParams rhs) - -generateStringList :: [a] -> [String] -generateStringList xs = zipWith (\_ i -> "var" ++ show i) xs [1..] - - -unwrapListCat :: Cat -> TokenCat -unwrapListCat (TokenCat c) = c -unwrapListCat (ListCat lc) = unwrapListCat lc -unwrapListCat (CoercCat cc _) = cc -unwrapListCat (Cat s) = s - + params = intercalate ", " $ zipWith (\x y -> x ++ ":" +++ y) (generateVarsList filteredRhs) (map catParams filteredRhs) + filteredRhs = filter isLeft rhs -- | Format a parameter with its type formatParamType :: Cat -> String @@ -83,16 +81,10 @@ formatParamType cat = let baseCat = unwrapListCat cat -- Extraemos el TokenCat base in if baseCat `elem` BNFC.CF.baseTokenCatNames then case baseTypeToScalaType baseCat of - Just s -> wrapList s + Just s -> wrapList cat s Nothing -> "String" -- Default a "String" - else wrapList "WorkflowAST" -- Si no es baseTokenCat, usar WorkflowAST - + else wrapList cat "WorkflowAST" -- Si no es baseTokenCat, usar WorkflowAST - where - -- Si es una lista, lo envolvemos en brackets - wrapList s = case cat of - ListCat _ -> "List[" ++ s ++ "]" - _ -> s -- | Generate the header part of the file headers :: String -> [Doc] @@ -103,19 +95,3 @@ headers name = [ text "", text "sealed trait WorkflowAST extends Positional" ] - --- | Convert base LBNF type to Scala type -baseTypeToScalaType :: String -> Maybe String -baseTypeToScalaType = (`Data.Map.lookup` baseTypeMap) - --- | Map from base LBNF Type to scala Type -baseTypeMap :: Map String String -baseTypeMap = fromList scalaTypesMap - --- | Scala types mapping -scalaTypesMap :: [(String, String)] -scalaTypesMap = - [ ("Integer" , "Int") - , ("String" , "String") - , ("Double" , "Double") - ] \ No newline at end of file diff --git a/source/src/BNFC/Backend/Scala/Utils.hs b/source/src/BNFC/Backend/Scala/Utils.hs new file mode 100644 index 000000000..c28899489 --- /dev/null +++ b/source/src/BNFC/Backend/Scala/Utils.hs @@ -0,0 +1,178 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE OverloadedStrings #-} + +{- + BNF Converter: Utils + Copyright (Scala) 2024 Author: Juan Pablo Poittevin, Guillermo Poladura + + Description : This module is a helper for Scala backend + Author : Juan Pablo Poittevin, Guillermo Poladura + Created : 30 September, 2024 +-} + +module BNFC.Backend.Scala.Utils ( + generateVarsList, unwrapListCat, baseTypeToScalaType, safeTail, + wrapList, safeHeadString, scalaReserverWords, safeCatName, isLeft, + getSymbFromName, catToStrings, getFunName, hasTokenCat, safeRefCatName, inspectListRulesByCategory +) where +import BNFC.CF +import Data.Map +import BNFC.Backend.Common.NamedVariables (firstLowerCase) +import System.Directory.Internal.Prelude (fromMaybe) +import BNFC.Utils (symbolToName) +import Data.Char (toUpper) +import Text.PrettyPrint +import BNFC.PrettyPrint + + +generateVarsList :: [a] -> [String] +generateVarsList xs = zipWith (\_ (i :: Int) -> "var" ++ show i) xs [1..] + +unwrapListCat :: Cat -> TokenCat +unwrapListCat (TokenCat c) = c +unwrapListCat (ListCat lc) = unwrapListCat lc +unwrapListCat (CoercCat cc _) = cc +unwrapListCat (Cat s) = s + +wrapList :: Cat -> String -> [Char] +wrapList cat s = case cat of + ListCat _ -> "List[" ++ s ++ "]" + _ -> s + +-- | Convert base LBNF type to Scala type +baseTypeToScalaType :: String -> Maybe String +baseTypeToScalaType = (`Data.Map.lookup` baseTypeMap) + +-- | Map from base LBNF Type to scala Type +baseTypeMap :: Map String String +baseTypeMap = fromList scalaTypesMap + +-- | Scala types mapping +scalaTypesMap :: [(String, String)] +scalaTypesMap = + [ ("Integer" , "Int") + , ("String" , "String") + , ("Double" , "Double") + ] + +scalaReserverWords :: String -> Maybe String +scalaReserverWords = (`Data.Map.lookup` reserverWordsMap) + +-- | Map from base LBNF Type to scala Type +reserverWordsMap :: Map String String +reserverWordsMap = fromList wordsMap + +-- | Scala reserverd words mapping +wordsMap :: [(String, String)] +wordsMap = + [ + ("def" , "_def") + , ("val" , "_val") + , ("var" , "_var") + , ("class", "_class") + , ("object", "_object") + , ("trait", "_trait") + , ("extends", "_extends") + , ("with", "_with") + , ("case", "_case") + , ("sealed", "_sealed") + , ("abstract", "_abstract") + , ("final", "_final") + , ("override", "_override") + , ("implicit", "_implicit") + , ("lazy", "_lazy") + , ("private", "_private") + , ("protected", "_protected") + , ("public", "_public") + , ("import", "_import") + , ("package", "_package") + , ("return", "_return") + , ("if", "_if") + , ("else", "_else") + , ("while", "_while") + , ("for", "_for") + , ("do", "_do") + , ("match", "_match") + ] + +-- | Safe version of tail that returns an empty list for an empty list +safeTail :: [a] -> [a] +safeTail [] = [] +safeTail (_:xs) = xs + +-- | Safe version of head that returns an empty list for an empty list +safeHeadString :: [String] -> String +safeHeadString [] = "" +safeHeadString (x:_) = x + + +safeCatName :: Cat -> String +safeCatName cat = fromMaybe notSafeScalaCatName (scalaReserverWords notSafeScalaCatName) + where + notSafeScalaCatName = case cat of + ListCat innerCat -> "list" ++ firstUpperCase (safeCatName innerCat) -- Handle ListCat explicitly + _ -> firstLowerCase $ show cat + + +safeRefCatName :: Cat -> String +safeRefCatName cat = fromMaybe notSafeScalaCatName (scalaReserverWords notSafeScalaCatName) + where + notSafeScalaCatName = case cat of + ListCat innerCat -> firstUpperCase (safeCatName innerCat) -- Handle ListCat explicitly + _ -> firstLowerCase $ show cat + +firstUpperCase :: String -> String +firstUpperCase [] = [] +firstUpperCase (x:xs) = toUpper x : xs + +-- | Get a symbol name from a string +getSymbFromName :: String -> String +getSymbFromName s = + case symbolToName s of + Just s -> s ++ "()" + _ -> s + + + +-- | Convert a category or string to its string representation +-- | Convert a category or string to its string representation +catToStrings :: [Either Cat String] -> [String] +catToStrings = Prelude.map (\case + Left c -> show c + Right s -> s + ) + +-- | Get the function name for a rule +getFunName :: Rule -> String +getFunName (Rule fun _ _ _) = wpThing fun + +-- | Check if a rule contains a specific token category +hasTokenCat :: TokenCat -> Rule -> Bool +hasTokenCat token (Rule _ _ rhs _) = TokenCat token `elem` [c | Left c <- rhs] + +-- Function to generate a list of Doc from a (Cat, [Rule]) +inspectRulesByCategory :: (Cat, [Rule]) -> [Doc] +inspectRulesByCategory (cat, rules) = + [text "Category:" <+> pretty cat] ++ + [text "Rules:" $$ nest 2 (vcat (Prelude.map prettyRule rules))] + where + -- Pretty-print a single Rule + prettyRule (Rule fun valCat rhs internal) = + text "Function:" <+> pretty fun $$ + text "Value Category:" <+> pretty valCat $$ + text "RHS:" <+> text (show rhs) $$ + text "Internal:" <+> text (internalRuleToStrings internal) + +internalRuleToStrings :: InternalRule -> String +internalRuleToStrings Internal = "Internal" +internalRuleToStrings Parsable = "Parsable" + + +-- | Convert a list of rules to a list of strings +inspectListRulesByCategory :: [(Cat, [Rule])] -> [Doc] +inspectListRulesByCategory rulesByCat = concatMap inspectRulesByCategory rulesByCat + +isLeft :: Either a b -> Bool +isLeft (Left _) = True +isLeft _ = False \ No newline at end of file From be98d95b5a7c452ce304239d4207311f80dcbeb1 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Sat, 5 Apr 2025 20:12:17 +0200 Subject: [PATCH 28/42] changes to make the parser more scalable --- .../src/BNFC/Backend/Scala/CFtoScalaParser.hs | 94 ++++++++++--------- source/src/BNFC/Backend/Scala/Utils.hs | 39 +++++++- 2 files changed, 86 insertions(+), 47 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs index 14b836cd1..a2636fbcb 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -19,16 +19,17 @@ import qualified Data.Foldable as DF (toList) import Prelude hiding ((<>)) import GHC.Unicode (isAlphaNum) -import BNFC.Backend.Scala.Utils (safeTail, safeCatName, getSymbFromName, hasTokenCat, catToStrings, getFunName, inspectListRulesByCategory) +import BNFC.Backend.Scala.Utils (safeTail, safeCatName, getSymbFromName, hasTokenCat, catToStrings, getFunName, inspectListRulesByCategory, getRulesFunsName, rhsToSafeStrings, disambiguateNames, getRHSCats) import BNFC.CF import BNFC.PrettyPrint import BNFC.Options ( SharedOptions(lang, Options) ) -import BNFC.Backend.Common.NamedVariables (firstLowerCase) +import BNFC.Backend.Common.NamedVariables (firstLowerCase, fixCoercions) import Data.List (find, intercalate, isSuffixOf) import Data.Char (toLower) import Data.Maybe (listToMaybe) import System.Directory.Internal.Prelude (fromMaybe) +import BNFC.Utils ((+++)) -- | Main function that generates the Scala parser code cf2ScalaParser :: SharedOptions -> CF -> Doc @@ -48,7 +49,7 @@ cf2ScalaParser Options{ lang } cf = vcat $ -- Add parser rules generateAllRules (ruleGroups cf) -- inspect rules, only for debugging - -- ++ inspectListRulesByCategory (ruleGroups cf) + ++ inspectListRulesByCategory (ruleGroups cf) ] ++ -- End the WorkflowParser object endWorkflowClass ++ @@ -60,7 +61,7 @@ generateAllRules :: [(Cat, [Rule])] -> [Doc] generateAllRules catsAndRules = let -- Generate regular rules - mainRules = concatMap generateRuleGroup catsAndRules + mainRules = map text $ concatMap generateRuleGroup (fixCoercions catsAndRules) -- Generate special rules for integer and string if needed integerRule = generateSpecialRule catInteger "integer" "INTEGER" "toInt" catsAndRules @@ -69,35 +70,56 @@ generateAllRules catsAndRules = in mainRules ++ integerRule ++ stringRule -- | Generate a rule group (definition for a single category) -generateRuleGroup :: (Cat, [Rule]) -> [Doc] +generateRuleGroup :: (Cat, [Rule]) -> [String] generateRuleGroup (cat, rules) = - [text $ "def " ++ catName ++ ": Parser[WorkflowAST] = positioned {"] ++ - [nest 4 $ generateRuleBody rules] ++ - [text "}"] + ["def " ++ catName ++ ": Parser[WorkflowAST] = positioned {"] ++ + [replicate 4 ' ' ++ intercalate " | " (getRulesFunsName $ filter (not.isCoercion) rules)] ++ + ["}"] ++ + (generateRuleBody $ filter (not.isCoercion) rules) where catName = safeCatName cat --- | Generate the body of a rule -generateRuleBody :: [Rule] -> Doc -generateRuleBody rules@(Rule _ cat _ _ : _) = - if any (hasTokenCat catInteger) rules - then text $ firstLowerCase catInteger - else - let - vars = concatMap prPrintRule_ rules - exitCatName = firstLowerCase $ fromMaybe (error "Empty list encountered") $ listToMaybe $ filterNotEqual (show (wpThing cat)) $ filterSymbs vars +-- | Based on a list of rules, generate the functions for the parser +generateRuleBody :: [Rule] -> [String] +generateRuleBody rules = + concatMap generateSingleRuleBody rules + where + generateSingleRuleBody :: Rule -> [String] + generateSingleRuleBody rule@(Rule fnam cat rhs _) = + let + isRecursiveRule = any (sameCat (wpThing cat)) (getRHSCats rhs) + ruleForm = if isRecursiveRule + then case rhsToSafeStrings rhs of + (_ : rest) -> "integer" : rest --TODO: we should get the "base" of the coercion recursion, in the calc is integer + [] -> [] -- Handle empty rhs case + else rhsToSafeStrings rhs + mainDef = [ + "def " ++ firstLowerCase (funName fnam) ++ ": Parser[WorkflowAST] =" + +++ intercalate " ~ " ruleForm ++ " ^^ { " ++ generateCaseStatement rule ++ " }" + ] + in mainDef + + +-- -- | Generate the body of a rule +-- generateRuleBody :: [Rule] -> Doc +-- generateRuleBody rules@(Rule _ cat _ _ : _) = +-- let +-- vars = concatMap prPrintRule_ rules +-- exitCatName = firstLowerCase $ fromMaybe (error "Empty list encountered") $ listToMaybe $ filterNotEqual (show (wpThing cat)) $ filterSymbs vars - headerText = exitCatName ++ " ~ rep((" ++ intercalate " | " (onlySymbs (safeTail vars)) ++ ") ~ " ++ exitCatName ++ ") ^^ {" - subHeaderText = "case " ++ exitCatName ++ " ~ list => list.foldLeft(" ++ exitCatName ++ ") {" +-- headerText = exitCatName ++ " ~ rep((" ++ intercalate " | " (onlySymbs (safeTail vars)) ++ ") ~ " ++ exitCatName ++ ") ^^ {" +-- subHeaderText = "case " ++ exitCatName ++ " ~ list => list.foldLeft(" ++ exitCatName ++ ") {" - caseStatements = map generateCaseStatement rules - in - text headerText $+$ - nest 4 (text subHeaderText) $+$ - nest 8 (vcat $ map text $ filter (not . null) caseStatements) $+$ - nest 4 (text "}") $+$ - text "}" -generateRuleBody [] = empty +-- caseStatements = map generateCaseStatement rules +-- in +-- text headerText $+$ +-- nest 4 (text subHeaderText) $+$ +-- nest 8 (vcat $ map text $ filter (not . null) caseStatements) $+$ +-- nest 4 (text "}") $+$ +-- text "}" +-- where +-- isListCat = isList $ wpThing cat +-- generateRuleBody [] = empty @@ -125,22 +147,17 @@ generateCaseStatement r@(Rule fun _ _ _) | isCoercion fun = "" | null vars = fnm ++ "()" -- Caso especial para lista vacía | otherwise = - "case (" ++ head vars ++ ", " ++ intercalate " ~ " (safeTail vars) ++ ") => " + "case (" ++ intercalate " ~ " vars ++ ") => " ++ fnm ++ "(" ++ intercalate ", " (filterSymbs vars) ++ ")" where vars = disambiguateNames $ map modifyVars (prPrintRule_ r) fnm = funName fun - -- modifyVars str = if "()" `isSuffixOf` str - -- then getSymbFromName str - -- else case str of - -- (x:_) -> [toLower x] - -- [] -> error "Empty string encountered in modifyVars" modifyVars str | "()" `isSuffixOf` str = str | all isAlphaNum str = case str of (x:_) -> [toLower x] [] -> error "Empty string encountered in modifyVars" - | otherwise = getSymbFromName str -- Replace invalid symbols with placeho + | otherwise = getSymbFromName str -- Replace invalid symbols with placeholder -- | Generate a special rule for tokens like Integer or String @@ -177,15 +194,6 @@ onlySymbs = filter (isSuffixOf "()") filterNotEqual :: Eq a => a -> [a] -> [a] filterNotEqual element list = filter (/= element) list --- | Make variable names unique by adding numbers to duplicates -disambiguateNames :: [String] -> [String] -disambiguateNames = disamb [] - where - disamb ns1 (n:ns2) - | n `elem` (ns1 ++ ns2) = let i = length (filter (==n) ns1) + 1 - in (n ++ show i) : disamb (n:ns1) ns2 - | otherwise = n : disamb (n:ns1) ns2 - disamb _ [] = [] -- | Generate the imports section imports :: String -> [Doc] diff --git a/source/src/BNFC/Backend/Scala/Utils.hs b/source/src/BNFC/Backend/Scala/Utils.hs index c28899489..e20701da6 100644 --- a/source/src/BNFC/Backend/Scala/Utils.hs +++ b/source/src/BNFC/Backend/Scala/Utils.hs @@ -12,8 +12,8 @@ -} module BNFC.Backend.Scala.Utils ( - generateVarsList, unwrapListCat, baseTypeToScalaType, safeTail, - wrapList, safeHeadString, scalaReserverWords, safeCatName, isLeft, + generateVarsList, unwrapListCat, baseTypeToScalaType, safeTail, rhsToSafeStrings, disambiguateNames, + wrapList, safeHeadString, scalaReserverWords, safeCatName, isLeft, getRulesFunsName, getRHSCats, getSymbFromName, catToStrings, getFunName, hasTokenCat, safeRefCatName, inspectListRulesByCategory ) where import BNFC.CF @@ -24,6 +24,7 @@ import BNFC.Utils (symbolToName) import Data.Char (toUpper) import Text.PrettyPrint import BNFC.PrettyPrint +import qualified Data.List generateVarsList :: [a] -> [String] @@ -122,6 +123,14 @@ safeRefCatName cat = fromMaybe notSafeScalaCatName (scalaReserverWords notSafeSc ListCat innerCat -> firstUpperCase (safeCatName innerCat) -- Handle ListCat explicitly _ -> firstLowerCase $ show cat +getRulesFunsName :: [Rule] -> [String] +getRulesFunsName rules = + let + catNames = [firstLowerCase $ wpThing fnam | Rule fnam _ _ _ <- rules] + uniqueCatNames = Data.List.nub catNames + in + uniqueCatNames + firstUpperCase :: String -> String firstUpperCase [] = [] firstUpperCase (x:xs) = toUpper x : xs @@ -135,7 +144,6 @@ getSymbFromName s = --- | Convert a category or string to its string representation -- | Convert a category or string to its string representation catToStrings :: [Either Cat String] -> [String] catToStrings = Prelude.map (\case @@ -143,6 +151,19 @@ catToStrings = Prelude.map (\case Right s -> s ) +-- | Gived a list of rhs, return the list vars in safe strings +-- | so for the EAdd it will return: ["exp", "PLUS()", "exp"] +rhsToSafeStrings :: [Either Cat String] -> [String] +rhsToSafeStrings = Prelude.map (\case + Left c -> firstLowerCase $ show $ normCat c + Right s -> case symbolToName s of + Just s' -> s' ++ "()" + Nothing -> s + ) +-- | Get all the Left Cat of the rhs of a rule +getRHSCats :: [Either Cat String] -> [Cat] +getRHSCats rhs = [c | Left c <- rhs] + -- | Get the function name for a rule getFunName :: Rule -> String getFunName (Rule fun _ _ _) = wpThing fun @@ -175,4 +196,14 @@ inspectListRulesByCategory rulesByCat = concatMap inspectRulesByCategory rulesBy isLeft :: Either a b -> Bool isLeft (Left _) = True -isLeft _ = False \ No newline at end of file +isLeft _ = False + +-- | Make variable names unique by adding numbers to duplicates +disambiguateNames :: [String] -> [String] +disambiguateNames = disamb [] + where + disamb ns1 (n:ns2) + | n `elem` (ns1 ++ ns2) = let i = length (Prelude.filter (==n) ns1) + 1 + in (n ++ show i) : disamb (n:ns1) ns2 + | otherwise = n : disamb (n:ns1) ns2 + disamb _ [] = [] \ No newline at end of file From f9e113166f2ea6f1d110e00a2b8f084c506107e1 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Sat, 5 Apr 2025 20:38:52 +0200 Subject: [PATCH 29/42] minor fixes and clean up --- .../src/BNFC/Backend/Scala/CFtoScalaParser.hs | 64 +++---------------- source/src/BNFC/Backend/Scala/Utils.hs | 17 ++++- 2 files changed, 23 insertions(+), 58 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs index a2636fbcb..e491996a4 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -19,7 +19,7 @@ import qualified Data.Foldable as DF (toList) import Prelude hiding ((<>)) import GHC.Unicode (isAlphaNum) -import BNFC.Backend.Scala.Utils (safeTail, safeCatName, getSymbFromName, hasTokenCat, catToStrings, getFunName, inspectListRulesByCategory, getRulesFunsName, rhsToSafeStrings, disambiguateNames, getRHSCats) +import BNFC.Backend.Scala.Utils (safeCatName, getSymbFromName, hasTokenCat, getFunName, getRulesFunsName, rhsToSafeStrings, disambiguateNames, getRHSCats, isSpecialCat, safeCatToStrings) import BNFC.CF import BNFC.PrettyPrint import BNFC.Options ( SharedOptions(lang, Options) ) @@ -28,7 +28,6 @@ import BNFC.Backend.Common.NamedVariables (firstLowerCase, fixCoercions) import Data.List (find, intercalate, isSuffixOf) import Data.Char (toLower) import Data.Maybe (listToMaybe) -import System.Directory.Internal.Prelude (fromMaybe) import BNFC.Utils ((+++)) -- | Main function that generates the Scala parser code @@ -49,7 +48,7 @@ cf2ScalaParser Options{ lang } cf = vcat $ -- Add parser rules generateAllRules (ruleGroups cf) -- inspect rules, only for debugging - ++ inspectListRulesByCategory (ruleGroups cf) + -- ++ inspectListRulesByCategory (ruleGroups cf) ] ++ -- End the WorkflowParser object endWorkflowClass ++ @@ -95,51 +94,14 @@ generateRuleBody rules = else rhsToSafeStrings rhs mainDef = [ "def " ++ firstLowerCase (funName fnam) ++ ": Parser[WorkflowAST] =" - +++ intercalate " ~ " ruleForm ++ " ^^ { " ++ generateCaseStatement rule ++ " }" + +++ intercalate " ~ " ruleForm ++ + if isRuleOnlySpecials + then "" + else " ^^ { " ++ generateCaseStatement rule ++ " }" ] in mainDef - - --- -- | Generate the body of a rule --- generateRuleBody :: [Rule] -> Doc --- generateRuleBody rules@(Rule _ cat _ _ : _) = --- let --- vars = concatMap prPrintRule_ rules --- exitCatName = firstLowerCase $ fromMaybe (error "Empty list encountered") $ listToMaybe $ filterNotEqual (show (wpThing cat)) $ filterSymbs vars - --- headerText = exitCatName ++ " ~ rep((" ++ intercalate " | " (onlySymbs (safeTail vars)) ++ ") ~ " ++ exitCatName ++ ") ^^ {" --- subHeaderText = "case " ++ exitCatName ++ " ~ list => list.foldLeft(" ++ exitCatName ++ ") {" - --- caseStatements = map generateCaseStatement rules --- in --- text headerText $+$ --- nest 4 (text subHeaderText) $+$ --- nest 8 (vcat $ map text $ filter (not . null) caseStatements) $+$ --- nest 4 (text "}") $+$ --- text "}" --- where --- isListCat = isList $ wpThing cat --- generateRuleBody [] = empty - - - --- generateCaseStatement :: Rule -> String --- generateCaseStatement r@(Rule fun _ _ _) --- | isCoercion fun = "" --- | null vars = fnm ++ "()" -- Special case for an empty list --- | otherwise = --- "case (" ++ head vars ++ concatMap (" ~ " ++) (safeTail vars) ++ ") => " --- ++ fnm ++ "(" ++ intercalate ", " (filterSymbs vars) ++ ")" --- where --- vars = disambiguateNames $ map modifyVars (prPrintRule_ r) --- fnm = funName fun --- modifyVars str --- | "()" `isSuffixOf` str = str --- | all isAlphaNum str = case str of --- (x:_) -> [toLower x] --- [] -> error "Empty string encountered in modifyVars" --- | otherwise = "sym" ++ show (length str) -- Replace invalid symbols with placeholders - + where + isRuleOnlySpecials = all isSpecialCat (getRHSCats rhs) -- -- | Generate a case statement for a rule generateCaseStatement :: Rule -> String @@ -179,21 +141,13 @@ generateSpecialRule tokenCat ruleName tokenName conversion catsAndRules = -- | Extract terminal and non-terminal symbols from a rule prPrintRule_ :: Rule -> [String] -prPrintRule_ (Rule _ _ items _) = map getSymbFromName $ catToStrings items +prPrintRule_ (Rule _ _ items _) = map getSymbFromName $ safeCatToStrings items -- | Filter out strings ending with "()" filterSymbs :: [String] -> [String] filterSymbs = filter (not . isSuffixOf "()") --- | Keep only strings ending with "()" -onlySymbs :: [String] -> [String] -onlySymbs = filter (isSuffixOf "()") - --- | Remove an element from a list -filterNotEqual :: Eq a => a -> [a] -> [a] -filterNotEqual element list = filter (/= element) list - -- | Generate the imports section imports :: String -> [Doc] diff --git a/source/src/BNFC/Backend/Scala/Utils.hs b/source/src/BNFC/Backend/Scala/Utils.hs index e20701da6..f3d61e227 100644 --- a/source/src/BNFC/Backend/Scala/Utils.hs +++ b/source/src/BNFC/Backend/Scala/Utils.hs @@ -12,8 +12,8 @@ -} module BNFC.Backend.Scala.Utils ( - generateVarsList, unwrapListCat, baseTypeToScalaType, safeTail, rhsToSafeStrings, disambiguateNames, - wrapList, safeHeadString, scalaReserverWords, safeCatName, isLeft, getRulesFunsName, getRHSCats, + generateVarsList, unwrapListCat, baseTypeToScalaType, safeTail, rhsToSafeStrings, disambiguateNames, safeCatToStrings, + wrapList, safeHeadString, scalaReserverWords, safeCatName, isLeft, getRulesFunsName, getRHSCats, isSpecialCat, getSymbFromName, catToStrings, getFunName, hasTokenCat, safeRefCatName, inspectListRulesByCategory ) where import BNFC.CF @@ -142,6 +142,12 @@ getSymbFromName s = Just s -> s ++ "()" _ -> s +-- | Convert a category or string to its string representation +safeCatToStrings :: [Either Cat String] -> [String] +safeCatToStrings = Prelude.map (\case + Left c -> safeCatName c + Right s -> s + ) -- | Convert a category or string to its string representation @@ -155,7 +161,7 @@ catToStrings = Prelude.map (\case -- | so for the EAdd it will return: ["exp", "PLUS()", "exp"] rhsToSafeStrings :: [Either Cat String] -> [String] rhsToSafeStrings = Prelude.map (\case - Left c -> firstLowerCase $ show $ normCat c + Left c -> safeCatName $ normCat c Right s -> case symbolToName s of Just s' -> s' ++ "()" Nothing -> s @@ -172,6 +178,11 @@ getFunName (Rule fun _ _ _) = wpThing fun hasTokenCat :: TokenCat -> Rule -> Bool hasTokenCat token (Rule _ _ rhs _) = TokenCat token `elem` [c | Left c <- rhs] +isSpecialCat :: Cat -> Bool +isSpecialCat (TokenCat cat) = cat `elem` baseTokenCatNames +isSpecialCat _ = False + + -- Function to generate a list of Doc from a (Cat, [Rule]) inspectRulesByCategory :: (Cat, [Rule]) -> [Doc] inspectRulesByCategory (cat, rules) = From 08777db01254489afb99584d78f7c2955b89e24c Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Sun, 6 Apr 2025 20:27:11 +0200 Subject: [PATCH 30/42] improvement in LBNF parser generation --- .../src/BNFC/Backend/Scala/CFtoScalaParser.hs | 63 +++++++++++------ .../BNFC/Backend/Scala/CFtoScalaParserAST.hs | 36 ++-------- source/src/BNFC/Backend/Scala/Utils.hs | 68 +++++++++++++------ 3 files changed, 95 insertions(+), 72 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs index e491996a4..da75d902b 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -19,13 +19,16 @@ import qualified Data.Foldable as DF (toList) import Prelude hiding ((<>)) import GHC.Unicode (isAlphaNum) -import BNFC.Backend.Scala.Utils (safeCatName, getSymbFromName, hasTokenCat, getFunName, getRulesFunsName, rhsToSafeStrings, disambiguateNames, getRHSCats, isSpecialCat, safeCatToStrings) +import BNFC.Backend.Scala.Utils ( + safeCatName, getSymbFromName, hasTokenCat, getFunName, rhsToSafeStrings, disambiguateNames, getRHSCats, isSpecialCat, + safeCatToStrings, inspectListRulesByCategory, isListCat, isLeft, generateClassSignature + ) import BNFC.CF import BNFC.PrettyPrint import BNFC.Options ( SharedOptions(lang, Options) ) import BNFC.Backend.Common.NamedVariables (firstLowerCase, fixCoercions) -import Data.List (find, intercalate, isSuffixOf) +import Data.List (find, intercalate, isSuffixOf, nub) import Data.Char (toLower) import Data.Maybe (listToMaybe) import BNFC.Utils ((+++)) @@ -48,7 +51,7 @@ cf2ScalaParser Options{ lang } cf = vcat $ -- Add parser rules generateAllRules (ruleGroups cf) -- inspect rules, only for debugging - -- ++ inspectListRulesByCategory (ruleGroups cf) + ++ inspectListRulesByCategory (ruleGroups cf) ] ++ -- End the WorkflowParser object endWorkflowClass ++ @@ -68,15 +71,26 @@ generateAllRules catsAndRules = in mainRules ++ integerRule ++ stringRule +getRuleFunName :: Rule -> String +getRuleFunName (Rule fnam _ _ _) = firstLowerCase $ funName fnam + +getRulesFunsName :: [Rule] -> [String] +getRulesFunsName rules = nub $ map getRuleFunName rules + -- | Generate a rule group (definition for a single category) generateRuleGroup :: (Cat, [Rule]) -> [String] generateRuleGroup (cat, rules) = ["def " ++ catName ++ ": Parser[WorkflowAST] = positioned {"] ++ - [replicate 4 ' ' ++ intercalate " | " (getRulesFunsName $ filter (not.isCoercion) rules)] ++ + [replicate 4 ' ' ++ intercalate " | " (getRulesFunsName nonCoercionRules)] ++ ["}"] ++ - (generateRuleBody $ filter (not.isCoercion) rules) + concatMap generateRuleFor nonCoercionRules where catName = safeCatName cat + nonCoercionRules = filter (not . isCoercion) rules + -- rulesToProcess = filter (\rule -> not (isListCat (wpThing $ valRCat rule))) nonCoercionRules + + generateRuleFor :: Rule -> [String] + generateRuleFor rule = generateRuleBody [rule] -- | Based on a list of rules, generate the functions for the parser generateRuleBody :: [Rule] -> [String] @@ -84,24 +98,30 @@ generateRuleBody rules = concatMap generateSingleRuleBody rules where generateSingleRuleBody :: Rule -> [String] - generateSingleRuleBody rule@(Rule fnam cat rhs _) = + generateSingleRuleBody rule@(Rule _ cat rhs _) = let isRecursiveRule = any (sameCat (wpThing cat)) (getRHSCats rhs) ruleForm = if isRecursiveRule - then case rhsToSafeStrings rhs of - (_ : rest) -> "integer" : rest --TODO: we should get the "base" of the coercion recursion, in the calc is integer - [] -> [] -- Handle empty rhs case - else rhsToSafeStrings rhs + then case rhsToSafeStrings rule of + (_ : rest) -> "integer" : rest --TODO: we should get the "base" of the coercion recursion, in the calc is integer + [] -> [] -- Handle empty rhs case + else case rhs of + [Right _] -> [generateClassSignature rule False] + _ -> map addRuleForListCat (rhsToSafeStrings rule) mainDef = [ - "def " ++ firstLowerCase (funName fnam) ++ ": Parser[WorkflowAST] =" - +++ intercalate " ~ " ruleForm ++ + "def " ++ getRuleFunName rule ++ ": Parser[WorkflowAST] =" + +++ intercalate " ~ " ruleForm ++ if isRuleOnlySpecials then "" else " ^^ { " ++ generateCaseStatement rule ++ " }" ] in mainDef - where - isRuleOnlySpecials = all isSpecialCat (getRHSCats rhs) + where + addRuleForListCat s = + case find (\cat -> isListCat cat && safeCatName cat == s) (getRHSCats rhs) of + Just _ -> "rep(" ++ firstLowerCase s ++ ")" + Nothing -> s + isRuleOnlySpecials = all isSpecialCat (getRHSCats rhs) && all isLeft rhs -- -- | Generate a case statement for a rule generateCaseStatement :: Rule -> String @@ -114,12 +134,15 @@ generateCaseStatement r@(Rule fun _ _ _) where vars = disambiguateNames $ map modifyVars (prPrintRule_ r) fnm = funName fun - modifyVars str - | "()" `isSuffixOf` str = str - | all isAlphaNum str = case str of - (x:_) -> [toLower x] + cleanStrUnderscore str = case str of + (x:xs) -> case xs of + (y:_) -> if x == '_' then [toLower y] else [toLower x] + [] -> [toLower x] [] -> error "Empty string encountered in modifyVars" - | otherwise = getSymbFromName str -- Replace invalid symbols with placeholder + modifyVars str + | "()" `isSuffixOf` str = str + | all isAlphaNum str = cleanStrUnderscore str + | otherwise = cleanStrUnderscore $ getSymbFromName str -- Replace invalid symbols with placeholder -- | Generate a special rule for tokens like Integer or String @@ -143,12 +166,10 @@ generateSpecialRule tokenCat ruleName tokenName conversion catsAndRules = prPrintRule_ :: Rule -> [String] prPrintRule_ (Rule _ _ items _) = map getSymbFromName $ safeCatToStrings items - -- | Filter out strings ending with "()" filterSymbs :: [String] -> [String] filterSymbs = filter (not . isSuffixOf "()") - -- | Generate the imports section imports :: String -> [Doc] imports name = [ diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs index 8d6cee54c..5a5e3781e 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs @@ -19,19 +19,16 @@ module BNFC.Backend.Scala.CFtoScalaParserAST (cf2ScalaParserAST) where import Prelude hiding ((<>)) import BNFC.CF - ( baseTokenCatNames, - ruleGroups, + ( ruleGroups, CF, Cat(ListCat), - IsFun(funName, isCoercion), + IsFun(isCoercion), Rul(Rule), Rule, WithPosition(wpThing) ) import BNFC.PrettyPrint ( text, vcat, Doc ) import BNFC.Options ( SharedOptions(lang, Options) ) -import BNFC.Backend.Scala.Utils (generateVarsList, unwrapListCat, baseTypeToScalaType, wrapList, isLeft) -import Data.List (intercalate) -import BNFC.Utils ((+++)) +import BNFC.Backend.Scala.Utils (generateClassSignature) -- | Main function that generates the AST code cf2ScalaParserAST :: SharedOptions -> CF -> Doc @@ -60,30 +57,9 @@ isCoercionRule (Rule fun _ _ _) = isCoercion fun -- | Create a single case class definition createCaseClass :: Rule -> Doc -createCaseClass (Rule fun cat rhs _) - | ListCat _ <- (wpThing cat) = "" -- TODO: here we should process the lsit - | otherwise = text $ "case class " ++ className ++ "(" ++ params ++ ") extends WorkflowAST" - where - className = funName fun - - -- Función para formatear parámetros según sean Cat o String - catParams :: Either Cat String -> String - catParams (Left c) = formatParamType c - catParams (Right _) = "WorkflowAST" - - -- Aplicamos `catParams` a cada elemento de `rhs` - params = intercalate ", " $ zipWith (\x y -> x ++ ":" +++ y) (generateVarsList filteredRhs) (map catParams filteredRhs) - filteredRhs = filter isLeft rhs - --- | Format a parameter with its type -formatParamType :: Cat -> String -formatParamType cat = - let baseCat = unwrapListCat cat -- Extraemos el TokenCat base - in if baseCat `elem` BNFC.CF.baseTokenCatNames - then case baseTypeToScalaType baseCat of - Just s -> wrapList cat s - Nothing -> "String" -- Default a "String" - else wrapList cat "WorkflowAST" -- Si no es baseTokenCat, usar WorkflowAST +createCaseClass rule@(Rule _ cat _ _) + | ListCat _ <- (wpThing cat) = "" -- TODO: here we should process the list + | otherwise = text $ "case class " ++ generateClassSignature rule True ++ " extends WorkflowAST" -- | Generate the header part of the file diff --git a/source/src/BNFC/Backend/Scala/Utils.hs b/source/src/BNFC/Backend/Scala/Utils.hs index f3d61e227..101690a01 100644 --- a/source/src/BNFC/Backend/Scala/Utils.hs +++ b/source/src/BNFC/Backend/Scala/Utils.hs @@ -13,18 +13,20 @@ module BNFC.Backend.Scala.Utils ( generateVarsList, unwrapListCat, baseTypeToScalaType, safeTail, rhsToSafeStrings, disambiguateNames, safeCatToStrings, - wrapList, safeHeadString, scalaReserverWords, safeCatName, isLeft, getRulesFunsName, getRHSCats, isSpecialCat, - getSymbFromName, catToStrings, getFunName, hasTokenCat, safeRefCatName, inspectListRulesByCategory + wrapList, safeHeadString, scalaReserverWords, safeCatName, isLeft, getRHSCats, isSpecialCat, generateClassSignature, + getSymbFromName, catToStrings, getFunName, hasTokenCat, safeRefCatName, inspectListRulesByCategory, isListCat ) where import BNFC.CF -import Data.Map +import Data.Map hiding (map, filter) import BNFC.Backend.Common.NamedVariables (firstLowerCase) import System.Directory.Internal.Prelude (fromMaybe) import BNFC.Utils (symbolToName) import Data.Char (toUpper) import Text.PrettyPrint import BNFC.PrettyPrint -import qualified Data.List +import Data.List (isSuffixOf) +import GHC.OldList (intercalate) +import BNFC.Utils ((+++)) generateVarsList :: [a] -> [String] @@ -107,12 +109,11 @@ safeHeadString :: [String] -> String safeHeadString [] = "" safeHeadString (x:_) = x - safeCatName :: Cat -> String safeCatName cat = fromMaybe notSafeScalaCatName (scalaReserverWords notSafeScalaCatName) where notSafeScalaCatName = case cat of - ListCat innerCat -> "list" ++ firstUpperCase (safeCatName innerCat) -- Handle ListCat explicitly + ListCat innerCat -> firstUpperCase (safeCatName innerCat) _ -> firstLowerCase $ show cat @@ -120,16 +121,10 @@ safeRefCatName :: Cat -> String safeRefCatName cat = fromMaybe notSafeScalaCatName (scalaReserverWords notSafeScalaCatName) where notSafeScalaCatName = case cat of - ListCat innerCat -> firstUpperCase (safeCatName innerCat) -- Handle ListCat explicitly + ListCat innerCat -> firstUpperCase (safeCatName innerCat) _ -> firstLowerCase $ show cat -getRulesFunsName :: [Rule] -> [String] -getRulesFunsName rules = - let - catNames = [firstLowerCase $ wpThing fnam | Rule fnam _ _ _ <- rules] - uniqueCatNames = Data.List.nub catNames - in - uniqueCatNames + firstUpperCase :: String -> String firstUpperCase [] = [] @@ -157,15 +152,41 @@ catToStrings = Prelude.map (\case Right s -> s ) +-- | Generate the class signature +generateClassSignature :: Rule -> Bool-> String +generateClassSignature (Rule fun _ rhs _) withParams = className ++ "(" ++ params ++ ")" + where + -- Function to format parameters based on whether they are Cat or String + catParams :: Either Cat String -> String + catParams (Left c) = formatParamType c + catParams (Right _) = "WorkflowAST" + + className = funName fun + params = if withParams then intercalate ", " $ zipWith (\x y -> x ++ ":" +++ y) (generateVarsList filteredRhs) (map catParams filteredRhs) else [] + filteredRhs = filter isLeft rhs + + +-- | Format a parameter with its type +formatParamType :: Cat -> String +formatParamType cat = + let baseCat = unwrapListCat cat -- Extraemos el TokenCat base + in if baseCat `elem` BNFC.CF.baseTokenCatNames + then case baseTypeToScalaType baseCat of + Just s -> wrapList cat s + Nothing -> "String" -- Default a "String" + else wrapList cat "WorkflowAST" -- Si no es baseTokenCat, usar WorkflowAST + + -- | Gived a list of rhs, return the list vars in safe strings -- | so for the EAdd it will return: ["exp", "PLUS()", "exp"] -rhsToSafeStrings :: [Either Cat String] -> [String] -rhsToSafeStrings = Prelude.map (\case - Left c -> safeCatName $ normCat c - Right s -> case symbolToName s of - Just s' -> s' ++ "()" - Nothing -> s - ) +rhsToSafeStrings :: Rule -> [String] +rhsToSafeStrings rule@(Rule _ _ rhs _) = Prelude.map (\case + Left c -> safeCatName $ normCat c + Right s -> case symbolToName s of + Just s' -> s' ++ "()" + Nothing -> generateClassSignature rule False + ) rhs + -- | Get all the Left Cat of the rhs of a rule getRHSCats :: [Either Cat String] -> [Cat] getRHSCats rhs = [c | Left c <- rhs] @@ -209,11 +230,16 @@ isLeft :: Either a b -> Bool isLeft (Left _) = True isLeft _ = False +isListCat :: Cat -> Bool +isListCat (ListCat _) = True +isListCat _ = False + -- | Make variable names unique by adding numbers to duplicates disambiguateNames :: [String] -> [String] disambiguateNames = disamb [] where disamb ns1 (n:ns2) + | "()" `isSuffixOf` n = n : disamb (n:ns1) ns2 | n `elem` (ns1 ++ ns2) = let i = length (Prelude.filter (==n) ns1) + 1 in (n ++ show i) : disamb (n:ns1) ns2 | otherwise = n : disamb (n:ns1) ns2 From 7a488bb57fdb9cb56f4de3248ddbbcab935e2343 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Mon, 7 Apr 2025 20:35:48 +0200 Subject: [PATCH 31/42] almost there with lbnf parser generation --- .../BNFC/Backend/Scala/CFtoScalaLexToken.hs | 9 ++ .../src/BNFC/Backend/Scala/CFtoScalaParser.hs | 76 ++++++------- .../BNFC/Backend/Scala/CFtoScalaParserAST.hs | 36 +++++- source/src/BNFC/Backend/Scala/Utils.hs | 106 +++++++----------- 4 files changed, 118 insertions(+), 109 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs b/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs index 4d8e56333..711cd2897 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs @@ -35,10 +35,12 @@ cf2ScalaLexToken Options{ lang } cf = vsep . concat $ headers lang , [text $ concat $ map generateSymbClass symbs] , [generateStringClasses liters] + , [generateKeyWordClasses keyWords] ] where liters = literals cf symbs = unicodeAndSymbols cf + keyWords = reservedWords cf generateSymbClass :: String -> String @@ -47,6 +49,13 @@ generateSymbClass symb = case symbolToName symb of Nothing -> "" +generateKeyWordClasses :: [String] -> Doc +generateKeyWordClasses params = text $ concat $ map generateKeyWordClass params + +generateKeyWordClass :: String -> String +generateKeyWordClass param = "case class " ++ (map toUpper param) ++ "() extends WorkflowToken \n" + + generateStringClasses :: [String] -> Doc generateStringClasses params = text $ concat $ map generateStringClass params diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs index da75d902b..09bf89a04 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -19,10 +19,7 @@ import qualified Data.Foldable as DF (toList) import Prelude hiding ((<>)) import GHC.Unicode (isAlphaNum) -import BNFC.Backend.Scala.Utils ( - safeCatName, getSymbFromName, hasTokenCat, getFunName, rhsToSafeStrings, disambiguateNames, getRHSCats, isSpecialCat, - safeCatToStrings, inspectListRulesByCategory, isListCat, isLeft, generateClassSignature - ) +import BNFC.Backend.Scala.Utils (safeCatName, getSymbFromName, hasTokenCat, rhsToSafeStrings, disambiguateNames, getRHSCats, isSpecialCat, safeCatToStrings, inspectListRulesByCategory, isListCat, isLeft, safeHeadChar) import BNFC.CF import BNFC.PrettyPrint import BNFC.Options ( SharedOptions(lang, Options) ) @@ -32,6 +29,7 @@ import Data.List (find, intercalate, isSuffixOf, nub) import Data.Char (toLower) import Data.Maybe (listToMaybe) import BNFC.Utils ((+++)) +import Data.Char (toUpper) -- | Main function that generates the Scala parser code cf2ScalaParser :: SharedOptions -> CF -> Doc @@ -62,12 +60,15 @@ cf2ScalaParser Options{ lang } cf = vcat $ generateAllRules :: [(Cat, [Rule])] -> [Doc] generateAllRules catsAndRules = let + -- filter categories with all the rules isNilCons + -- TODO: check if this is correct, I'm assuming all the lists are processed using the rep and the scala List + rulesToProcess = filter (not . all isNilCons . snd) catsAndRules -- Generate regular rules - mainRules = map text $ concatMap generateRuleGroup (fixCoercions catsAndRules) + mainRules = map text $ concatMap generateRuleGroup (fixCoercions rulesToProcess) -- Generate special rules for integer and string if needed - integerRule = generateSpecialRule catInteger "integer" "INTEGER" "toInt" catsAndRules - stringRule = generateSpecialRule catString "string" "STRING" "" catsAndRules + integerRule = generateSpecialRule catInteger "integer" "INTEGER" "toInt" "Integer" catsAndRules + stringRule = generateSpecialRule catString "string" "STRING" "toString" "String" catsAndRules in mainRules ++ integerRule ++ stringRule @@ -102,12 +103,12 @@ generateRuleBody rules = let isRecursiveRule = any (sameCat (wpThing cat)) (getRHSCats rhs) ruleForm = if isRecursiveRule - then case rhsToSafeStrings rule of - (_ : rest) -> "integer" : rest --TODO: we should get the "base" of the coercion recursion, in the calc is integer + then case rhsToSafeStrings rhs of + (_ : rest) -> "integer" : rest -- TODO: we should get the "base" of the coercion recursion, in the calc is integer [] -> [] -- Handle empty rhs case else case rhs of - [Right _] -> [generateClassSignature rule False] - _ -> map addRuleForListCat (rhsToSafeStrings rule) + [Right s] -> [map toUpper s ++ "()"] + _ -> map addRuleForListCat (rhsToSafeStrings rhs) mainDef = [ "def " ++ getRuleFunName rule ++ ": Parser[WorkflowAST] =" +++ intercalate " ~ " ruleForm ++ @@ -115,7 +116,7 @@ generateRuleBody rules = then "" else " ^^ { " ++ generateCaseStatement rule ++ " }" ] - in mainDef + in if (not.isNilCons) rule then mainDef else [""] where addRuleForListCat s = case find (\cat -> isListCat cat && safeCatName cat == s) (getRHSCats rhs) of @@ -125,51 +126,48 @@ generateRuleBody rules = -- -- | Generate a case statement for a rule generateCaseStatement :: Rule -> String -generateCaseStatement r@(Rule fun _ _ _) +generateCaseStatement rule@(Rule fun _ _ _) | isCoercion fun = "" - | null vars = fnm ++ "()" -- Caso especial para lista vacía + | null vars = fnm ++ "()" | otherwise = - "case (" ++ intercalate " ~ " vars ++ ") => " - ++ fnm ++ "(" ++ intercalate ", " (filterSymbs vars) ++ ")" + "case (" ++ intercalate " ~ " params ++ ") => " + ++ fnm ++ "(" ++ intercalate ", " vars ++ ")" where - vars = disambiguateNames $ map modifyVars (prPrintRule_ r) + getRHSParamsFromRule :: Rule -> [String] + getRHSParamsFromRule (Rule _ _ items _) = map getSymbFromName $ safeCatToStrings items + + getFunVarsFromRule :: Rule -> [String] + getFunVarsFromRule (Rule _ _ items _) = map getSymbFromName $ safeCatToStrings $ filter isLeft items + + addCastToVar :: String -> String + addCastToVar str = str ++ ".asInstanceOf[WorkflowAST]" + + params = disambiguateNames $ map modifyVars $ concatMap getRHSParamsFromRule [rule] + vars = map addCastToVar $ disambiguateNames $ map modifyVars (getFunVarsFromRule rule) fnm = funName fun - cleanStrUnderscore str = case str of - (x:xs) -> case xs of - (y:_) -> if x == '_' then [toLower y] else [toLower x] - [] -> [toLower x] - [] -> error "Empty string encountered in modifyVars" + modifyVars str - | "()" `isSuffixOf` str = str - | all isAlphaNum str = cleanStrUnderscore str - | otherwise = cleanStrUnderscore $ getSymbFromName str -- Replace invalid symbols with placeholder + | "()" `isSuffixOf` str = "_" + | all isAlphaNum str = [toLower $ safeHeadChar str] + | otherwise = [toLower $ safeHeadChar $ getSymbFromName str] -- Replace invalid symbols with placeholder, this should be already done but is being manage here for safety -- | Generate a special rule for tokens like Integer or String -generateSpecialRule :: TokenCat -> String -> String -> String -> [(Cat, [Rule])] -> [Doc] -generateSpecialRule tokenCat ruleName tokenName conversion catsAndRules = +generateSpecialRule :: TokenCat -> String -> String -> String -> String -> [(Cat, [Rule])] -> [Doc] +generateSpecialRule tokenCat ruleName tokenName conversion scalaType catsAndRules = case find (\(_, rules) -> any (hasTokenCat tokenCat) rules) catsAndRules of Just (_, rules) -> case find (hasTokenCat tokenCat) rules of - Just rule -> + Just _ -> let - funName = getFunName rule conversionPart = if null conversion then "" else "." ++ conversion in - [ text $ "def " ++ ruleName ++ ": Parser[" ++ funName ++ "] = positioned {" - , nest 4 $ text $ "accept(\"" ++ ruleName ++ "\", { case " ++ tokenName ++ "(i) => " ++ funName ++ "(i" ++ conversionPart ++ ") })" + [ text $ "def " ++ ruleName ++ ": Parser[" ++ scalaType ++ "] = {" + , nest 4 $ text $ "accept(\"" ++ ruleName ++ "\", { case " ++ tokenName ++ "(i) => i" ++ conversionPart ++ " })" , text "}" ] Nothing -> [] Nothing -> [] --- | Extract terminal and non-terminal symbols from a rule -prPrintRule_ :: Rule -> [String] -prPrintRule_ (Rule _ _ items _) = map getSymbFromName $ safeCatToStrings items - --- | Filter out strings ending with "()" -filterSymbs :: [String] -> [String] -filterSymbs = filter (not . isSuffixOf "()") - -- | Generate the imports section imports :: String -> [Doc] imports name = [ diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs index 5a5e3781e..8d6cee54c 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs @@ -19,16 +19,19 @@ module BNFC.Backend.Scala.CFtoScalaParserAST (cf2ScalaParserAST) where import Prelude hiding ((<>)) import BNFC.CF - ( ruleGroups, + ( baseTokenCatNames, + ruleGroups, CF, Cat(ListCat), - IsFun(isCoercion), + IsFun(funName, isCoercion), Rul(Rule), Rule, WithPosition(wpThing) ) import BNFC.PrettyPrint ( text, vcat, Doc ) import BNFC.Options ( SharedOptions(lang, Options) ) -import BNFC.Backend.Scala.Utils (generateClassSignature) +import BNFC.Backend.Scala.Utils (generateVarsList, unwrapListCat, baseTypeToScalaType, wrapList, isLeft) +import Data.List (intercalate) +import BNFC.Utils ((+++)) -- | Main function that generates the AST code cf2ScalaParserAST :: SharedOptions -> CF -> Doc @@ -57,9 +60,30 @@ isCoercionRule (Rule fun _ _ _) = isCoercion fun -- | Create a single case class definition createCaseClass :: Rule -> Doc -createCaseClass rule@(Rule _ cat _ _) - | ListCat _ <- (wpThing cat) = "" -- TODO: here we should process the list - | otherwise = text $ "case class " ++ generateClassSignature rule True ++ " extends WorkflowAST" +createCaseClass (Rule fun cat rhs _) + | ListCat _ <- (wpThing cat) = "" -- TODO: here we should process the lsit + | otherwise = text $ "case class " ++ className ++ "(" ++ params ++ ") extends WorkflowAST" + where + className = funName fun + + -- Función para formatear parámetros según sean Cat o String + catParams :: Either Cat String -> String + catParams (Left c) = formatParamType c + catParams (Right _) = "WorkflowAST" + + -- Aplicamos `catParams` a cada elemento de `rhs` + params = intercalate ", " $ zipWith (\x y -> x ++ ":" +++ y) (generateVarsList filteredRhs) (map catParams filteredRhs) + filteredRhs = filter isLeft rhs + +-- | Format a parameter with its type +formatParamType :: Cat -> String +formatParamType cat = + let baseCat = unwrapListCat cat -- Extraemos el TokenCat base + in if baseCat `elem` BNFC.CF.baseTokenCatNames + then case baseTypeToScalaType baseCat of + Just s -> wrapList cat s + Nothing -> "String" -- Default a "String" + else wrapList cat "WorkflowAST" -- Si no es baseTokenCat, usar WorkflowAST -- | Generate the header part of the file diff --git a/source/src/BNFC/Backend/Scala/Utils.hs b/source/src/BNFC/Backend/Scala/Utils.hs index 101690a01..3360afbcc 100644 --- a/source/src/BNFC/Backend/Scala/Utils.hs +++ b/source/src/BNFC/Backend/Scala/Utils.hs @@ -13,11 +13,11 @@ module BNFC.Backend.Scala.Utils ( generateVarsList, unwrapListCat, baseTypeToScalaType, safeTail, rhsToSafeStrings, disambiguateNames, safeCatToStrings, - wrapList, safeHeadString, scalaReserverWords, safeCatName, isLeft, getRHSCats, isSpecialCat, generateClassSignature, + wrapList, safeHeadString, scalaReserverWords, safeCatName, isLeft, getRHSCats, isSpecialCat, firstUpperCase, safeHeadChar, getSymbFromName, catToStrings, getFunName, hasTokenCat, safeRefCatName, inspectListRulesByCategory, isListCat ) where import BNFC.CF -import Data.Map hiding (map, filter) +import Data.Map import BNFC.Backend.Common.NamedVariables (firstLowerCase) import System.Directory.Internal.Prelude (fromMaybe) import BNFC.Utils (symbolToName) @@ -25,8 +25,6 @@ import Data.Char (toUpper) import Text.PrettyPrint import BNFC.PrettyPrint import Data.List (isSuffixOf) -import GHC.OldList (intercalate) -import BNFC.Utils ((+++)) generateVarsList :: [a] -> [String] @@ -70,33 +68,33 @@ reserverWordsMap = fromList wordsMap wordsMap :: [(String, String)] wordsMap = [ - ("def" , "_def") - , ("val" , "_val") - , ("var" , "_var") - , ("class", "_class") - , ("object", "_object") - , ("trait", "_trait") - , ("extends", "_extends") - , ("with", "_with") - , ("case", "_case") - , ("sealed", "_sealed") - , ("abstract", "_abstract") - , ("final", "_final") - , ("override", "_override") - , ("implicit", "_implicit") - , ("lazy", "_lazy") - , ("private", "_private") - , ("protected", "_protected") - , ("public", "_public") - , ("import", "_import") - , ("package", "_package") - , ("return", "_return") - , ("if", "_if") - , ("else", "_else") - , ("while", "_while") - , ("for", "_for") - , ("do", "_do") - , ("match", "_match") + ("def" , "pdef") + , ("val" , "pval") + , ("var" , "pvar") + , ("class", "pclass") + , ("object", "pobject") + , ("trait", "ptrait") + , ("extends", "pextends") + , ("with", "pwith") + , ("case", "pcase") + , ("sealed", "psealed") + , ("abstract", "pabstract") + , ("final", "pfinal") + , ("override", "poverride") + , ("implicit", "pimplicit") + , ("lazy", "plazy") + , ("private", "pprivate") + , ("protected", "pprotected") + , ("public", "ppublic") + , ("import", "pimport") + , ("package", "ppackage") + , ("return", "preturn") + , ("if", "pif") + , ("else", "pelse") + , ("while", "pwhile") + , ("for", "pfor") + , ("do", "pdo") + , ("match", "pmatch") ] -- | Safe version of tail that returns an empty list for an empty list @@ -109,6 +107,11 @@ safeHeadString :: [String] -> String safeHeadString [] = "" safeHeadString (x:_) = x +-- | Safe version of head that returns an empty list for an empty list +safeHeadChar :: [Char] -> Char +safeHeadChar [] = ' ' +safeHeadChar (x:_) = x + safeCatName :: Cat -> String safeCatName cat = fromMaybe notSafeScalaCatName (scalaReserverWords notSafeScalaCatName) where @@ -152,41 +155,15 @@ catToStrings = Prelude.map (\case Right s -> s ) --- | Generate the class signature -generateClassSignature :: Rule -> Bool-> String -generateClassSignature (Rule fun _ rhs _) withParams = className ++ "(" ++ params ++ ")" - where - -- Function to format parameters based on whether they are Cat or String - catParams :: Either Cat String -> String - catParams (Left c) = formatParamType c - catParams (Right _) = "WorkflowAST" - - className = funName fun - params = if withParams then intercalate ", " $ zipWith (\x y -> x ++ ":" +++ y) (generateVarsList filteredRhs) (map catParams filteredRhs) else [] - filteredRhs = filter isLeft rhs - - --- | Format a parameter with its type -formatParamType :: Cat -> String -formatParamType cat = - let baseCat = unwrapListCat cat -- Extraemos el TokenCat base - in if baseCat `elem` BNFC.CF.baseTokenCatNames - then case baseTypeToScalaType baseCat of - Just s -> wrapList cat s - Nothing -> "String" -- Default a "String" - else wrapList cat "WorkflowAST" -- Si no es baseTokenCat, usar WorkflowAST - - -- | Gived a list of rhs, return the list vars in safe strings -- | so for the EAdd it will return: ["exp", "PLUS()", "exp"] -rhsToSafeStrings :: Rule -> [String] -rhsToSafeStrings rule@(Rule _ _ rhs _) = Prelude.map (\case - Left c -> safeCatName $ normCat c - Right s -> case symbolToName s of - Just s' -> s' ++ "()" - Nothing -> generateClassSignature rule False - ) rhs - +rhsToSafeStrings :: [Either Cat String] -> [String] +rhsToSafeStrings = Prelude.map (\case + Left c -> safeCatName $ normCat c + Right s -> case symbolToName s of + Just s' -> s' ++ "()" + Nothing -> (Prelude.map toUpper s) ++ "()" + ) -- | Get all the Left Cat of the rhs of a rule getRHSCats :: [Either Cat String] -> [Cat] getRHSCats rhs = [c | Left c <- rhs] @@ -239,6 +216,7 @@ disambiguateNames :: [String] -> [String] disambiguateNames = disamb [] where disamb ns1 (n:ns2) + | n == "_" = n : disamb (n:ns1) ns2 | "()" `isSuffixOf` n = n : disamb (n:ns1) ns2 | n `elem` (ns1 ++ ns2) = let i = length (Prelude.filter (==n) ns1) + 1 in (n ++ show i) : disamb (n:ns1) ns2 From ba4bccd94839342815c9b212dd2df2049ac3dbb4 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Wed, 9 Apr 2025 00:35:09 +0200 Subject: [PATCH 32/42] improves in LBNF compiler generation --- .../src/BNFC/Backend/Scala/CFtoScalaParser.hs | 117 +++++++++++------- .../BNFC/Backend/Scala/CFtoScalaParserAST.hs | 55 ++++---- source/src/BNFC/Backend/Scala/Utils.hs | 10 +- 3 files changed, 111 insertions(+), 71 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs index 09bf89a04..0d7b3db1d 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -19,17 +19,16 @@ import qualified Data.Foldable as DF (toList) import Prelude hiding ((<>)) import GHC.Unicode (isAlphaNum) -import BNFC.Backend.Scala.Utils (safeCatName, getSymbFromName, hasTokenCat, rhsToSafeStrings, disambiguateNames, getRHSCats, isSpecialCat, safeCatToStrings, inspectListRulesByCategory, isListCat, isLeft, safeHeadChar) +import BNFC.Backend.Scala.Utils (safeCatName, getSymbFromName, hasTokenCat, rhsToSafeStrings, disambiguateNames, getRHSCats, isSpecialCat, safeCatToStrings, inspectListRulesByCategory, isListCat, isLeft, safeHeadChar, disambiguateTuples, baseTypeToScalaType) import BNFC.CF import BNFC.PrettyPrint import BNFC.Options ( SharedOptions(lang, Options) ) import BNFC.Backend.Common.NamedVariables (firstLowerCase, fixCoercions) import Data.List (find, intercalate, isSuffixOf, nub) -import Data.Char (toLower) +import Data.Char (toLower, toUpper) import Data.Maybe (listToMaybe) import BNFC.Utils ((+++)) -import Data.Char (toUpper) -- | Main function that generates the Scala parser code cf2ScalaParser :: SharedOptions -> CF -> Doc @@ -67,10 +66,12 @@ generateAllRules catsAndRules = mainRules = map text $ concatMap generateRuleGroup (fixCoercions rulesToProcess) -- Generate special rules for integer and string if needed - integerRule = generateSpecialRule catInteger "integer" "INTEGER" "toInt" "Integer" catsAndRules - stringRule = generateSpecialRule catString "string" "STRING" "toString" "String" catsAndRules + integerRule = generateSpecialRule catInteger "integer" "INTEGER" "toInt" "pInteger" catsAndRules + stringRule = generateSpecialRule catString "string" "STRING" "toString" "pString" catsAndRules + charRule = generateSpecialRule catChar "char" "CHAR" "charAt(0)" "pChar" catsAndRules + identRule = generateSpecialRule catIdent "ident" "IDENT" "toString" "pString" catsAndRules - in mainRules ++ integerRule ++ stringRule + in mainRules ++ integerRule ++ stringRule ++ charRule ++ identRule getRuleFunName :: Rule -> String getRuleFunName (Rule fnam _ _ _) = firstLowerCase $ funName fnam @@ -95,74 +96,98 @@ generateRuleGroup (cat, rules) = -- | Based on a list of rules, generate the functions for the parser generateRuleBody :: [Rule] -> [String] -generateRuleBody rules = - concatMap generateSingleRuleBody rules - where - generateSingleRuleBody :: Rule -> [String] - generateSingleRuleBody rule@(Rule _ cat rhs _) = - let - isRecursiveRule = any (sameCat (wpThing cat)) (getRHSCats rhs) - ruleForm = if isRecursiveRule - then case rhsToSafeStrings rhs of - (_ : rest) -> "integer" : rest -- TODO: we should get the "base" of the coercion recursion, in the calc is integer - [] -> [] -- Handle empty rhs case - else case rhs of - [Right s] -> [map toUpper s ++ "()"] - _ -> map addRuleForListCat (rhsToSafeStrings rhs) - mainDef = [ - "def " ++ getRuleFunName rule ++ ": Parser[WorkflowAST] =" - +++ intercalate " ~ " ruleForm ++ - if isRuleOnlySpecials - then "" - else " ^^ { " ++ generateCaseStatement rule ++ " }" - ] - in if (not.isNilCons) rule then mainDef else [""] - where - addRuleForListCat s = - case find (\cat -> isListCat cat && safeCatName cat == s) (getRHSCats rhs) of - Just _ -> "rep(" ++ firstLowerCase s ++ ")" - Nothing -> s - isRuleOnlySpecials = all isSpecialCat (getRHSCats rhs) && all isLeft rhs +generateRuleBody rules = concatMap generateSingleRuleBody rules + +generateSingleRuleBody :: Rule -> [String] +generateSingleRuleBody rule@(Rule _ _ _ _) = [generateRuleDefinition rule ++ generateRuleTransformation rule] + +generateRuleDefinition :: Rule -> String +generateRuleDefinition rule = + "def " ++ getRuleFunName rule ++ ": Parser[WorkflowAST] =" + +++ intercalate " ~ " (generateRuleForm rule) + +generateRuleForm :: Rule -> [String] +generateRuleForm rule@(Rule _ _ rhs _) = + if isRecursiveRule rule + then case rhsToSafeStrings rhs of + (_ : rest) -> "integer" : rest + [] -> [""] -- Handle empty rhs case + else case rhs of + [Right s] -> [map toUpper s ++ "()"] + _ -> map (addRuleForListCat rhs) (rhsToSafeStrings rhs) + +generateRuleTransformation :: Rule -> String +generateRuleTransformation rule = + if isRuleOnlySpecials rule + then case rhsRule rule of + [] -> "EMPTY() ^^ {" ++ generateCaseStatement rule ++ "}" + _ -> "" + else " ^^ { " ++ generateCaseStatement rule ++ " }" + +isRecursiveRule :: Rule -> Bool +isRecursiveRule (Rule _ cat rhs _) = + any (sameCat (wpThing cat)) (getRHSCats rhs) + +addRuleForListCat :: [Either Cat String] -> String -> String +addRuleForListCat rhs s = + case find (\cat -> isListCat cat && safeCatName cat == s) (getRHSCats rhs) of + Just _ -> "rep(" ++ firstLowerCase s ++ ")" + Nothing -> s + +isRuleOnlySpecials :: Rule -> Bool +isRuleOnlySpecials (Rule _ _ rhs _) = + all isSpecialCat (getRHSCats rhs) && all isLeft rhs -- -- | Generate a case statement for a rule generateCaseStatement :: Rule -> String generateCaseStatement rule@(Rule fun _ _ _) | isCoercion fun = "" - | null vars = fnm ++ "()" + | null vars = "_ => " ++ fnm ++ "()" | otherwise = "case (" ++ intercalate " ~ " params ++ ") => " ++ fnm ++ "(" ++ intercalate ", " vars ++ ")" where getRHSParamsFromRule :: Rule -> [String] - getRHSParamsFromRule (Rule _ _ items _) = map getSymbFromName $ safeCatToStrings items + getRHSParamsFromRule (Rule _ _ items _) = + disambiguateNames $ map modifyParams $ map getSymbFromName $ safeCatToStrings items - getFunVarsFromRule :: Rule -> [String] - getFunVarsFromRule (Rule _ _ items _) = map getSymbFromName $ safeCatToStrings $ filter isLeft items + getFunVarsWithTypeFromRule :: Rule -> [(String, String)] + getFunVarsWithTypeFromRule (Rule _ _ items _) = + disambiguateTuples $ map (\(c, s) -> (modifyVars (getSymbFromName (safeCatName c)), s)) $ map addTypesToVars $ getRHSCats items - addCastToVar :: String -> String - addCastToVar str = str ++ ".asInstanceOf[WorkflowAST]" + getBaseType :: Cat -> String + getBaseType cat + | isListCat cat = "List[WorkflowAST]" + | otherwise = "WorkflowAST" - params = disambiguateNames $ map modifyVars $ concatMap getRHSParamsFromRule [rule] - vars = map addCastToVar $ disambiguateNames $ map modifyVars (getFunVarsFromRule rule) + addTypesToVars :: Cat -> (Cat, String) + addTypesToVars cat = (cat, ".asInstanceOf[" ++ getBaseType cat ++ "]") + + params = concatMap getRHSParamsFromRule [rule] + vars = map (\(var, typ) -> var ++ typ) $ getFunVarsWithTypeFromRule rule fnm = funName fun modifyVars str + | all isAlphaNum str = [toLower $ safeHeadChar str] + | otherwise = [toLower $ safeHeadChar $ getSymbFromName str] + + modifyParams str | "()" `isSuffixOf` str = "_" | all isAlphaNum str = [toLower $ safeHeadChar str] - | otherwise = [toLower $ safeHeadChar $ getSymbFromName str] -- Replace invalid symbols with placeholder, this should be already done but is being manage here for safety + | otherwise = [toLower $ safeHeadChar $ getSymbFromName str] -- | Generate a special rule for tokens like Integer or String generateSpecialRule :: TokenCat -> String -> String -> String -> String -> [(Cat, [Rule])] -> [Doc] -generateSpecialRule tokenCat ruleName tokenName conversion scalaType catsAndRules = +generateSpecialRule tokenCat ruleName tokenName conversion pTypeName catsAndRules = case find (\(_, rules) -> any (hasTokenCat tokenCat) rules) catsAndRules of Just (_, rules) -> case find (hasTokenCat tokenCat) rules of Just _ -> let conversionPart = if null conversion then "" else "." ++ conversion in - [ text $ "def " ++ ruleName ++ ": Parser[" ++ scalaType ++ "] = {" - , nest 4 $ text $ "accept(\"" ++ ruleName ++ "\", { case " ++ tokenName ++ "(i) => i" ++ conversionPart ++ " })" + [ text $ "def " ++ ruleName ++ ": Parser[" ++ pTypeName ++ "] = {" + , nest 4 $ text $ "accept(\"" ++ ruleName ++ "\", { case " ++ tokenName ++ "(i) => " ++ pTypeName ++ "(i" ++ conversionPart ++ ") })" , text "}" ] Nothing -> [] diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs index 8d6cee54c..a006493e0 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs @@ -19,19 +19,19 @@ module BNFC.Backend.Scala.CFtoScalaParserAST (cf2ScalaParserAST) where import Prelude hiding ((<>)) import BNFC.CF - ( baseTokenCatNames, - ruleGroups, + ( ruleGroups, CF, Cat(ListCat), IsFun(funName, isCoercion), Rul(Rule), Rule, - WithPosition(wpThing) ) + WithPosition(wpThing), TokenCat, literals ) import BNFC.PrettyPrint ( text, vcat, Doc ) import BNFC.Options ( SharedOptions(lang, Options) ) -import BNFC.Backend.Scala.Utils (generateVarsList, unwrapListCat, baseTypeToScalaType, wrapList, isLeft) +import BNFC.Backend.Scala.Utils (generateVarsList, isLeft, baseTypeToScalaType, wrapList) import Data.List (intercalate) import BNFC.Utils ((+++)) +import Data.Maybe (fromMaybe) -- | Main function that generates the AST code cf2ScalaParserAST :: SharedOptions -> CF -> Doc @@ -41,9 +41,11 @@ cf2ScalaParserAST Options{ lang } cf = vcat $ -- Add an empty line after the trait definition [text ""] ++ -- Generate case class definitions - generateRuleDefs rules + generateRuleDefs rules ++ + generateLiteralsDefs allLiterals where rules = ruleGroups cf + allLiterals = literals cf -- | Generate all case class definitions generateRuleDefs :: [(Cat, [Rule])] -> [Doc] @@ -58,33 +60,40 @@ processRuleGroup (_, rules) = map createCaseClass (filter (not . isCoercionRule) isCoercionRule :: Rule -> Bool isCoercionRule (Rule fun _ _ _) = isCoercion fun --- | Create a single case class definition -createCaseClass :: Rule -> Doc -createCaseClass (Rule fun cat rhs _) - | ListCat _ <- (wpThing cat) = "" -- TODO: here we should process the lsit - | otherwise = text $ "case class " ++ className ++ "(" ++ params ++ ") extends WorkflowAST" +-- | Generate the class params +generateClassParams :: Rule -> String +generateClassParams (Rule _ _ rhs _) = + intercalate ", " $ zipWith (\x y -> x ++ ":" +++ y) (generateVarsList filteredRhs) (map catParams filteredRhs) where - className = funName fun - - -- Función para formatear parámetros según sean Cat o String + -- Function to format parameters based on whether they are Cat or String catParams :: Either Cat String -> String catParams (Left c) = formatParamType c catParams (Right _) = "WorkflowAST" - -- Aplicamos `catParams` a cada elemento de `rhs` - params = intercalate ", " $ zipWith (\x y -> x ++ ":" +++ y) (generateVarsList filteredRhs) (map catParams filteredRhs) filteredRhs = filter isLeft rhs + -- | Format a parameter with its type formatParamType :: Cat -> String -formatParamType cat = - let baseCat = unwrapListCat cat -- Extraemos el TokenCat base - in if baseCat `elem` BNFC.CF.baseTokenCatNames - then case baseTypeToScalaType baseCat of - Just s -> wrapList cat s - Nothing -> "String" -- Default a "String" - else wrapList cat "WorkflowAST" -- Si no es baseTokenCat, usar WorkflowAST +formatParamType cat = wrapList cat "WorkflowAST" -- Si no es baseTokenCat, usar WorkflowAST + +-- | Create a single case class definition +createCaseClass :: Rule -> Doc +createCaseClass rule@(Rule fun cat _ _) + | ListCat _ <- (wpThing cat) = "" -- TODO: here we should process the list + | otherwise = text $ formatCaseClass className params + where + className = funName fun + -- Apply `catParams` to each element of `rhs` + params = generateClassParams rule + +-- | Helper function to format the case class definition +formatCaseClass :: String -> String -> String +formatCaseClass className params = "case class " ++ className ++ "(" ++ params ++ ") extends WorkflowAST" +-- | Generate the Scala types for basic LBNF types +generateLiteralsDefs :: [TokenCat] -> [Doc] +generateLiteralsDefs tokens = map (\token -> text $ formatCaseClass ("p" ++ token) ("var1: " ++ fromMaybe token (baseTypeToScalaType token))) tokens -- | Generate the header part of the file headers :: String -> [Doc] @@ -94,4 +103,4 @@ headers name = [ text "import scala.util.parsing.input.Positional", text "", text "sealed trait WorkflowAST extends Positional" - ] + ] \ No newline at end of file diff --git a/source/src/BNFC/Backend/Scala/Utils.hs b/source/src/BNFC/Backend/Scala/Utils.hs index 3360afbcc..1446b671f 100644 --- a/source/src/BNFC/Backend/Scala/Utils.hs +++ b/source/src/BNFC/Backend/Scala/Utils.hs @@ -14,7 +14,7 @@ module BNFC.Backend.Scala.Utils ( generateVarsList, unwrapListCat, baseTypeToScalaType, safeTail, rhsToSafeStrings, disambiguateNames, safeCatToStrings, wrapList, safeHeadString, scalaReserverWords, safeCatName, isLeft, getRHSCats, isSpecialCat, firstUpperCase, safeHeadChar, - getSymbFromName, catToStrings, getFunName, hasTokenCat, safeRefCatName, inspectListRulesByCategory, isListCat + getSymbFromName, catToStrings, getFunName, hasTokenCat, safeRefCatName, inspectListRulesByCategory, isListCat, disambiguateTuples ) where import BNFC.CF import Data.Map @@ -221,4 +221,10 @@ disambiguateNames = disamb [] | n `elem` (ns1 ++ ns2) = let i = length (Prelude.filter (==n) ns1) + 1 in (n ++ show i) : disamb (n:ns1) ns2 | otherwise = n : disamb (n:ns1) ns2 - disamb _ [] = [] \ No newline at end of file + disamb _ [] = [] + +disambiguateTuples :: [(String, String)] -> [(String, String)] +disambiguateTuples tuples = + let (names, values) = unzip tuples + newNames = disambiguateNames names + in zip newNames values \ No newline at end of file From d980756f03804f7044e146e8018fbdc87fc3f22d Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Sun, 13 Apr 2025 00:04:28 +0200 Subject: [PATCH 33/42] minichi working --- source/src/BNFC/Backend/Scala/CFtoScalaLex.hs | 17 +-- .../BNFC/Backend/Scala/CFtoScalaLexToken.hs | 15 ++- .../src/BNFC/Backend/Scala/CFtoScalaParser.hs | 104 +++++++++--------- .../BNFC/Backend/Scala/CFtoScalaParserAST.hs | 25 +++-- source/src/BNFC/Backend/Scala/Utils.hs | 17 ++- 5 files changed, 104 insertions(+), 74 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs b/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs index e788239cb..d5b6713c8 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs @@ -24,6 +24,8 @@ import BNFC.Options import BNFC.Backend.Common (unicodeAndSymbols) import Data.List (intercalate) import Data.Char (toLower) +import GHC.OldList (nub) +import GHC.Unicode (toUpper) -- | Converts a string to lowercase toLowerString :: String -> String @@ -43,17 +45,18 @@ cf2ScalaLex Options{ lang } cf = vcat $ -- Add apply function getApplyFunction ++ -- Add tokens function - getTokensFunction (symbs ++ liters) ++ + getTokensFunction (symbs ++ keyWords ++ liters) ++ -- Add parser functions for literals map addParserFunction liters ++ -- Add keyword parsers - map getKeywordParser symbs + map getBaseLexs (keyWords ++ symbs) ] ++ -- End the WorkflowLexer object endWorkflowClass where + keyWords = reservedWords cf symbs = unicodeAndSymbols cf - liters = literals cf + liters = nub $ literals cf -- | Generate a parser function for a specific literal type addParserFunction :: String -> Doc @@ -166,7 +169,7 @@ getSymbFromName :: String -> String getSymbFromName s = case symbolToName s of Just s -> s - _ -> s + _ -> map toUpper s -- | Generate the tokens function getTokensFunction :: [String] -> [Doc] @@ -186,7 +189,7 @@ getListSymNames :: [String] -> [String] getListSymNames = map getSymName -- | Generate a keyword parser for a symbol -getKeywordParser :: String -> Doc -getKeywordParser symb = +getBaseLexs :: String -> Doc +getBaseLexs symb = text "" $+$ - text ("def " ++ getSymName symb ++ " = positioned { \"" ++ toLowerString symb ++ "\" ^^ (_ => " ++ getSymbFromName symb ++ "()) }") \ No newline at end of file + text ("def " ++ getSymName symb ++ " = positioned { \"" ++ toLowerString symb ++ "\" ^^ (_ => " ++ getSymbFromName symb ++ "()) }") diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs b/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs index 711cd2897..342bd0bdb 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs @@ -25,6 +25,8 @@ import BNFC.Options import BNFC.Backend.Common (unicodeAndSymbols) import BNFC.Utils (symbolToName) import Data.Char (toUpper) +import System.Directory.Internal.Prelude (toLower) +import Data.List (nub) cf2ScalaLexToken :: SharedOptions @@ -33,12 +35,15 @@ cf2ScalaLexToken cf2ScalaLexToken Options{ lang } cf = vsep . concat $ [ headers lang - , [text $ concat $ map generateSymbClass symbs] - , [generateStringClasses liters] - , [generateKeyWordClasses keyWords] + , [text $ concat $ map generateSymbClass (symbs)] + , [generateStringClasses (filter (\x -> map toLower x `notElem` map (map toLower) symbs) liters)] + , [generateKeyWordClasses (filter (\x -> map toLower x `notElem` map (map toLower) (symbs ++ liters)) (keyWords ++ ["empty"]))] + -- , [text $ "Symbols: " ++ show symbs] + -- , [text $ "Literals: " ++ show liters] + -- , [text $ "Keywords: " ++ show keyWords] ] where - liters = literals cf + liters = nub $ literals cf symbs = unicodeAndSymbols cf keyWords = reservedWords cf @@ -46,7 +51,7 @@ cf2ScalaLexToken Options{ lang } cf = vsep . concat $ generateSymbClass :: String -> String generateSymbClass symb = case symbolToName symb of Just s -> "case class " ++ s ++ "() extends WorkflowToken \n" - Nothing -> "" + Nothing -> mempty generateKeyWordClasses :: [String] -> Doc diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs index 0d7b3db1d..c1d65a119 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -17,18 +17,19 @@ module BNFC.Backend.Scala.CFtoScalaParser (cf2ScalaParser) where import qualified Data.Foldable as DF (toList) import Prelude hiding ((<>)) -import GHC.Unicode (isAlphaNum) -import BNFC.Backend.Scala.Utils (safeCatName, getSymbFromName, hasTokenCat, rhsToSafeStrings, disambiguateNames, getRHSCats, isSpecialCat, safeCatToStrings, inspectListRulesByCategory, isListCat, isLeft, safeHeadChar, disambiguateTuples, baseTypeToScalaType) +import BNFC.Backend.Scala.Utils (safeCatName, hasTokenCat, rhsToSafeStrings, disambiguateNames, getRHSCats, isSpecialCat, inspectListRulesByCategory, isListCat, isLeft, safeHeadChar, wildCardSymbs, scalaReserverWords) import BNFC.CF import BNFC.PrettyPrint import BNFC.Options ( SharedOptions(lang, Options) ) import BNFC.Backend.Common.NamedVariables (firstLowerCase, fixCoercions) -import Data.List (find, intercalate, isSuffixOf, nub) -import Data.Char (toLower, toUpper) -import Data.Maybe (listToMaybe) -import BNFC.Utils ((+++)) +import Data.List (find, intercalate, nub) +import Data.Char (toLower) +import Data.Maybe (listToMaybe, fromMaybe, isJust) +import BNFC.Utils ((+++), symbolToName) +import BNFC.Backend.Scala.CFtoScalaParserAST (getASTNames) +import GHC.Unicode (toUpper) -- | Main function that generates the Scala parser code cf2ScalaParser :: SharedOptions -> CF -> Doc @@ -41,14 +42,18 @@ cf2ScalaParser Options{ lang } cf = vcat $ [nest 4 $ vcat $ -- Add extra classes addExtraClasses ++ + [text ""] ++ -- Add apply function getApplyFunction ++ + [text ""] ++ -- Add program function getProgramFunction cf ++ + [text ""] ++ -- Add parser rules - generateAllRules (ruleGroups cf) + generateAllRules (ruleGroups cf) ++ + [text ""] -- inspect rules, only for debugging - ++ inspectListRulesByCategory (ruleGroups cf) + -- ++ inspectListRulesByCategory (ruleGroups cf) ] ++ -- End the WorkflowParser object endWorkflowClass ++ @@ -60,60 +65,63 @@ generateAllRules :: [(Cat, [Rule])] -> [Doc] generateAllRules catsAndRules = let -- filter categories with all the rules isNilCons - -- TODO: check if this is correct, I'm assuming all the lists are processed using the rep and the scala List rulesToProcess = filter (not . all isNilCons . snd) catsAndRules -- Generate regular rules mainRules = map text $ concatMap generateRuleGroup (fixCoercions rulesToProcess) + -- existe isUsedCat seguramente nos simplifique esto + -- existe specialCats seguramente nos simplifique esto + -- existe sigLookup wtf con esto -- Generate special rules for integer and string if needed integerRule = generateSpecialRule catInteger "integer" "INTEGER" "toInt" "pInteger" catsAndRules stringRule = generateSpecialRule catString "string" "STRING" "toString" "pString" catsAndRules charRule = generateSpecialRule catChar "char" "CHAR" "charAt(0)" "pChar" catsAndRules - identRule = generateSpecialRule catIdent "ident" "IDENT" "toString" "pString" catsAndRules + identRule = generateSpecialRule catIdent "ident" "IDENT" "toString" "pIdent" catsAndRules in mainRules ++ integerRule ++ stringRule ++ charRule ++ identRule -getRuleFunName :: Rule -> String -getRuleFunName (Rule fnam _ _ _) = firstLowerCase $ funName fnam -getRulesFunsName :: [Rule] -> [String] -getRulesFunsName rules = nub $ map getRuleFunName rules +getRuleFunName :: Cat -> Rule -> String +getRuleFunName cat (Rule fnam _ _ _) = firstLowerCase $ prefixIfNeeded $ funName fnam + where + prefixIfNeeded name + | map toLower name == map toLower (safeCatName cat) || isJust (scalaReserverWords (map toLower name)) = "internal_" ++ map toLower name + | otherwise = name + +getRulesFunsName :: [Rule] -> Cat -> [String] +getRulesFunsName rules cat = nub $ map (getRuleFunName cat) rules -- | Generate a rule group (definition for a single category) generateRuleGroup :: (Cat, [Rule]) -> [String] generateRuleGroup (cat, rules) = ["def " ++ catName ++ ": Parser[WorkflowAST] = positioned {"] ++ - [replicate 4 ' ' ++ intercalate " | " (getRulesFunsName nonCoercionRules)] ++ + [replicate 4 ' ' ++ intercalate " | " subFuns] ++ ["}"] ++ - concatMap generateRuleFor nonCoercionRules + concatMap (generateSingleRuleBody cat) nonCoercionRules where + subFuns = getRulesFunsName nonCoercionRules cat catName = safeCatName cat nonCoercionRules = filter (not . isCoercion) rules -- rulesToProcess = filter (\rule -> not (isListCat (wpThing $ valRCat rule))) nonCoercionRules - generateRuleFor :: Rule -> [String] - generateRuleFor rule = generateRuleBody [rule] - --- | Based on a list of rules, generate the functions for the parser -generateRuleBody :: [Rule] -> [String] -generateRuleBody rules = concatMap generateSingleRuleBody rules - -generateSingleRuleBody :: Rule -> [String] -generateSingleRuleBody rule@(Rule _ _ _ _) = [generateRuleDefinition rule ++ generateRuleTransformation rule] +generateSingleRuleBody :: Cat -> Rule -> [String] +generateSingleRuleBody cat rule = [generateRuleDefinition cat rule ++ generateRuleTransformation rule] -generateRuleDefinition :: Rule -> String -generateRuleDefinition rule = - "def " ++ getRuleFunName rule ++ ": Parser[WorkflowAST] =" +generateRuleDefinition :: Cat -> Rule -> String +generateRuleDefinition cat rule = + "def " ++ getRuleFunName cat rule ++ ": Parser[WorkflowAST] =" +++ intercalate " ~ " (generateRuleForm rule) generateRuleForm :: Rule -> [String] generateRuleForm rule@(Rule _ _ rhs _) = if isRecursiveRule rule then case rhsToSafeStrings rhs of - (_ : rest) -> "integer" : rest + (_ : rest) -> "string" : rest [] -> [""] -- Handle empty rhs case else case rhs of - [Right s] -> [map toUpper s ++ "()"] + [Right s] -> case s of + "_" -> ["UNDERSCORE()"] -- why only this is not being process correctly, should this be wildcard instead ? + _ -> [fromMaybe (map toUpper s) (symbolToName s) ++ "()"] _ -> map (addRuleForListCat rhs) (rhsToSafeStrings rhs) generateRuleTransformation :: Rule -> String @@ -140,41 +148,33 @@ isRuleOnlySpecials (Rule _ _ rhs _) = -- -- | Generate a case statement for a rule generateCaseStatement :: Rule -> String -generateCaseStatement rule@(Rule fun _ _ _) +generateCaseStatement rule@(Rule fun _ rhs _) | isCoercion fun = "" | null vars = "_ => " ++ fnm ++ "()" | otherwise = "case (" ++ intercalate " ~ " params ++ ") => " ++ fnm ++ "(" ++ intercalate ", " vars ++ ")" where - getRHSParamsFromRule :: Rule -> [String] - getRHSParamsFromRule (Rule _ _ items _) = - disambiguateNames $ map modifyParams $ map getSymbFromName $ safeCatToStrings items - - getFunVarsWithTypeFromRule :: Rule -> [(String, String)] - getFunVarsWithTypeFromRule (Rule _ _ items _) = - disambiguateTuples $ map (\(c, s) -> (modifyVars (getSymbFromName (safeCatName c)), s)) $ map addTypesToVars $ getRHSCats items - getBaseType :: Cat -> String getBaseType cat | isListCat cat = "List[WorkflowAST]" | otherwise = "WorkflowAST" - addTypesToVars :: Cat -> (Cat, String) - addTypesToVars cat = (cat, ".asInstanceOf[" ++ getBaseType cat ++ "]") + fnm = fromMaybe (funName fun) $ listToMaybe $ getASTNames [rule] + + -- generate a list of with (rule, finalName) + zipped = zip rhs (disambiguateNames $ map getSymb rhs) + + getSymb (Left cat) = [toLower $ safeHeadChar $ safeCatName cat] + getSymb (Right str) = [toLower $ safeHeadChar $ fromMaybe "_" $ symbolToName str] - params = concatMap getRHSParamsFromRule [rule] - vars = map (\(var, typ) -> var ++ typ) $ getFunVarsWithTypeFromRule rule - fnm = funName fun + params = map (wildCardSymbs.snd) zipped - modifyVars str - | all isAlphaNum str = [toLower $ safeHeadChar str] - | otherwise = [toLower $ safeHeadChar $ getSymbFromName str] + -- For Left cat we assign types, for Right str (tokens), we ignore (or use "_") + vars = [ p ++ ".asInstanceOf[" ++ getBaseType cat ++ "]" + | (Left cat, p) <- zipped + ] - modifyParams str - | "()" `isSuffixOf` str = "_" - | all isAlphaNum str = [toLower $ safeHeadChar str] - | otherwise = [toLower $ safeHeadChar $ getSymbFromName str] -- | Generate a special rule for tokens like Integer or String @@ -263,7 +263,7 @@ getProgramFunction :: CF -> [Doc] getProgramFunction cf = [ text "", text "def program: Parser[WorkflowAST] = positioned {", - nest 4 $ text $ "phrase(" ++ entryPoint ++ ")", + nest 4 $ text $ "phrase(" ++ fromMaybe entryPoint (scalaReserverWords entryPoint) ++ ")", text "}" ] where diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs index a006493e0..73d43d96b 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs @@ -14,7 +14,7 @@ Created : 30 September, 2024 -} -module BNFC.Backend.Scala.CFtoScalaParserAST (cf2ScalaParserAST) where +module BNFC.Backend.Scala.CFtoScalaParserAST (cf2ScalaParserAST, getASTNames) where import Prelude hiding ((<>)) @@ -24,14 +24,14 @@ import BNFC.CF Cat(ListCat), IsFun(funName, isCoercion), Rul(Rule), - Rule, - WithPosition(wpThing), TokenCat, literals ) + Rule, TokenCat, literals, wpThing ) import BNFC.PrettyPrint ( text, vcat, Doc ) import BNFC.Options ( SharedOptions(lang, Options) ) import BNFC.Backend.Scala.Utils (generateVarsList, isLeft, baseTypeToScalaType, wrapList) import Data.List (intercalate) import BNFC.Utils ((+++)) import Data.Maybe (fromMaybe) +import GHC.OldList (nub) -- | Main function that generates the AST code cf2ScalaParserAST :: SharedOptions -> CF -> Doc @@ -45,7 +45,14 @@ cf2ScalaParserAST Options{ lang } cf = vcat $ generateLiteralsDefs allLiterals where rules = ruleGroups cf - allLiterals = literals cf + allLiterals = nub $ literals cf + +getASTNames :: [Rule] -> [String] +getASTNames rules = rulesNames + where + rulesNames = map (\(Rule fun _ _ _) -> funName fun) $ + filter (\(Rule _ cat _ _) -> not $ case wpThing cat of ListCat _ -> True; _ -> False) filteredRules + filteredRules = filter (not . isCoercionRule) $ rules -- | Generate all case class definitions generateRuleDefs :: [(Cat, [Rule])] -> [Doc] @@ -72,19 +79,17 @@ generateClassParams (Rule _ _ rhs _) = filteredRhs = filter isLeft rhs - -- | Format a parameter with its type formatParamType :: Cat -> String -formatParamType cat = wrapList cat "WorkflowAST" -- Si no es baseTokenCat, usar WorkflowAST +formatParamType cat = wrapList cat "WorkflowAST" -- | Create a single case class definition createCaseClass :: Rule -> Doc createCaseClass rule@(Rule fun cat _ _) - | ListCat _ <- (wpThing cat) = "" -- TODO: here we should process the list + | ListCat _ <- (wpThing cat) = mempty | otherwise = text $ formatCaseClass className params where className = funName fun - -- Apply `catParams` to each element of `rhs` params = generateClassParams rule -- | Helper function to format the case class definition @@ -93,7 +98,9 @@ formatCaseClass className params = "case class " ++ className ++ "(" ++ params + -- | Generate the Scala types for basic LBNF types generateLiteralsDefs :: [TokenCat] -> [Doc] -generateLiteralsDefs tokens = map (\token -> text $ formatCaseClass ("p" ++ token) ("var1: " ++ fromMaybe token (baseTypeToScalaType token))) tokens +generateLiteralsDefs tokens = map ( + \token -> text $ formatCaseClass ("p" ++ token) ("var1: " ++ fromMaybe "String" (baseTypeToScalaType token)) + ) tokens -- | Generate the header part of the file headers :: String -> [Doc] diff --git a/source/src/BNFC/Backend/Scala/Utils.hs b/source/src/BNFC/Backend/Scala/Utils.hs index 1446b671f..828470b04 100644 --- a/source/src/BNFC/Backend/Scala/Utils.hs +++ b/source/src/BNFC/Backend/Scala/Utils.hs @@ -14,7 +14,8 @@ module BNFC.Backend.Scala.Utils ( generateVarsList, unwrapListCat, baseTypeToScalaType, safeTail, rhsToSafeStrings, disambiguateNames, safeCatToStrings, wrapList, safeHeadString, scalaReserverWords, safeCatName, isLeft, getRHSCats, isSpecialCat, firstUpperCase, safeHeadChar, - getSymbFromName, catToStrings, getFunName, hasTokenCat, safeRefCatName, inspectListRulesByCategory, isListCat, disambiguateTuples + getSymbFromName, catToStrings, getFunName, hasTokenCat, safeRefCatName, inspectListRulesByCategory, isListCat, disambiguateTuples, + wildCardSymbs ) where import BNFC.CF import Data.Map @@ -55,6 +56,7 @@ scalaTypesMap = [ ("Integer" , "Int") , ("String" , "String") , ("Double" , "Double") + , ("Ident" , "String") ] scalaReserverWords :: String -> Maybe String @@ -72,6 +74,7 @@ wordsMap = , ("val" , "pval") , ("var" , "pvar") , ("class", "pclass") + , ("type", "ptype") , ("object", "pobject") , ("trait", "ptrait") , ("extends", "pextends") @@ -95,6 +98,12 @@ wordsMap = , ("for", "pfor") , ("do", "pdo") , ("match", "pmatch") + , ("try", "pttry") + , ("catch", "pcatch") + , ("finally", "pfinally") + , ("throw", "pthrow") + , ("apply", "papply") + , ("program", "pprogram") ] -- | Safe version of tail that returns an empty list for an empty list @@ -140,6 +149,12 @@ getSymbFromName s = Just s -> s ++ "()" _ -> s +wildCardSymbs :: String -> String +wildCardSymbs s = + case symbolToName s of + Just _ -> "_" + _ -> s + -- | Convert a category or string to its string representation safeCatToStrings :: [Either Cat String] -> [String] safeCatToStrings = Prelude.map (\case From 2d6e3af7d3133f5fdc0bfbe3a08703abc1b8a45b Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Sun, 13 Apr 2025 01:54:32 +0200 Subject: [PATCH 34/42] chi funcionando para casos basicos --- source/src/BNFC/Backend/Scala/CFtoScalaLex.hs | 15 ++++- .../BNFC/Backend/Scala/CFtoScalaLexToken.hs | 14 +++-- .../src/BNFC/Backend/Scala/CFtoScalaParser.hs | 6 +- .../BNFC/Backend/Scala/CFtoScalaParserAST.hs | 6 +- source/src/BNFC/Backend/Scala/Utils.hs | 55 ++++++++++++++++++- 5 files changed, 82 insertions(+), 14 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs b/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs index d5b6713c8..233ed12e1 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs @@ -26,6 +26,8 @@ import Data.List (intercalate) import Data.Char (toLower) import GHC.OldList (nub) import GHC.Unicode (toUpper) +import BNFC.Backend.Scala.Utils (scalaReserverWords, mapManualTypeMap, scalaBNFCReserverWords, applyToRepeated) +import Data.Maybe (fromMaybe) -- | Converts a string to lowercase toLowerString :: String -> String @@ -176,13 +178,15 @@ getTokensFunction :: [String] -> [Doc] getTokensFunction symbs = [ text "", text "def tokens: Parser[List[WorkflowToken]] = {", - nest 4 $ text $ "phrase(rep1( " ++ intercalate " | " (getListSymNames symbs) ++ "))", + nest 4 $ text $ "phrase(rep1( " ++ intercalate " | " proccesedRepetedSymbs ++ "))", text "}" ] + where + proccesedRepetedSymbs = applyToRepeated ("p" ++) $ getListSymNames symbs -- | Get the lowercase symbol name getSymName :: String -> String -getSymName = toLowerString . getSymbFromName +getSymName s = fromMaybe (toLowerString $ getSymbFromName s) $ scalaReserverWords $ toLowerString $ getSymbFromName s -- | Get a list of symbol names getListSymNames :: [String] -> [String] @@ -192,4 +196,9 @@ getListSymNames = map getSymName getBaseLexs :: String -> Doc getBaseLexs symb = text "" $+$ - text ("def " ++ getSymName symb ++ " = positioned { \"" ++ toLowerString symb ++ "\" ^^ (_ => " ++ getSymbFromName symb ++ "()) }") + text ("def " ++ defName ++ " = positioned { \"" ++ toLowerString symb ++ "\" ^^ (_ => " ++ param' ++ "()) }") + where + param = map toUpper $ getSymbFromName symb + param' = fromMaybe param $ mapManualTypeMap param + defName = fromMaybe (getSymName symb) $ scalaBNFCReserverWords $ getSymName symb + diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs b/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs index 342bd0bdb..b517dc959 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs @@ -27,6 +27,9 @@ import BNFC.Utils (symbolToName) import Data.Char (toUpper) import System.Directory.Internal.Prelude (toLower) import Data.List (nub) +import BNFC.Backend.Scala.Utils (scalaReserverWords, baseTypeToScalaType, mapManualTypeMap) +import Data.Maybe (fromMaybe) +import Debug.Trace (trace) cf2ScalaLexToken :: SharedOptions @@ -36,8 +39,8 @@ cf2ScalaLexToken Options{ lang } cf = vsep . concat $ [ headers lang , [text $ concat $ map generateSymbClass (symbs)] - , [generateStringClasses (filter (\x -> map toLower x `notElem` map (map toLower) symbs) liters)] - , [generateKeyWordClasses (filter (\x -> map toLower x `notElem` map (map toLower) (symbs ++ liters)) (keyWords ++ ["empty"]))] + , [generateStringClasses liters] + , [generateKeyWordClasses (keyWords ++ ["empty"])] -- , [text $ "Symbols: " ++ show symbs] -- , [text $ "Literals: " ++ show liters] -- , [text $ "Keywords: " ++ show keyWords] @@ -50,7 +53,7 @@ cf2ScalaLexToken Options{ lang } cf = vsep . concat $ generateSymbClass :: String -> String generateSymbClass symb = case symbolToName symb of - Just s -> "case class " ++ s ++ "() extends WorkflowToken \n" + Just s -> "case class " ++ fromMaybe s (scalaReserverWords s) ++ "() extends WorkflowToken \n" Nothing -> mempty @@ -58,7 +61,10 @@ generateKeyWordClasses :: [String] -> Doc generateKeyWordClasses params = text $ concat $ map generateKeyWordClass params generateKeyWordClass :: String -> String -generateKeyWordClass param = "case class " ++ (map toUpper param) ++ "() extends WorkflowToken \n" +generateKeyWordClass key = "case class " ++ param' ++ "() extends WorkflowToken \n" + where + param = map toUpper key + param' = fromMaybe param $ mapManualTypeMap param generateStringClasses :: [String] -> Doc diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs index c1d65a119..4c7895f69 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -18,7 +18,7 @@ module BNFC.Backend.Scala.CFtoScalaParser (cf2ScalaParser) where import qualified Data.Foldable as DF (toList) import Prelude hiding ((<>)) -import BNFC.Backend.Scala.Utils (safeCatName, hasTokenCat, rhsToSafeStrings, disambiguateNames, getRHSCats, isSpecialCat, inspectListRulesByCategory, isListCat, isLeft, safeHeadChar, wildCardSymbs, scalaReserverWords) +import BNFC.Backend.Scala.Utils (safeCatName, hasTokenCat, rhsToSafeStrings, disambiguateNames, getRHSCats, isSpecialCat, inspectListRulesByCategory, isListCat, isLeft, safeHeadChar, wildCardSymbs, scalaReserverWords, mapManualTypeMap) import BNFC.CF import BNFC.PrettyPrint import BNFC.Options ( SharedOptions(lang, Options) ) @@ -121,8 +121,10 @@ generateRuleForm rule@(Rule _ _ rhs _) = else case rhs of [Right s] -> case s of "_" -> ["UNDERSCORE()"] -- why only this is not being process correctly, should this be wildcard instead ? - _ -> [fromMaybe (map toUpper s) (symbolToName s) ++ "()"] + _ -> [fromMaybe (paramS s) (mapManualTypeMap (paramS s)) ++ "()"] _ -> map (addRuleForListCat rhs) (rhsToSafeStrings rhs) + where + paramS s = fromMaybe (map toUpper s) (symbolToName s) generateRuleTransformation :: Rule -> String generateRuleTransformation rule = diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs index 73d43d96b..1031dde15 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs @@ -27,7 +27,7 @@ import BNFC.CF Rule, TokenCat, literals, wpThing ) import BNFC.PrettyPrint ( text, vcat, Doc ) import BNFC.Options ( SharedOptions(lang, Options) ) -import BNFC.Backend.Scala.Utils (generateVarsList, isLeft, baseTypeToScalaType, wrapList) +import BNFC.Backend.Scala.Utils (generateVarsList, isLeft, baseTypeToScalaType, wrapList, scalaReserverWords) import Data.List (intercalate) import BNFC.Utils ((+++)) import Data.Maybe (fromMaybe) @@ -50,7 +50,7 @@ cf2ScalaParserAST Options{ lang } cf = vcat $ getASTNames :: [Rule] -> [String] getASTNames rules = rulesNames where - rulesNames = map (\(Rule fun _ _ _) -> funName fun) $ + rulesNames = map (\(Rule fun _ _ _) -> fromMaybe (funName fun) $ scalaReserverWords $ funName fun) $ filter (\(Rule _ cat _ _) -> not $ case wpThing cat of ListCat _ -> True; _ -> False) filteredRules filteredRules = filter (not . isCoercionRule) $ rules @@ -89,7 +89,7 @@ createCaseClass rule@(Rule fun cat _ _) | ListCat _ <- (wpThing cat) = mempty | otherwise = text $ formatCaseClass className params where - className = funName fun + className = fromMaybe (funName fun) $ scalaReserverWords $ funName fun params = generateClassParams rule -- | Helper function to format the case class definition diff --git a/source/src/BNFC/Backend/Scala/Utils.hs b/source/src/BNFC/Backend/Scala/Utils.hs index 828470b04..174e1dad0 100644 --- a/source/src/BNFC/Backend/Scala/Utils.hs +++ b/source/src/BNFC/Backend/Scala/Utils.hs @@ -15,7 +15,7 @@ module BNFC.Backend.Scala.Utils ( generateVarsList, unwrapListCat, baseTypeToScalaType, safeTail, rhsToSafeStrings, disambiguateNames, safeCatToStrings, wrapList, safeHeadString, scalaReserverWords, safeCatName, isLeft, getRHSCats, isSpecialCat, firstUpperCase, safeHeadChar, getSymbFromName, catToStrings, getFunName, hasTokenCat, safeRefCatName, inspectListRulesByCategory, isListCat, disambiguateTuples, - wildCardSymbs + wildCardSymbs, mapManualTypeMap, scalaBNFCReserverWords, applyToRepeated ) where import BNFC.CF import Data.Map @@ -102,7 +102,11 @@ wordsMap = , ("catch", "pcatch") , ("finally", "pfinally") , ("throw", "pthrow") + , ("true", "ptrue") + , ("false", "pfalse") , ("apply", "papply") + , ("Int", "pInt") + , ("String", "pString") , ("program", "pprogram") ] @@ -242,4 +246,51 @@ disambiguateTuples :: [(String, String)] -> [(String, String)] disambiguateTuples tuples = let (names, values) = unzip tuples newNames = disambiguateNames names - in zip newNames values \ No newline at end of file + in zip newNames values + + -- | Convert base LBNF type to Scala type +mapManualTypeMap :: String -> Maybe String +mapManualTypeMap = (`Data.Map.lookup` manualTypeMap) + +-- | Map from base LBNF Type to scala Type +manualTypeMap :: Map String String +manualTypeMap = fromList manualTypesMap + +-- | Scala types mapping +manualTypesMap :: [(String, String)] +manualTypesMap = + [ ("INTEGER" , "PINTEGER") + , ("STRING" , "PSTRING") + , ("IDENT" , "PIDENT") + ] + + +-- | There are some words used in the Scala parser combinator code generated by BNFC +scalaBNFCReserverWords :: String -> Maybe String +scalaBNFCReserverWords = (`Data.Map.lookup` baseScalaBNFCReserverWordsMap) + +-- | Map from base LBNF Type to scala Type +baseScalaBNFCReserverWordsMap :: Map String String +baseScalaBNFCReserverWordsMap = fromList scalaBNFCReserverWordsMap + +-- | Scala types mapping +scalaBNFCReserverWordsMap :: [(String, String)] +scalaBNFCReserverWordsMap = + [ ("string" , "pstring") + , ("integer" , "pinteger") + , ("char" , "pchar") + , ("program" , "internal_program") + , ("apply" , "internal_apply") + ] + + +-- | applies a function only to the first occurrence of a repetead element of the list +-- | example: applyToRepeated (++ "1") ["a", "b", "a", "c"] +-- | ["a", "b", "a1", "c"] +applyToRepeated :: Eq a => (a -> a) -> [a] -> [a] +applyToRepeated f xs = apply [] xs + where + apply _ [] = [] + apply seen (y:ys) + | y `elem` seen = f y : ys + | otherwise = y : apply (y:seen) ys From 1d01fe568b5212097474ae3d49792b18e525d319 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Sun, 13 Apr 2025 21:34:47 +0200 Subject: [PATCH 35/42] chi working better still need to fix the case of -1 --- .../BNFC/Backend/Scala/CFtoScalaLexToken.hs | 4 +- .../src/BNFC/Backend/Scala/CFtoScalaParser.hs | 69 +++++++++++++------ source/src/BNFC/Backend/Scala/Utils.hs | 23 ++++--- 3 files changed, 62 insertions(+), 34 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs b/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs index b517dc959..576b832d8 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaLexToken.hs @@ -25,11 +25,9 @@ import BNFC.Options import BNFC.Backend.Common (unicodeAndSymbols) import BNFC.Utils (symbolToName) import Data.Char (toUpper) -import System.Directory.Internal.Prelude (toLower) import Data.List (nub) -import BNFC.Backend.Scala.Utils (scalaReserverWords, baseTypeToScalaType, mapManualTypeMap) +import BNFC.Backend.Scala.Utils (scalaReserverWords, mapManualTypeMap) import Data.Maybe (fromMaybe) -import Debug.Trace (trace) cf2ScalaLexToken :: SharedOptions diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs index 4c7895f69..4beb2d0a5 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -18,7 +18,7 @@ module BNFC.Backend.Scala.CFtoScalaParser (cf2ScalaParser) where import qualified Data.Foldable as DF (toList) import Prelude hiding ((<>)) -import BNFC.Backend.Scala.Utils (safeCatName, hasTokenCat, rhsToSafeStrings, disambiguateNames, getRHSCats, isSpecialCat, inspectListRulesByCategory, isListCat, isLeft, safeHeadChar, wildCardSymbs, scalaReserverWords, mapManualTypeMap) +import BNFC.Backend.Scala.Utils (safeCatName, hasTokenCat, rhsToSafeStrings, disambiguateNames, getRHSCats, isSpecialCat, inspectListRulesByCategory, isListCat, isLeft, safeHeadChar, wildCardSymbs, scalaReserverWords, mapManualTypeMap, prettyRule) import BNFC.CF import BNFC.PrettyPrint import BNFC.Options ( SharedOptions(lang, Options) ) @@ -50,7 +50,7 @@ cf2ScalaParser Options{ lang } cf = vcat $ getProgramFunction cf ++ [text ""] ++ -- Add parser rules - generateAllRules (ruleGroups cf) ++ + generateAllRules cf (ruleGroups cf) ++ [text ""] -- inspect rules, only for debugging -- ++ inspectListRulesByCategory (ruleGroups cf) @@ -61,13 +61,13 @@ cf2ScalaParser Options{ lang } cf = vcat $ addWorkflowCompiler -- | Generate all parser rules from rule groups -generateAllRules :: [(Cat, [Rule])] -> [Doc] -generateAllRules catsAndRules = +generateAllRules :: CF -> [(Cat, [Rule])] -> [Doc] +generateAllRules cf catsAndRules = let -- filter categories with all the rules isNilCons rulesToProcess = filter (not . all isNilCons . snd) catsAndRules -- Generate regular rules - mainRules = map text $ concatMap generateRuleGroup (fixCoercions rulesToProcess) + mainRules = map text $ concatMap (generateRuleGroup cf) (fixCoercions rulesToProcess) -- existe isUsedCat seguramente nos simplifique esto -- existe specialCats seguramente nos simplifique esto @@ -92,36 +92,63 @@ getRulesFunsName :: [Rule] -> Cat -> [String] getRulesFunsName rules cat = nub $ map (getRuleFunName cat) rules -- | Generate a rule group (definition for a single category) -generateRuleGroup :: (Cat, [Rule]) -> [String] -generateRuleGroup (cat, rules) = +generateRuleGroup :: CF -> (Cat, [Rule]) -> [String] +generateRuleGroup cf (cat, rules) = ["def " ++ catName ++ ": Parser[WorkflowAST] = positioned {"] ++ [replicate 4 ' ' ++ intercalate " | " subFuns] ++ ["}"] ++ - concatMap (generateSingleRuleBody cat) nonCoercionRules + concatMap (generateSingleRuleBody cf cat) nonCoercionRules where subFuns = getRulesFunsName nonCoercionRules cat catName = safeCatName cat - nonCoercionRules = filter (not . isCoercion) rules + nonCoercionRules = reverse $ map snd $ sortRulesByPrecedence $ filter (not . isCoercion) rules -- rulesToProcess = filter (\rule -> not (isListCat (wpThing $ valRCat rule))) nonCoercionRules -generateSingleRuleBody :: Cat -> Rule -> [String] -generateSingleRuleBody cat rule = [generateRuleDefinition cat rule ++ generateRuleTransformation rule] +generateSingleRuleBody :: CF -> Cat -> Rule -> [String] +generateSingleRuleBody cf cat rule = [generateRuleDefinition cf cat rule ++ generateRuleTransformation rule] -generateRuleDefinition :: Cat -> Rule -> String -generateRuleDefinition cat rule = +generateRuleDefinition :: CF -> Cat -> Rule -> String +generateRuleDefinition cf cat rule = "def " ++ getRuleFunName cat rule ++ ": Parser[WorkflowAST] =" - +++ intercalate " ~ " (generateRuleForm rule) + +++ intercalate " ~ " (generateRuleForm cf rule) -generateRuleForm :: Rule -> [String] -generateRuleForm rule@(Rule _ _ rhs _) = + +getBaseCatOfRecursiveRule :: Rule -> [Cat] +getBaseCatOfRecursiveRule (Rule _ _ rhs _) = + nub $ concatMap extractBaseCat rhs + where + -- Extrae las categorías base de un elemento del RHS + extractBaseCat :: Either Cat String -> [Cat] + extractBaseCat (Left cat) + | isBaseCat cat = [cat] + | otherwise = [] + extractBaseCat (Right _) = [] + + isBaseCat :: Cat -> Bool + isBaseCat cat = isSpecialCat $ normCat cat + + + +getBasesOfRecursiveRule :: CF -> Rule -> String +getBasesOfRecursiveRule cf rule = + let + allRulesForCat = rulesForNormalizedCat cf (normCat $ wpThing $ valRCat rule) + baseTypes = nub $ map safeCatName $ concatMap getBaseCatOfRecursiveRule allRulesForCat + in + case baseTypes of + [] -> "" + x:[] -> x + x:xs -> "(" ++ intercalate " | " (x:xs) ++ ")" + + +generateRuleForm :: CF -> Rule -> [String] +generateRuleForm cf rule@(Rule _ _ rhs _) = if isRecursiveRule rule then case rhsToSafeStrings rhs of - (_ : rest) -> "string" : rest - [] -> [""] -- Handle empty rhs case + (_ : rest) -> getBasesOfRecursiveRule cf rule : rest + [] -> [""] else case rhs of - [Right s] -> case s of - "_" -> ["UNDERSCORE()"] -- why only this is not being process correctly, should this be wildcard instead ? - _ -> [fromMaybe (paramS s) (mapManualTypeMap (paramS s)) ++ "()"] + [Right s] -> [fromMaybe (paramS s) (mapManualTypeMap (paramS s)) ++ "()"] _ -> map (addRuleForListCat rhs) (rhsToSafeStrings rhs) where paramS s = fromMaybe (map toUpper s) (symbolToName s) diff --git a/source/src/BNFC/Backend/Scala/Utils.hs b/source/src/BNFC/Backend/Scala/Utils.hs index 174e1dad0..79ff6be46 100644 --- a/source/src/BNFC/Backend/Scala/Utils.hs +++ b/source/src/BNFC/Backend/Scala/Utils.hs @@ -15,7 +15,7 @@ module BNFC.Backend.Scala.Utils ( generateVarsList, unwrapListCat, baseTypeToScalaType, safeTail, rhsToSafeStrings, disambiguateNames, safeCatToStrings, wrapList, safeHeadString, scalaReserverWords, safeCatName, isLeft, getRHSCats, isSpecialCat, firstUpperCase, safeHeadChar, getSymbFromName, catToStrings, getFunName, hasTokenCat, safeRefCatName, inspectListRulesByCategory, isListCat, disambiguateTuples, - wildCardSymbs, mapManualTypeMap, scalaBNFCReserverWords, applyToRepeated + wildCardSymbs, mapManualTypeMap, scalaBNFCReserverWords, applyToRepeated, prettyRule ) where import BNFC.CF import Data.Map @@ -196,7 +196,7 @@ hasTokenCat :: TokenCat -> Rule -> Bool hasTokenCat token (Rule _ _ rhs _) = TokenCat token `elem` [c | Left c <- rhs] isSpecialCat :: Cat -> Bool -isSpecialCat (TokenCat cat) = cat `elem` baseTokenCatNames +isSpecialCat (TokenCat cat) = cat `elem` specialCatsP isSpecialCat _ = False @@ -204,14 +204,16 @@ isSpecialCat _ = False inspectRulesByCategory :: (Cat, [Rule]) -> [Doc] inspectRulesByCategory (cat, rules) = [text "Category:" <+> pretty cat] ++ - [text "Rules:" $$ nest 2 (vcat (Prelude.map prettyRule rules))] - where - -- Pretty-print a single Rule - prettyRule (Rule fun valCat rhs internal) = - text "Function:" <+> pretty fun $$ - text "Value Category:" <+> pretty valCat $$ - text "RHS:" <+> text (show rhs) $$ - text "Internal:" <+> text (internalRuleToStrings internal) + [text "Rules:" $$ nest 2 (vcat (Prelude.map (vcat . Prelude.map text . prettyRule) rules))] + +-- Pretty-print a single Rule +prettyRule :: Rule -> [String] +prettyRule (Rule fun valCat rhs internal) = + [ "Function: " ++ (show $ wpThing fun) + , "Value Category: " ++ (show $ wpThing valCat) + , "RHS: " ++ show rhs + , "Internal: " ++ internalRuleToStrings internal + ] internalRuleToStrings :: InternalRule -> String internalRuleToStrings Internal = "Internal" @@ -261,6 +263,7 @@ manualTypesMap :: [(String, String)] manualTypesMap = [ ("INTEGER" , "PINTEGER") , ("STRING" , "PSTRING") + , ("CHAR" , "PCHAR") , ("IDENT" , "PIDENT") ] From a7825ca809aebd554f805f12ee11438d96ab93b2 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Mon, 14 Apr 2025 19:42:18 +0200 Subject: [PATCH 36/42] chi working with if and calc --- .../src/BNFC/Backend/Scala/CFtoScalaParser.hs | 44 +++++++++++++--- .../BNFC/Backend/Scala/CFtoScalaParserAST.hs | 5 +- source/src/BNFC/Backend/Scala/Utils.hs | 50 +++++++++++++++++-- 3 files changed, 85 insertions(+), 14 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs index 4beb2d0a5..d23dcda53 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -18,8 +18,26 @@ module BNFC.Backend.Scala.CFtoScalaParser (cf2ScalaParser) where import qualified Data.Foldable as DF (toList) import Prelude hiding ((<>)) -import BNFC.Backend.Scala.Utils (safeCatName, hasTokenCat, rhsToSafeStrings, disambiguateNames, getRHSCats, isSpecialCat, inspectListRulesByCategory, isListCat, isLeft, safeHeadChar, wildCardSymbs, scalaReserverWords, mapManualTypeMap, prettyRule) +import BNFC.Backend.Scala.Utils (safeCatName, hasTokenCat, rhsToSafeStrings, disambiguateNames, getRHSCats, isSpecialCat, inspectListRulesByCategory, isListCat, isLeft, safeHeadChar, wildCardSymbs, scalaReserverWords, mapManualTypeMap, prettyRule, getBiggerCoercion, isCoercionCategory) import BNFC.CF + ( allEntryPoints, + catChar, + catIdent, + catInteger, + catString, + isNilCons, + normCat, + ruleGroups, + rulesForNormalizedCat, + sameCat, + sortRulesByPrecedence, + CF, + Cat, + IsFun(isCoercion, funName), + Rul(Rule, valRCat, rhsRule), + Rule, + TokenCat, + WithPosition(wpThing) ) import BNFC.PrettyPrint import BNFC.Options ( SharedOptions(lang, Options) ) import BNFC.Backend.Common.NamedVariables (firstLowerCase, fixCoercions) @@ -30,6 +48,7 @@ import Data.Maybe (listToMaybe, fromMaybe, isJust) import BNFC.Utils ((+++), symbolToName) import BNFC.Backend.Scala.CFtoScalaParserAST (getASTNames) import GHC.Unicode (toUpper) +import Debug.Trace (trace) -- | Main function that generates the Scala parser code cf2ScalaParser :: SharedOptions -> CF -> Doc @@ -141,16 +160,27 @@ getBasesOfRecursiveRule cf rule = x:xs -> "(" ++ intercalate " | " (x:xs) ++ ")" -generateRuleForm :: CF -> Rule -> [String] -generateRuleForm cf rule@(Rule _ _ rhs _) = - if isRecursiveRule rule - then case rhsToSafeStrings rhs of - (_ : rest) -> getBasesOfRecursiveRule cf rule : rest - [] -> [""] +generateRuleForm :: CF -> Rule -> [String] +generateRuleForm cf rule@(Rule _ c rhs _) = + if isRecursiveRule rule + then generateRecursiveRuleForm rhs else case rhs of [Right s] -> [fromMaybe (paramS s) (mapManualTypeMap (paramS s)) ++ "()"] _ -> map (addRuleForListCat rhs) (rhsToSafeStrings rhs) where + generateRecursiveRuleForm :: [Either Cat String] -> [String] + generateRecursiveRuleForm ([]) = mempty + generateRecursiveRuleForm (r:[]) = case r of + Left cat -> if isCoercionCategory cat then [getBasesOfRecursiveRule cf rule] else rhsToSafeStrings [r] + Right _ -> rhsToSafeStrings [r] + generateRecursiveRuleForm (r:rest) = generateRecursiveRuleForm [r] ++ generateRecursiveRuleForm rest + + -- isCoercion cat = isCoercion (noPosition $ catToStr cat) + isBiggerCat cat = case maxCoerCat of + Just cat' -> cat' == cat + Nothing -> False + + maxCoerCat = getBiggerCoercion (wpThing c) cf paramS s = fromMaybe (map toUpper s) (symbolToName s) generateRuleTransformation :: Rule -> String diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs index 1031dde15..14833329e 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs @@ -27,7 +27,7 @@ import BNFC.CF Rule, TokenCat, literals, wpThing ) import BNFC.PrettyPrint ( text, vcat, Doc ) import BNFC.Options ( SharedOptions(lang, Options) ) -import BNFC.Backend.Scala.Utils (generateVarsList, isLeft, baseTypeToScalaType, wrapList, scalaReserverWords) +import BNFC.Backend.Scala.Utils (generateVarsList, isLeft, baseTypeToScalaType, wrapList, scalaReserverWords, isCoercionRule) import Data.List (intercalate) import BNFC.Utils ((+++)) import Data.Maybe (fromMaybe) @@ -63,9 +63,6 @@ generateRuleDefs rules = concatMap processRuleGroup rules processRuleGroup :: (Cat, [Rule]) -> [Doc] processRuleGroup (_, rules) = map createCaseClass (filter (not . isCoercionRule) rules) --- | Check if a rule is a coercion rule -isCoercionRule :: Rule -> Bool -isCoercionRule (Rule fun _ _ _) = isCoercion fun -- | Generate the class params generateClassParams :: Rule -> String diff --git a/source/src/BNFC/Backend/Scala/Utils.hs b/source/src/BNFC/Backend/Scala/Utils.hs index 79ff6be46..204a3bb34 100644 --- a/source/src/BNFC/Backend/Scala/Utils.hs +++ b/source/src/BNFC/Backend/Scala/Utils.hs @@ -15,17 +15,18 @@ module BNFC.Backend.Scala.Utils ( generateVarsList, unwrapListCat, baseTypeToScalaType, safeTail, rhsToSafeStrings, disambiguateNames, safeCatToStrings, wrapList, safeHeadString, scalaReserverWords, safeCatName, isLeft, getRHSCats, isSpecialCat, firstUpperCase, safeHeadChar, getSymbFromName, catToStrings, getFunName, hasTokenCat, safeRefCatName, inspectListRulesByCategory, isListCat, disambiguateTuples, - wildCardSymbs, mapManualTypeMap, scalaBNFCReserverWords, applyToRepeated, prettyRule + wildCardSymbs, mapManualTypeMap, scalaBNFCReserverWords, applyToRepeated, prettyRule, isCoercionRule, getBiggerCoercion, isCoercionCategory ) where import BNFC.CF import Data.Map import BNFC.Backend.Common.NamedVariables (firstLowerCase) import System.Directory.Internal.Prelude (fromMaybe) import BNFC.Utils (symbolToName) -import Data.Char (toUpper) +import Data.Char (toUpper, isDigit) import Text.PrettyPrint import BNFC.PrettyPrint -import Data.List (isSuffixOf) +import Data.List (isSuffixOf, sortOn) +import Debug.Trace (trace) generateVarsList :: [a] -> [String] @@ -297,3 +298,46 @@ applyToRepeated f xs = apply [] xs apply seen (y:ys) | y `elem` seen = f y : ys | otherwise = y : apply (y:seen) ys + +-- | Check if a rule is a coercion rule +isCoercionRule :: Rule -> Bool +isCoercionRule (Rule fun _ _ _) = isCoercion fun + +isCoercionCategory :: Cat -> Bool +isCoercionCategory (CoercCat _ _) = True +isCoercionCategory _ = False +-- | Get the bigger coercion rule of a category +-- getBiggerCoercion :: Cat -> CF -> Cat +-- getBiggerCoercion cat cf = +-- let +-- allCategories = reallyAllCats cf +-- in if length allCategories == 0 +-- then error "grammar without categories" +-- else head allCategories + +-- import Data.Char (isDigit) +-- import Data.List (isPrefixOf, sortOn) + +getBiggerCoercion :: Cat -> CF -> Maybe Cat +getBiggerCoercion targetCat cf = + let + -- Extrae el prefijo (no numérico) y el número final (si hay) + (prefix, levelStr) = span (not . isDigit) $ show targetCat + level = if length levelStr > 0 then read levelStr :: Int else 0 + + -- Filtra los que tengan el mismo prefijo y número >= al actual + matching = [ (cat, read digits :: Int) + | cat <- allCats + , let (p, digits) = span (not . isDigit) (show cat) + , p == prefix + , not (Prelude.null digits) + , all isDigit digits + , read digits >= level + ] + + -- Ordenamos por nivel y tomamos el más grande + in case matching of + [] -> trace ("targetCat: " ++ show targetCat) Nothing + _ -> trace ("encontraos al mas grande: " ++ show (fst $ last $ sortOn snd matching)) Just $ fst $ last $ sortOn snd matching + where + allCats = reallyAllCats cf \ No newline at end of file From 10e883220a7914c3d2b788bf3da2cd7df322b0fe Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Mon, 14 Apr 2025 19:43:55 +0200 Subject: [PATCH 37/42] fix cabal warnings --- .../src/BNFC/Backend/Scala/CFtoScalaParser.hs | 11 +---- .../BNFC/Backend/Scala/CFtoScalaParserAST.hs | 2 +- source/src/BNFC/Backend/Scala/Utils.hs | 42 ++----------------- 3 files changed, 6 insertions(+), 49 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs index d23dcda53..012ccf8fe 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -18,7 +18,7 @@ module BNFC.Backend.Scala.CFtoScalaParser (cf2ScalaParser) where import qualified Data.Foldable as DF (toList) import Prelude hiding ((<>)) -import BNFC.Backend.Scala.Utils (safeCatName, hasTokenCat, rhsToSafeStrings, disambiguateNames, getRHSCats, isSpecialCat, inspectListRulesByCategory, isListCat, isLeft, safeHeadChar, wildCardSymbs, scalaReserverWords, mapManualTypeMap, prettyRule, getBiggerCoercion, isCoercionCategory) +import BNFC.Backend.Scala.Utils (safeCatName, hasTokenCat, rhsToSafeStrings, disambiguateNames, getRHSCats, isSpecialCat, isListCat, isLeft, safeHeadChar, wildCardSymbs, scalaReserverWords, mapManualTypeMap, isCoercionCategory) import BNFC.CF ( allEntryPoints, catChar, @@ -48,7 +48,6 @@ import Data.Maybe (listToMaybe, fromMaybe, isJust) import BNFC.Utils ((+++), symbolToName) import BNFC.Backend.Scala.CFtoScalaParserAST (getASTNames) import GHC.Unicode (toUpper) -import Debug.Trace (trace) -- | Main function that generates the Scala parser code cf2ScalaParser :: SharedOptions -> CF -> Doc @@ -161,7 +160,7 @@ getBasesOfRecursiveRule cf rule = generateRuleForm :: CF -> Rule -> [String] -generateRuleForm cf rule@(Rule _ c rhs _) = +generateRuleForm cf rule@(Rule _ _ rhs _) = if isRecursiveRule rule then generateRecursiveRuleForm rhs else case rhs of @@ -175,12 +174,6 @@ generateRuleForm cf rule@(Rule _ c rhs _) = Right _ -> rhsToSafeStrings [r] generateRecursiveRuleForm (r:rest) = generateRecursiveRuleForm [r] ++ generateRecursiveRuleForm rest - -- isCoercion cat = isCoercion (noPosition $ catToStr cat) - isBiggerCat cat = case maxCoerCat of - Just cat' -> cat' == cat - Nothing -> False - - maxCoerCat = getBiggerCoercion (wpThing c) cf paramS s = fromMaybe (map toUpper s) (symbolToName s) generateRuleTransformation :: Rule -> String diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs index 14833329e..a3616fc6c 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParserAST.hs @@ -22,7 +22,7 @@ import BNFC.CF ( ruleGroups, CF, Cat(ListCat), - IsFun(funName, isCoercion), + IsFun(funName), Rul(Rule), Rule, TokenCat, literals, wpThing ) import BNFC.PrettyPrint ( text, vcat, Doc ) diff --git a/source/src/BNFC/Backend/Scala/Utils.hs b/source/src/BNFC/Backend/Scala/Utils.hs index 204a3bb34..cd5d3329a 100644 --- a/source/src/BNFC/Backend/Scala/Utils.hs +++ b/source/src/BNFC/Backend/Scala/Utils.hs @@ -15,18 +15,17 @@ module BNFC.Backend.Scala.Utils ( generateVarsList, unwrapListCat, baseTypeToScalaType, safeTail, rhsToSafeStrings, disambiguateNames, safeCatToStrings, wrapList, safeHeadString, scalaReserverWords, safeCatName, isLeft, getRHSCats, isSpecialCat, firstUpperCase, safeHeadChar, getSymbFromName, catToStrings, getFunName, hasTokenCat, safeRefCatName, inspectListRulesByCategory, isListCat, disambiguateTuples, - wildCardSymbs, mapManualTypeMap, scalaBNFCReserverWords, applyToRepeated, prettyRule, isCoercionRule, getBiggerCoercion, isCoercionCategory + wildCardSymbs, mapManualTypeMap, scalaBNFCReserverWords, applyToRepeated, prettyRule, isCoercionRule, isCoercionCategory ) where import BNFC.CF import Data.Map import BNFC.Backend.Common.NamedVariables (firstLowerCase) import System.Directory.Internal.Prelude (fromMaybe) import BNFC.Utils (symbolToName) -import Data.Char (toUpper, isDigit) +import Data.Char (toUpper) import Text.PrettyPrint import BNFC.PrettyPrint -import Data.List (isSuffixOf, sortOn) -import Debug.Trace (trace) +import Data.List (isSuffixOf) generateVarsList :: [a] -> [String] @@ -306,38 +305,3 @@ isCoercionRule (Rule fun _ _ _) = isCoercion fun isCoercionCategory :: Cat -> Bool isCoercionCategory (CoercCat _ _) = True isCoercionCategory _ = False --- | Get the bigger coercion rule of a category --- getBiggerCoercion :: Cat -> CF -> Cat --- getBiggerCoercion cat cf = --- let --- allCategories = reallyAllCats cf --- in if length allCategories == 0 --- then error "grammar without categories" --- else head allCategories - --- import Data.Char (isDigit) --- import Data.List (isPrefixOf, sortOn) - -getBiggerCoercion :: Cat -> CF -> Maybe Cat -getBiggerCoercion targetCat cf = - let - -- Extrae el prefijo (no numérico) y el número final (si hay) - (prefix, levelStr) = span (not . isDigit) $ show targetCat - level = if length levelStr > 0 then read levelStr :: Int else 0 - - -- Filtra los que tengan el mismo prefijo y número >= al actual - matching = [ (cat, read digits :: Int) - | cat <- allCats - , let (p, digits) = span (not . isDigit) (show cat) - , p == prefix - , not (Prelude.null digits) - , all isDigit digits - , read digits >= level - ] - - -- Ordenamos por nivel y tomamos el más grande - in case matching of - [] -> trace ("targetCat: " ++ show targetCat) Nothing - _ -> trace ("encontraos al mas grande: " ++ show (fst $ last $ sortOn snd matching)) Just $ fst $ last $ sortOn snd matching - where - allCats = reallyAllCats cf \ No newline at end of file From d488defbeb5a8d009e14541588360ad2e5819eba Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Mon, 14 Apr 2025 20:37:30 +0200 Subject: [PATCH 38/42] fix case with multiple operators in chi --- .../src/BNFC/Backend/Scala/CFtoScalaParser.hs | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs index 012ccf8fe..2535605ab 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -146,7 +146,6 @@ getBaseCatOfRecursiveRule (Rule _ _ rhs _) = isBaseCat cat = isSpecialCat $ normCat cat - getBasesOfRecursiveRule :: CF -> Rule -> String getBasesOfRecursiveRule cf rule = let @@ -162,17 +161,26 @@ getBasesOfRecursiveRule cf rule = generateRuleForm :: CF -> Rule -> [String] generateRuleForm cf rule@(Rule _ _ rhs _) = if isRecursiveRule rule - then generateRecursiveRuleForm rhs + then snd $ generateRecursiveRuleForm rhs False else case rhs of [Right s] -> [fromMaybe (paramS s) (mapManualTypeMap (paramS s)) ++ "()"] _ -> map (addRuleForListCat rhs) (rhsToSafeStrings rhs) where - generateRecursiveRuleForm :: [Either Cat String] -> [String] - generateRecursiveRuleForm ([]) = mempty - generateRecursiveRuleForm (r:[]) = case r of - Left cat -> if isCoercionCategory cat then [getBasesOfRecursiveRule cf rule] else rhsToSafeStrings [r] - Right _ -> rhsToSafeStrings [r] - generateRecursiveRuleForm (r:rest) = generateRecursiveRuleForm [r] ++ generateRecursiveRuleForm rest + generateRecursiveRuleForm :: [Either Cat String] -> Bool -> (Bool, [String]) + generateRecursiveRuleForm [] added = (added, []) + generateRecursiveRuleForm (r:rest) added = + case r of + Left cat -> + if isCoercionCategory cat && not added + then + let (_, strsRest) = generateRecursiveRuleForm rest True + in (True, getBasesOfRecursiveRule cf rule : strsRest) + else + let (addedRest, strsRest) = generateRecursiveRuleForm rest added + in (addedRest, rhsToSafeStrings [r] ++ strsRest) + Right _ -> + let (addedRest, strsRest) = generateRecursiveRuleForm rest added + in (addedRest, rhsToSafeStrings [r] ++ strsRest) paramS s = fromMaybe (map toUpper s) (symbolToName s) From d40fc4c29ab8bc5577985bbaa9848af34f8829b1 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Mon, 14 Apr 2025 21:58:35 +0200 Subject: [PATCH 39/42] fix case of !false in chi --- .../src/BNFC/Backend/Scala/CFtoScalaParser.hs | 23 +++++++++---------- source/src/BNFC/Backend/Scala/Utils.hs | 16 ++++--------- 2 files changed, 16 insertions(+), 23 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs index 2535605ab..957f99a56 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -37,7 +37,7 @@ import BNFC.CF Rul(Rule, valRCat, rhsRule), Rule, TokenCat, - WithPosition(wpThing) ) + WithPosition(wpThing), strToCat ) import BNFC.PrettyPrint import BNFC.Options ( SharedOptions(lang, Options) ) import BNFC.Backend.Common.NamedVariables (firstLowerCase, fixCoercions) @@ -48,6 +48,8 @@ import Data.Maybe (listToMaybe, fromMaybe, isJust) import BNFC.Utils ((+++), symbolToName) import BNFC.Backend.Scala.CFtoScalaParserAST (getASTNames) import GHC.Unicode (toUpper) +import BNFC.Backend.Scala.Utils (isSymbol) +import Debug.Trace (trace) -- | Main function that generates the Scala parser code cf2ScalaParser :: SharedOptions -> CF -> Doc @@ -120,7 +122,6 @@ generateRuleGroup cf (cat, rules) = subFuns = getRulesFunsName nonCoercionRules cat catName = safeCatName cat nonCoercionRules = reverse $ map snd $ sortRulesByPrecedence $ filter (not . isCoercion) rules - -- rulesToProcess = filter (\rule -> not (isListCat (wpThing $ valRCat rule))) nonCoercionRules generateSingleRuleBody :: CF -> Cat -> Rule -> [String] generateSingleRuleBody cf cat rule = [generateRuleDefinition cf cat rule ++ generateRuleTransformation rule] @@ -131,16 +132,16 @@ generateRuleDefinition cf cat rule = +++ intercalate " ~ " (generateRuleForm cf rule) -getBaseCatOfRecursiveRule :: Rule -> [Cat] -getBaseCatOfRecursiveRule (Rule _ _ rhs _) = +getBaseCatOfRecursiveRule :: Rule -> [String] +getBaseCatOfRecursiveRule rule@(Rule _ _ rhs _) = nub $ concatMap extractBaseCat rhs where -- Extrae las categorías base de un elemento del RHS - extractBaseCat :: Either Cat String -> [Cat] + extractBaseCat :: Either Cat String -> [String] extractBaseCat (Left cat) - | isBaseCat cat = [cat] + | isBaseCat cat = [getRuleFunName cat rule] | otherwise = [] - extractBaseCat (Right _) = [] + extractBaseCat (Right s) = if isSymbol s then [] else [getRuleFunName (strToCat s) rule] isBaseCat :: Cat -> Bool isBaseCat cat = isSpecialCat $ normCat cat @@ -150,7 +151,7 @@ getBasesOfRecursiveRule :: CF -> Rule -> String getBasesOfRecursiveRule cf rule = let allRulesForCat = rulesForNormalizedCat cf (normCat $ wpThing $ valRCat rule) - baseTypes = nub $ map safeCatName $ concatMap getBaseCatOfRecursiveRule allRulesForCat + baseTypes = nub $ concatMap getBaseCatOfRecursiveRule allRulesForCat in case baseTypes of [] -> "" @@ -208,7 +209,7 @@ isRuleOnlySpecials (Rule _ _ rhs _) = -- -- | Generate a case statement for a rule generateCaseStatement :: Rule -> String -generateCaseStatement rule@(Rule fun _ rhs _) +generateCaseStatement rule@(Rule fun c rhs _) | isCoercion fun = "" | null vars = "_ => " ++ fnm ++ "()" | otherwise = @@ -220,7 +221,7 @@ generateCaseStatement rule@(Rule fun _ rhs _) | isListCat cat = "List[WorkflowAST]" | otherwise = "WorkflowAST" - fnm = fromMaybe (funName fun) $ listToMaybe $ getASTNames [rule] + fnm = fromMaybe (getRuleFunName (wpThing c) rule) $ listToMaybe $ getASTNames [rule] -- generate a list of with (rule, finalName) zipped = zip rhs (disambiguateNames $ map getSymb rhs) @@ -235,8 +236,6 @@ generateCaseStatement rule@(Rule fun _ rhs _) | (Left cat, p) <- zipped ] - - -- | Generate a special rule for tokens like Integer or String generateSpecialRule :: TokenCat -> String -> String -> String -> String -> [(Cat, [Rule])] -> [Doc] generateSpecialRule tokenCat ruleName tokenName conversion pTypeName catsAndRules = diff --git a/source/src/BNFC/Backend/Scala/Utils.hs b/source/src/BNFC/Backend/Scala/Utils.hs index cd5d3329a..7247af348 100644 --- a/source/src/BNFC/Backend/Scala/Utils.hs +++ b/source/src/BNFC/Backend/Scala/Utils.hs @@ -14,8 +14,8 @@ module BNFC.Backend.Scala.Utils ( generateVarsList, unwrapListCat, baseTypeToScalaType, safeTail, rhsToSafeStrings, disambiguateNames, safeCatToStrings, wrapList, safeHeadString, scalaReserverWords, safeCatName, isLeft, getRHSCats, isSpecialCat, firstUpperCase, safeHeadChar, - getSymbFromName, catToStrings, getFunName, hasTokenCat, safeRefCatName, inspectListRulesByCategory, isListCat, disambiguateTuples, - wildCardSymbs, mapManualTypeMap, scalaBNFCReserverWords, applyToRepeated, prettyRule, isCoercionRule, isCoercionCategory + getSymbFromName, catToStrings, getFunName, hasTokenCat, inspectListRulesByCategory, isListCat, disambiguateTuples, + wildCardSymbs, mapManualTypeMap, scalaBNFCReserverWords, applyToRepeated, prettyRule, isCoercionRule, isCoercionCategory, isSymbol ) where import BNFC.CF import Data.Map @@ -26,6 +26,7 @@ import Data.Char (toUpper) import Text.PrettyPrint import BNFC.PrettyPrint import Data.List (isSuffixOf) +import Data.Maybe (isJust) generateVarsList :: [a] -> [String] @@ -133,15 +134,6 @@ safeCatName cat = fromMaybe notSafeScalaCatName (scalaReserverWords notSafeScala _ -> firstLowerCase $ show cat -safeRefCatName :: Cat -> String -safeRefCatName cat = fromMaybe notSafeScalaCatName (scalaReserverWords notSafeScalaCatName) - where - notSafeScalaCatName = case cat of - ListCat innerCat -> firstUpperCase (safeCatName innerCat) - _ -> firstLowerCase $ show cat - - - firstUpperCase :: String -> String firstUpperCase [] = [] firstUpperCase (x:xs) = toUpper x : xs @@ -199,6 +191,8 @@ isSpecialCat :: Cat -> Bool isSpecialCat (TokenCat cat) = cat `elem` specialCatsP isSpecialCat _ = False +isSymbol :: String -> Bool +isSymbol = isJust . symbolToName -- Function to generate a list of Doc from a (Cat, [Rule]) inspectRulesByCategory :: (Cat, [Rule]) -> [Doc] From 54670fa8cf4fff0f90bb142fa6ce7ec9cb0fd601 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Mon, 14 Apr 2025 22:54:59 +0200 Subject: [PATCH 40/42] fix types order error in lexer --- source/src/BNFC/Backend/Scala/CFtoScalaLex.hs | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs b/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs index 233ed12e1..7a265358f 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaLex.hs @@ -47,7 +47,7 @@ cf2ScalaLex Options{ lang } cf = vcat $ -- Add apply function getApplyFunction ++ -- Add tokens function - getTokensFunction (symbs ++ keyWords ++ liters) ++ + getTokensFunction (keyWords ++ symbs ++ liters) ++ -- Add parser functions for literals map addParserFunction liters ++ -- Add keyword parsers @@ -137,8 +137,8 @@ getIntegerFunction = vcat [ getDoubleFunction :: Doc getDoubleFunction = vcat [ text "", - text "def double: Parser[Double] = {", - nest 4 $ text "\"[0-9]+.[0-9]+\".r ^^ {i => Double(i)}", + text "def double: Parser[DOUBLE] = {", + nest 4 $ text "\"[0-9]+.[0-9]+\".r ^^ {i => DOUBLE(i)}", text "}" ] @@ -175,22 +175,22 @@ getSymbFromName s = -- | Generate the tokens function getTokensFunction :: [String] -> [Doc] -getTokensFunction symbs = [ +getTokensFunction tokens = [ text "", text "def tokens: Parser[List[WorkflowToken]] = {", nest 4 $ text $ "phrase(rep1( " ++ intercalate " | " proccesedRepetedSymbs ++ "))", text "}" ] where - proccesedRepetedSymbs = applyToRepeated ("p" ++) $ getListSymNames symbs + proccesedRepetedSymbs = reverse $ applyToRepeated ("p" ++) $ reverse $ getListTokensNames tokens -- | Get the lowercase symbol name -getSymName :: String -> String -getSymName s = fromMaybe (toLowerString $ getSymbFromName s) $ scalaReserverWords $ toLowerString $ getSymbFromName s +getTokenName :: String -> String +getTokenName s = fromMaybe (toLowerString $ getSymbFromName s) $ scalaReserverWords $ toLowerString $ getSymbFromName s -- | Get a list of symbol names -getListSymNames :: [String] -> [String] -getListSymNames = map getSymName +getListTokensNames :: [String] -> [String] +getListTokensNames = map getTokenName -- | Generate a keyword parser for a symbol getBaseLexs :: String -> Doc @@ -200,5 +200,5 @@ getBaseLexs symb = where param = map toUpper $ getSymbFromName symb param' = fromMaybe param $ mapManualTypeMap param - defName = fromMaybe (getSymName symb) $ scalaBNFCReserverWords $ getSymName symb + defName = fromMaybe (getTokenName symb) $ scalaBNFCReserverWords $ getTokenName symb From 92bb9f8e024aa2ccd2668a1e20e002f3f00ed71f Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Wed, 16 Apr 2025 18:37:26 +0200 Subject: [PATCH 41/42] clean up a little bit and fix cpp parser generation --- .../src/BNFC/Backend/Scala/CFtoScalaParser.hs | 21 +- source/src/BNFC/Backend/Scala/Utils.hs | 223 +++++++++--------- 2 files changed, 122 insertions(+), 122 deletions(-) diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs index 957f99a56..05f3b1825 100644 --- a/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs +++ b/source/src/BNFC/Backend/Scala/CFtoScalaParser.hs @@ -18,7 +18,7 @@ module BNFC.Backend.Scala.CFtoScalaParser (cf2ScalaParser) where import qualified Data.Foldable as DF (toList) import Prelude hiding ((<>)) -import BNFC.Backend.Scala.Utils (safeCatName, hasTokenCat, rhsToSafeStrings, disambiguateNames, getRHSCats, isSpecialCat, isListCat, isLeft, safeHeadChar, wildCardSymbs, scalaReserverWords, mapManualTypeMap, isCoercionCategory) +import BNFC.Backend.Scala.Utils (safeCatName, hasTokenCat, rhsToSafeStrings, disambiguateNames, getRHSCats, isSpecialCat, isListCat, isLeft, safeHeadChar, wildCardSymbs, scalaReserverWords, mapManualTypeMap, isCoercionCategory, getTerminalFromListRules, isSymbol) import BNFC.CF ( allEntryPoints, catChar, @@ -37,7 +37,7 @@ import BNFC.CF Rul(Rule, valRCat, rhsRule), Rule, TokenCat, - WithPosition(wpThing), strToCat ) + WithPosition(wpThing), strToCat, catDouble, rulesForCat ) import BNFC.PrettyPrint import BNFC.Options ( SharedOptions(lang, Options) ) import BNFC.Backend.Common.NamedVariables (firstLowerCase, fixCoercions) @@ -48,8 +48,6 @@ import Data.Maybe (listToMaybe, fromMaybe, isJust) import BNFC.Utils ((+++), symbolToName) import BNFC.Backend.Scala.CFtoScalaParserAST (getASTNames) import GHC.Unicode (toUpper) -import BNFC.Backend.Scala.Utils (isSymbol) -import Debug.Trace (trace) -- | Main function that generates the Scala parser code cf2ScalaParser :: SharedOptions -> CF -> Doc @@ -94,11 +92,12 @@ generateAllRules cf catsAndRules = -- existe sigLookup wtf con esto -- Generate special rules for integer and string if needed integerRule = generateSpecialRule catInteger "integer" "INTEGER" "toInt" "pInteger" catsAndRules + doubleRule = generateSpecialRule catDouble "double" "DOUBLE" "toInt" "pDouble" catsAndRules stringRule = generateSpecialRule catString "string" "STRING" "toString" "pString" catsAndRules - charRule = generateSpecialRule catChar "char" "CHAR" "charAt(0)" "pChar" catsAndRules + charRule = generateSpecialRule catChar "char" "CHAR" "toString" "pChar" catsAndRules identRule = generateSpecialRule catIdent "ident" "IDENT" "toString" "pIdent" catsAndRules - in mainRules ++ integerRule ++ stringRule ++ charRule ++ identRule + in mainRules ++ integerRule ++ stringRule ++ charRule ++ identRule ++ doubleRule getRuleFunName :: Cat -> Rule -> String @@ -165,7 +164,7 @@ generateRuleForm cf rule@(Rule _ _ rhs _) = then snd $ generateRecursiveRuleForm rhs False else case rhs of [Right s] -> [fromMaybe (paramS s) (mapManualTypeMap (paramS s)) ++ "()"] - _ -> map (addRuleForListCat rhs) (rhsToSafeStrings rhs) + _ -> map (addRuleForListCat cf rhs) (rhsToSafeStrings rhs) where generateRecursiveRuleForm :: [Either Cat String] -> Bool -> (Bool, [String]) generateRecursiveRuleForm [] added = (added, []) @@ -197,10 +196,12 @@ isRecursiveRule :: Rule -> Bool isRecursiveRule (Rule _ cat rhs _) = any (sameCat (wpThing cat)) (getRHSCats rhs) -addRuleForListCat :: [Either Cat String] -> String -> String -addRuleForListCat rhs s = +addRuleForListCat :: CF -> [Either Cat String] -> String -> String +addRuleForListCat cf rhs s = case find (\cat -> isListCat cat && safeCatName cat == s) (getRHSCats rhs) of - Just _ -> "rep(" ++ firstLowerCase s ++ ")" + Just c -> case (getTerminalFromListRules $ rulesForCat cf c) of + Just term -> "repsep(" ++ firstLowerCase s ++ ", " ++ term ++ "())" + _ -> "rep(" ++ firstLowerCase s ++ ")" Nothing -> s isRuleOnlySpecials :: Rule -> Bool diff --git a/source/src/BNFC/Backend/Scala/Utils.hs b/source/src/BNFC/Backend/Scala/Utils.hs index 7247af348..d123f553f 100644 --- a/source/src/BNFC/Backend/Scala/Utils.hs +++ b/source/src/BNFC/Backend/Scala/Utils.hs @@ -14,8 +14,9 @@ module BNFC.Backend.Scala.Utils ( generateVarsList, unwrapListCat, baseTypeToScalaType, safeTail, rhsToSafeStrings, disambiguateNames, safeCatToStrings, wrapList, safeHeadString, scalaReserverWords, safeCatName, isLeft, getRHSCats, isSpecialCat, firstUpperCase, safeHeadChar, - getSymbFromName, catToStrings, getFunName, hasTokenCat, inspectListRulesByCategory, isListCat, disambiguateTuples, - wildCardSymbs, mapManualTypeMap, scalaBNFCReserverWords, applyToRepeated, prettyRule, isCoercionRule, isCoercionCategory, isSymbol + getSymbFromName, catToStrings, getFunName, hasTokenCat, isListCat, disambiguateTuples, getListSeparator, + wildCardSymbs, mapManualTypeMap, scalaBNFCReserverWords, applyToRepeated, isCoercionRule, isCoercionCategory, isSymbol, + getTerminalFromListRules ) where import BNFC.CF import Data.Map @@ -23,12 +24,11 @@ import BNFC.Backend.Common.NamedVariables (firstLowerCase) import System.Directory.Internal.Prelude (fromMaybe) import BNFC.Utils (symbolToName) import Data.Char (toUpper) -import Text.PrettyPrint -import BNFC.PrettyPrint -import Data.List (isSuffixOf) +import Data.List (isSuffixOf, find) import Data.Maybe (isJust) + generateVarsList :: [a] -> [String] generateVarsList xs = zipWith (\_ (i :: Int) -> "var" ++ show i) xs [1..] @@ -43,74 +43,6 @@ wrapList cat s = case cat of ListCat _ -> "List[" ++ s ++ "]" _ -> s --- | Convert base LBNF type to Scala type -baseTypeToScalaType :: String -> Maybe String -baseTypeToScalaType = (`Data.Map.lookup` baseTypeMap) - --- | Map from base LBNF Type to scala Type -baseTypeMap :: Map String String -baseTypeMap = fromList scalaTypesMap - --- | Scala types mapping -scalaTypesMap :: [(String, String)] -scalaTypesMap = - [ ("Integer" , "Int") - , ("String" , "String") - , ("Double" , "Double") - , ("Ident" , "String") - ] - -scalaReserverWords :: String -> Maybe String -scalaReserverWords = (`Data.Map.lookup` reserverWordsMap) - --- | Map from base LBNF Type to scala Type -reserverWordsMap :: Map String String -reserverWordsMap = fromList wordsMap - --- | Scala reserverd words mapping -wordsMap :: [(String, String)] -wordsMap = - [ - ("def" , "pdef") - , ("val" , "pval") - , ("var" , "pvar") - , ("class", "pclass") - , ("type", "ptype") - , ("object", "pobject") - , ("trait", "ptrait") - , ("extends", "pextends") - , ("with", "pwith") - , ("case", "pcase") - , ("sealed", "psealed") - , ("abstract", "pabstract") - , ("final", "pfinal") - , ("override", "poverride") - , ("implicit", "pimplicit") - , ("lazy", "plazy") - , ("private", "pprivate") - , ("protected", "pprotected") - , ("public", "ppublic") - , ("import", "pimport") - , ("package", "ppackage") - , ("return", "preturn") - , ("if", "pif") - , ("else", "pelse") - , ("while", "pwhile") - , ("for", "pfor") - , ("do", "pdo") - , ("match", "pmatch") - , ("try", "pttry") - , ("catch", "pcatch") - , ("finally", "pfinally") - , ("throw", "pthrow") - , ("true", "ptrue") - , ("false", "pfalse") - , ("apply", "papply") - , ("Int", "pInt") - , ("String", "pString") - , ("program", "pprogram") - ] - -- | Safe version of tail that returns an empty list for an empty list safeTail :: [a] -> [a] safeTail [] = [] @@ -179,6 +111,17 @@ rhsToSafeStrings = Prelude.map (\case getRHSCats :: [Either Cat String] -> [Cat] getRHSCats rhs = [c | Left c <- rhs] +getTerminalFromListRules :: [Rule] -> Maybe String +getTerminalFromListRules rules = (find (not . (== "")) terminals) + where + terminals = Prelude.concatMap (\rule -> + Prelude.map (\rhs -> case rhs of + Right s -> fromMaybe "" $ symbolToName s + _ -> "" + ) (rhsRule rule) + + ) rules + -- | Get the function name for a rule getFunName :: Rule -> String getFunName (Rule fun _ _ _) = wpThing fun @@ -194,29 +137,6 @@ isSpecialCat _ = False isSymbol :: String -> Bool isSymbol = isJust . symbolToName --- Function to generate a list of Doc from a (Cat, [Rule]) -inspectRulesByCategory :: (Cat, [Rule]) -> [Doc] -inspectRulesByCategory (cat, rules) = - [text "Category:" <+> pretty cat] ++ - [text "Rules:" $$ nest 2 (vcat (Prelude.map (vcat . Prelude.map text . prettyRule) rules))] - --- Pretty-print a single Rule -prettyRule :: Rule -> [String] -prettyRule (Rule fun valCat rhs internal) = - [ "Function: " ++ (show $ wpThing fun) - , "Value Category: " ++ (show $ wpThing valCat) - , "RHS: " ++ show rhs - , "Internal: " ++ internalRuleToStrings internal - ] - -internalRuleToStrings :: InternalRule -> String -internalRuleToStrings Internal = "Internal" -internalRuleToStrings Parsable = "Parsable" - - --- | Convert a list of rules to a list of strings -inspectListRulesByCategory :: [(Cat, [Rule])] -> [Doc] -inspectListRulesByCategory rulesByCat = concatMap inspectRulesByCategory rulesByCat isLeft :: Either a b -> Bool isLeft (Left _) = True @@ -244,6 +164,34 @@ disambiguateTuples tuples = newNames = disambiguateNames names in zip newNames values +-- | applies a function only to the first occurrence of a repetead element of the list +-- | example: applyToRepeated (++ "1") ["a", "b", "a", "c"] +-- | ["a", "b", "a1", "c"] +applyToRepeated :: Eq a => (a -> a) -> [a] -> [a] +applyToRepeated f xs = apply [] xs + where + apply _ [] = [] + apply seen (y:ys) + | y `elem` seen = f y : ys + | otherwise = y : apply (y:seen) ys + +-- | Check if a rule is a coercion rule +isCoercionRule :: Rule -> Bool +isCoercionRule (Rule fun _ _ _) = isCoercion fun + +isCoercionCategory :: Cat -> Bool +isCoercionCategory (CoercCat _ _) = True +isCoercionCategory _ = False + +getListSeparator :: CF -> Rule -> Maybe Symbol +getListSeparator cf (Rule _ cat _ _) = + case normCat (wpThing cat) of + ListCat baseCat -> + let (_, layoutKeywords, _) = layoutPragmas cf + in Prelude.lookup (show (normCat baseCat)) layoutKeywords >>= Just . listSep + _ -> Nothing + + -- | Convert base LBNF type to Scala type mapManualTypeMap :: String -> Maybe String mapManualTypeMap = (`Data.Map.lookup` manualTypeMap) @@ -257,6 +205,7 @@ manualTypesMap :: [(String, String)] manualTypesMap = [ ("INTEGER" , "PINTEGER") , ("STRING" , "PSTRING") + , ("DOUBLE" , "PDOUBLE") , ("CHAR" , "PCHAR") , ("IDENT" , "PIDENT") ] @@ -275,27 +224,77 @@ scalaBNFCReserverWordsMap :: [(String, String)] scalaBNFCReserverWordsMap = [ ("string" , "pstring") , ("integer" , "pinteger") + , ("double" , "pdouble") , ("char" , "pchar") , ("program" , "internal_program") , ("apply" , "internal_apply") ] --- | applies a function only to the first occurrence of a repetead element of the list --- | example: applyToRepeated (++ "1") ["a", "b", "a", "c"] --- | ["a", "b", "a1", "c"] -applyToRepeated :: Eq a => (a -> a) -> [a] -> [a] -applyToRepeated f xs = apply [] xs - where - apply _ [] = [] - apply seen (y:ys) - | y `elem` seen = f y : ys - | otherwise = y : apply (y:seen) ys +-- | Convert base LBNF type to Scala type +baseTypeToScalaType :: String -> Maybe String +baseTypeToScalaType = (`Data.Map.lookup` baseTypeMap) --- | Check if a rule is a coercion rule -isCoercionRule :: Rule -> Bool -isCoercionRule (Rule fun _ _ _) = isCoercion fun +-- | Map from base LBNF Type to scala Type +baseTypeMap :: Map String String +baseTypeMap = fromList scalaTypesMap -isCoercionCategory :: Cat -> Bool -isCoercionCategory (CoercCat _ _) = True -isCoercionCategory _ = False +-- | Scala types mapping +scalaTypesMap :: [(String, String)] +scalaTypesMap = + [ ("Integer" , "Int") + , ("String" , "String") + , ("Double" , "Double") + , ("Ident" , "String") + ] + +scalaReserverWords :: String -> Maybe String +scalaReserverWords = (`Data.Map.lookup` reserverWordsMap) + +-- | Map from base LBNF Type to scala Type +reserverWordsMap :: Map String String +reserverWordsMap = fromList wordsMap + +-- | Scala reserverd words mapping +wordsMap :: [(String, String)] +wordsMap = + [ + ("def" , "pdef") + , ("val" , "pval") + , ("var" , "pvar") + , ("class", "pclass") + , ("type", "ptype") + , ("object", "pobject") + , ("trait", "ptrait") + , ("extends", "pextends") + , ("with", "pwith") + , ("case", "pcase") + , ("sealed", "psealed") + , ("abstract", "pabstract") + , ("final", "pfinal") + , ("override", "poverride") + , ("implicit", "pimplicit") + , ("lazy", "plazy") + , ("private", "pprivate") + , ("protected", "pprotected") + , ("public", "ppublic") + , ("import", "pimport") + , ("package", "ppackage") + , ("return", "preturn") + , ("if", "pif") + , ("else", "pelse") + , ("while", "pwhile") + , ("for", "pfor") + , ("do", "pdo") + , ("match", "pmatch") + , ("try", "pttry") + , ("catch", "pcatch") + , ("finally", "pfinally") + , ("throw", "pthrow") + , ("true", "ptrue") + , ("false", "pfalse") + , ("apply", "papply") + , ("Int", "pInt") + , ("String", "pString") + , ("program", "pprogram") + ] From 8d7f6980c19611e5ecd9775324b7555b306099bd Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Thu, 17 Apr 2025 00:38:07 +0200 Subject: [PATCH 42/42] remove abs --- source/BNFC.cabal | 1 - source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs | 62 ------------------- 2 files changed, 63 deletions(-) delete mode 100644 source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs diff --git a/source/BNFC.cabal b/source/BNFC.cabal index 999b062c6..b4de72dfd 100644 --- a/source/BNFC.cabal +++ b/source/BNFC.cabal @@ -275,7 +275,6 @@ library -- Scala backend BNFC.Backend.Scala BNFC.Backend.Scala.Utils - BNFC.Backend.Scala.CFtoScalaAbs BNFC.Backend.Scala.CFtoScalaLex BNFC.Backend.Scala.CFtoScalaLexToken BNFC.Backend.Scala.CFtoScalaParser diff --git a/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs b/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs deleted file mode 100644 index e8ecc53a8..000000000 --- a/source/src/BNFC/Backend/Scala/CFtoScalaAbs.hs +++ /dev/null @@ -1,62 +0,0 @@ -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE PatternGuards #-} -{-# LANGUAGE OverloadedStrings #-} - -{- - BNF Converter: Scala Abstract syntax - Copyright (Scala) 2024 Author: Juan Pablo Poittevin - - Description : This module generates the Scala Abstract Syntax - tree classes. It generates both a Header file - and an Implementation file - - Author : Juan Pablo Poittevin - Created : 30 September, 2024 --} - -module BNFC.Backend.Scala.CFtoScalaAbs (cf2ScalaAbs) where - -import Prelude hiding ((<>)) - -import BNFC.CF -import BNFC.PrettyPrint -import BNFC.Options - --- | The result is two files (.H file, .C file) -cf2ScalaAbs - :: SharedOptions - -> CF -- ^ Grammar. - -> Doc -- ^ @.H@ file, @.C@ file. -cf2ScalaAbs Options{ lang } cf = vsep . concat $ - [ - [] - , ["package" <+> text lang] - , [hang classSign 4 $ functions] - , defaultFunction lang - , ["}"] - -- , [text $ concat $ map dataToString datas] - ] - where - datas = cf2data cf - functions = vcat (map prData datas) - -classSign :: Doc -classSign = "abstract class AbstractVisitor[R, A] extends AllVisitor[R, A]{" - -prData :: Data -> Doc -prData (_, rules) = vcat $ concat - [ - [constructors rules] - ] - where - prRule (fun, _) = hsep $ concat [ ["def visit(p: " <+> text ("Absyn." ++ fun) <+> text ", arg: A): R = visitDefault(p, arg)"] ] - constructors [] = empty - constructors (h:t) = sep $ [prRule h] ++ map prRule t - -defaultFunction :: String -> [Doc] -defaultFunction lang = - [ - nest 4 $ text $ "def visitDefault(p: Absyn." ++ lang ++ ", arg: A): R = {" - , nest 8 "throw new IllegalArgumentException(this.getClass.getName + \": \" + p)" - , nest 4 "}" - ] \ No newline at end of file