From 95a4d822b6e6ce121f6874f7dd80ef2c6285cae6 Mon Sep 17 00:00:00 2001 From: William Ball Date: Sun, 8 Dec 2024 21:42:13 -0800 Subject: [PATCH] drastically sped up parser --- lib/Parser.hs | 106 ++++++++++++++++++++++++-------------------------- 1 file changed, 50 insertions(+), 56 deletions(-) diff --git a/lib/Parser.hs b/lib/Parser.hs index b6105fc..562325c 100644 --- a/lib/Parser.hs +++ b/lib/Parser.hs @@ -2,13 +2,14 @@ module Parser where +import Control.Monad.Combinators (option) import Control.Monad.Except import Data.List (foldl, foldl1) import qualified Data.Text as T import Errors (Error (..)) import IR import Preprocessor -import Text.Megaparsec (Parsec, ShowErrorComponent (..), between, choice, chunk, errorBundlePretty, label, runParser, try) +import Text.Megaparsec (MonadParsec (..), Parsec, ShowErrorComponent (..), between, choice, errorBundlePretty, label, runParser, try) import Text.Megaparsec.Char import qualified Text.Megaparsec.Char.Lexer as L @@ -30,8 +31,11 @@ skipSpace = lexeme :: Parser a -> Parser a lexeme = L.lexeme skipSpace -eat :: Text -> Parser () -eat = void . lexeme . chunk +symbol :: Text -> Parser () +symbol = void . L.symbol skipSpace + +pKeyword :: Text -> Parser () +pKeyword keyword = void $ lexeme (string keyword <* notFollowedBy alphaNumChar) keywords :: [Text] keywords = ["forall", "let", "in", "end", "fun", "def", "axiom", "section", "variable", "hypothesis"] @@ -41,21 +45,16 @@ pIdentifier = try $ label "identifier" $ lexeme $ do firstChar <- letterChar <|> char '_' rest <- many $ alphaNumChar <|> char '_' let ident = T.pack (firstChar : rest) - when (ident `elem` keywords) $ - fail $ - "Reserved word: " ++ T.unpack ident + guard (ident `notElem` keywords) pure ident pVar :: Parser IRExpr pVar = label "variable" $ lexeme $ Var <$> pIdentifier -defChoice :: NonEmpty Text -> Parser () -defChoice options = lexeme $ label (T.unpack $ head options) $ void $ choice $ fmap chunk options - pParamGroup :: Parser [Param] pParamGroup = lexeme $ label "parameter group" $ between (char '(') (char ')') $ do idents <- some pIdentifier - eat ":" + symbol ":" ty <- pIRExpr pure $ map (,ty) idents @@ -73,31 +72,31 @@ mkPi ascription (param, typ) = Pi param typ ascription pLAbs :: Parser IRExpr pLAbs = lexeme $ label "λ-abstraction" $ do - _ <- defChoice $ "λ" :| ["fun"] + _ <- pKeyword "fun" <|> symbol "λ" params <- pSomeParams ascription <- optional pAscription - _ <- defChoice $ "=>" :| ["⇒"] + _ <- symbol "=>" <|> symbol "⇒" body <- pIRExpr pure $ foldr (mkAbs ascription) body params pPAbs :: Parser IRExpr pPAbs = lexeme $ label "Π-abstraction" $ do - _ <- defChoice $ "∏" :| ["forall", "∀"] + _ <- pKeyword "forall" <|> symbol "∏" <|> symbol "∀" params <- pSomeParams ascription <- optional pAscription - eat "," + symbol "," body <- pIRExpr pure $ foldr (mkPi ascription) body params pBinding :: Parser (Text, Maybe IRExpr, IRExpr) pBinding = lexeme $ label "binding" $ do - eat "(" + symbol "(" ident <- pIdentifier params <- pManyParams ascription <- optional pAscription - eat ":=" + symbol ":=" value <- pIRExpr - eat ")" + symbol ")" pure ( ident , flip (foldr (mkPi Nothing)) params <$> ascription @@ -106,54 +105,45 @@ pBinding = lexeme $ label "binding" $ do pLet :: Parser IRExpr pLet = lexeme $ label "let expression" $ do - eat "let" + pKeyword "let" bindings <- some pBinding - eat "in" - body <- try pIRExpr - eat "end" + pKeyword "in" + body <- pIRExpr + pKeyword "end" pure $ foldr letTuple body bindings where letTuple :: (Text, Maybe IRExpr, IRExpr) -> IRExpr -> IRExpr letTuple (name, ascription, value) = Let name ascription value -pArrow :: Parser IRExpr -pArrow = lexeme $ label "->" $ do - a <- pAppTerm - _ <- defChoice $ "->" :| ["→"] - Pi "" a Nothing <$> pIRExpr - pApp :: Parser IRExpr pApp = lexeme $ foldl1 App <$> some pTerm pStar :: Parser IRExpr -pStar = lexeme $ Star <$ eat "*" - -pSquare :: Parser IRExpr -pSquare = lexeme $ Level 0 <$ defChoice ("□" :| ["[]"]) +pStar = lexeme $ Star <$ symbol "*" subscriptDigit :: Parser Integer subscriptDigit = choice - [ chunk "₀" >> pure 0 - , chunk "₁" >> pure 1 - , chunk "₂" >> pure 2 - , chunk "₃" >> pure 3 - , chunk "₄" >> pure 4 - , chunk "₅" >> pure 5 - , chunk "₆" >> pure 6 - , chunk "₇" >> pure 7 - , chunk "₈" >> pure 8 - , chunk "₉" >> pure 9 + [ symbol "₀" >> pure 0 + , symbol "₁" >> pure 1 + , symbol "₂" >> pure 2 + , symbol "₃" >> pure 3 + , symbol "₄" >> pure 4 + , symbol "₅" >> pure 5 + , symbol "₆" >> pure 6 + , symbol "₇" >> pure 7 + , symbol "₈" >> pure 8 + , symbol "₉" >> pure 9 ] subscriptInt :: Parser Integer subscriptInt = foldl (\acc d -> acc * 10 + d) 0 <$> some subscriptDigit -pNumSort :: Parser IRExpr -pNumSort = lexeme $ pSquare >> Level <$> (L.decimal <|> subscriptInt) +pSquare :: Parser IRExpr +pSquare = lexeme $ (symbol "□" <|> symbol "[]") >> option (Level 0) (Level <$> (L.decimal <|> subscriptInt)) pSort :: Parser IRExpr -pSort = lexeme $ pStar <|> try pNumSort <|> pSquare +pSort = lexeme $ pStar <|> pSquare pTerm :: Parser IRExpr pTerm = lexeme $ label "term" $ choice [pSort, pVar, between (char '(') (char ')') pIRExpr] @@ -162,45 +152,49 @@ pAppTerm :: Parser IRExpr pAppTerm = lexeme $ choice [pLAbs, pPAbs, pLet, pApp] pIRExpr :: Parser IRExpr -pIRExpr = lexeme $ try pArrow <|> pAppTerm +pIRExpr = lexeme $ do + e <- pAppTerm + option e $ do + _ <- symbol "->" <|> symbol "→" + Pi "" e Nothing <$> pIRExpr pAscription :: Parser IRExpr -pAscription = lexeme $ try $ eat ":" >> label "type" pIRExpr +pAscription = lexeme $ try $ symbol ":" >> label "type" pIRExpr pAxiom :: Parser IRDef pAxiom = lexeme $ label "axiom" $ do - eat "axiom" + pKeyword "axiom" ident <- pIdentifier params <- pManyParams ascription <- fmap (flip (foldr (mkPi Nothing)) params) pAscription - eat ";" + symbol ";" pure $ Axiom ident ascription pVariable :: Parser [IRSectionDef] pVariable = lexeme $ label "variable declaration" $ do - eat "variable" <|> eat "hypothesis" + pKeyword "variable" <|> pKeyword "hypothesis" vars <- pManyParams - eat ";" + symbol ";" pure $ uncurry Variable <$> vars pDef :: Parser IRDef pDef = lexeme $ label "definition" $ do - eat "def" + pKeyword "def" ident <- pIdentifier params <- pManyParams ascription <- fmap (flip (foldr (mkPi Nothing)) params) <$> optional pAscription - eat ":=" + symbol ":=" body <- pIRExpr - eat ";" + symbol ";" pure $ Def ident ascription $ foldr (mkAbs Nothing) body params pSection :: Parser IRSectionDef pSection = lexeme $ label "section" $ do - eat "section" + pKeyword "section" secLabel <- pIdentifier body <- concat <$> many pIRDef - eat "end" - void $ lexeme $ chunk secLabel + pKeyword "end" + void $ lexeme $ string secLabel pure $ Section secLabel body pIRDef :: Parser [IRSectionDef]