drastically sped up parser

This commit is contained in:
William Ball 2024-12-08 21:42:13 -08:00
parent 6f34793ba2
commit 95a4d822b6

View file

@ -2,13 +2,14 @@
module Parser where module Parser where
import Control.Monad.Combinators (option)
import Control.Monad.Except import Control.Monad.Except
import Data.List (foldl, foldl1) import Data.List (foldl, foldl1)
import qualified Data.Text as T import qualified Data.Text as T
import Errors (Error (..)) import Errors (Error (..))
import IR import IR
import Preprocessor import Preprocessor
import Text.Megaparsec (Parsec, ShowErrorComponent (..), between, choice, chunk, errorBundlePretty, label, runParser, try) import Text.Megaparsec (MonadParsec (..), Parsec, ShowErrorComponent (..), between, choice, errorBundlePretty, label, runParser, try)
import Text.Megaparsec.Char import Text.Megaparsec.Char
import qualified Text.Megaparsec.Char.Lexer as L import qualified Text.Megaparsec.Char.Lexer as L
@ -30,8 +31,11 @@ skipSpace =
lexeme :: Parser a -> Parser a lexeme :: Parser a -> Parser a
lexeme = L.lexeme skipSpace lexeme = L.lexeme skipSpace
eat :: Text -> Parser () symbol :: Text -> Parser ()
eat = void . lexeme . chunk symbol = void . L.symbol skipSpace
pKeyword :: Text -> Parser ()
pKeyword keyword = void $ lexeme (string keyword <* notFollowedBy alphaNumChar)
keywords :: [Text] keywords :: [Text]
keywords = ["forall", "let", "in", "end", "fun", "def", "axiom", "section", "variable", "hypothesis"] keywords = ["forall", "let", "in", "end", "fun", "def", "axiom", "section", "variable", "hypothesis"]
@ -41,21 +45,16 @@ pIdentifier = try $ label "identifier" $ lexeme $ do
firstChar <- letterChar <|> char '_' firstChar <- letterChar <|> char '_'
rest <- many $ alphaNumChar <|> char '_' rest <- many $ alphaNumChar <|> char '_'
let ident = T.pack (firstChar : rest) let ident = T.pack (firstChar : rest)
when (ident `elem` keywords) $ guard (ident `notElem` keywords)
fail $
"Reserved word: " ++ T.unpack ident
pure ident pure ident
pVar :: Parser IRExpr pVar :: Parser IRExpr
pVar = label "variable" $ lexeme $ Var <$> pIdentifier pVar = label "variable" $ lexeme $ Var <$> pIdentifier
defChoice :: NonEmpty Text -> Parser ()
defChoice options = lexeme $ label (T.unpack $ head options) $ void $ choice $ fmap chunk options
pParamGroup :: Parser [Param] pParamGroup :: Parser [Param]
pParamGroup = lexeme $ label "parameter group" $ between (char '(') (char ')') $ do pParamGroup = lexeme $ label "parameter group" $ between (char '(') (char ')') $ do
idents <- some pIdentifier idents <- some pIdentifier
eat ":" symbol ":"
ty <- pIRExpr ty <- pIRExpr
pure $ map (,ty) idents pure $ map (,ty) idents
@ -73,31 +72,31 @@ mkPi ascription (param, typ) = Pi param typ ascription
pLAbs :: Parser IRExpr pLAbs :: Parser IRExpr
pLAbs = lexeme $ label "λ-abstraction" $ do pLAbs = lexeme $ label "λ-abstraction" $ do
_ <- defChoice $ "λ" :| ["fun"] _ <- pKeyword "fun" <|> symbol "λ"
params <- pSomeParams params <- pSomeParams
ascription <- optional pAscription ascription <- optional pAscription
_ <- defChoice $ "=>" :| [""] _ <- symbol "=>" <|> symbol ""
body <- pIRExpr body <- pIRExpr
pure $ foldr (mkAbs ascription) body params pure $ foldr (mkAbs ascription) body params
pPAbs :: Parser IRExpr pPAbs :: Parser IRExpr
pPAbs = lexeme $ label "Π-abstraction" $ do pPAbs = lexeme $ label "Π-abstraction" $ do
_ <- defChoice $ "" :| ["forall", ""] _ <- pKeyword "forall" <|> symbol "" <|> symbol ""
params <- pSomeParams params <- pSomeParams
ascription <- optional pAscription ascription <- optional pAscription
eat "," symbol ","
body <- pIRExpr body <- pIRExpr
pure $ foldr (mkPi ascription) body params pure $ foldr (mkPi ascription) body params
pBinding :: Parser (Text, Maybe IRExpr, IRExpr) pBinding :: Parser (Text, Maybe IRExpr, IRExpr)
pBinding = lexeme $ label "binding" $ do pBinding = lexeme $ label "binding" $ do
eat "(" symbol "("
ident <- pIdentifier ident <- pIdentifier
params <- pManyParams params <- pManyParams
ascription <- optional pAscription ascription <- optional pAscription
eat ":=" symbol ":="
value <- pIRExpr value <- pIRExpr
eat ")" symbol ")"
pure pure
( ident ( ident
, flip (foldr (mkPi Nothing)) params <$> ascription , flip (foldr (mkPi Nothing)) params <$> ascription
@ -106,54 +105,45 @@ pBinding = lexeme $ label "binding" $ do
pLet :: Parser IRExpr pLet :: Parser IRExpr
pLet = lexeme $ label "let expression" $ do pLet = lexeme $ label "let expression" $ do
eat "let" pKeyword "let"
bindings <- some pBinding bindings <- some pBinding
eat "in" pKeyword "in"
body <- try pIRExpr body <- pIRExpr
eat "end" pKeyword "end"
pure $ foldr letTuple body bindings pure $ foldr letTuple body bindings
where where
letTuple :: (Text, Maybe IRExpr, IRExpr) -> IRExpr -> IRExpr letTuple :: (Text, Maybe IRExpr, IRExpr) -> IRExpr -> IRExpr
letTuple (name, ascription, value) = Let name ascription value letTuple (name, ascription, value) = Let name ascription value
pArrow :: Parser IRExpr
pArrow = lexeme $ label "->" $ do
a <- pAppTerm
_ <- defChoice $ "->" :| [""]
Pi "" a Nothing <$> pIRExpr
pApp :: Parser IRExpr pApp :: Parser IRExpr
pApp = lexeme $ foldl1 App <$> some pTerm pApp = lexeme $ foldl1 App <$> some pTerm
pStar :: Parser IRExpr pStar :: Parser IRExpr
pStar = lexeme $ Star <$ eat "*" pStar = lexeme $ Star <$ symbol "*"
pSquare :: Parser IRExpr
pSquare = lexeme $ Level 0 <$ defChoice ("" :| ["[]"])
subscriptDigit :: Parser Integer subscriptDigit :: Parser Integer
subscriptDigit = subscriptDigit =
choice choice
[ chunk "" >> pure 0 [ symbol "" >> pure 0
, chunk "" >> pure 1 , symbol "" >> pure 1
, chunk "" >> pure 2 , symbol "" >> pure 2
, chunk "" >> pure 3 , symbol "" >> pure 3
, chunk "" >> pure 4 , symbol "" >> pure 4
, chunk "" >> pure 5 , symbol "" >> pure 5
, chunk "" >> pure 6 , symbol "" >> pure 6
, chunk "" >> pure 7 , symbol "" >> pure 7
, chunk "" >> pure 8 , symbol "" >> pure 8
, chunk "" >> pure 9 , symbol "" >> pure 9
] ]
subscriptInt :: Parser Integer subscriptInt :: Parser Integer
subscriptInt = foldl (\acc d -> acc * 10 + d) 0 <$> some subscriptDigit subscriptInt = foldl (\acc d -> acc * 10 + d) 0 <$> some subscriptDigit
pNumSort :: Parser IRExpr pSquare :: Parser IRExpr
pNumSort = lexeme $ pSquare >> Level <$> (L.decimal <|> subscriptInt) pSquare = lexeme $ (symbol "" <|> symbol "[]") >> option (Level 0) (Level <$> (L.decimal <|> subscriptInt))
pSort :: Parser IRExpr pSort :: Parser IRExpr
pSort = lexeme $ pStar <|> try pNumSort <|> pSquare pSort = lexeme $ pStar <|> pSquare
pTerm :: Parser IRExpr pTerm :: Parser IRExpr
pTerm = lexeme $ label "term" $ choice [pSort, pVar, between (char '(') (char ')') pIRExpr] pTerm = lexeme $ label "term" $ choice [pSort, pVar, between (char '(') (char ')') pIRExpr]
@ -162,45 +152,49 @@ pAppTerm :: Parser IRExpr
pAppTerm = lexeme $ choice [pLAbs, pPAbs, pLet, pApp] pAppTerm = lexeme $ choice [pLAbs, pPAbs, pLet, pApp]
pIRExpr :: Parser IRExpr pIRExpr :: Parser IRExpr
pIRExpr = lexeme $ try pArrow <|> pAppTerm pIRExpr = lexeme $ do
e <- pAppTerm
option e $ do
_ <- symbol "->" <|> symbol ""
Pi "" e Nothing <$> pIRExpr
pAscription :: Parser IRExpr pAscription :: Parser IRExpr
pAscription = lexeme $ try $ eat ":" >> label "type" pIRExpr pAscription = lexeme $ try $ symbol ":" >> label "type" pIRExpr
pAxiom :: Parser IRDef pAxiom :: Parser IRDef
pAxiom = lexeme $ label "axiom" $ do pAxiom = lexeme $ label "axiom" $ do
eat "axiom" pKeyword "axiom"
ident <- pIdentifier ident <- pIdentifier
params <- pManyParams params <- pManyParams
ascription <- fmap (flip (foldr (mkPi Nothing)) params) pAscription ascription <- fmap (flip (foldr (mkPi Nothing)) params) pAscription
eat ";" symbol ";"
pure $ Axiom ident ascription pure $ Axiom ident ascription
pVariable :: Parser [IRSectionDef] pVariable :: Parser [IRSectionDef]
pVariable = lexeme $ label "variable declaration" $ do pVariable = lexeme $ label "variable declaration" $ do
eat "variable" <|> eat "hypothesis" pKeyword "variable" <|> pKeyword "hypothesis"
vars <- pManyParams vars <- pManyParams
eat ";" symbol ";"
pure $ uncurry Variable <$> vars pure $ uncurry Variable <$> vars
pDef :: Parser IRDef pDef :: Parser IRDef
pDef = lexeme $ label "definition" $ do pDef = lexeme $ label "definition" $ do
eat "def" pKeyword "def"
ident <- pIdentifier ident <- pIdentifier
params <- pManyParams params <- pManyParams
ascription <- fmap (flip (foldr (mkPi Nothing)) params) <$> optional pAscription ascription <- fmap (flip (foldr (mkPi Nothing)) params) <$> optional pAscription
eat ":=" symbol ":="
body <- pIRExpr body <- pIRExpr
eat ";" symbol ";"
pure $ Def ident ascription $ foldr (mkAbs Nothing) body params pure $ Def ident ascription $ foldr (mkAbs Nothing) body params
pSection :: Parser IRSectionDef pSection :: Parser IRSectionDef
pSection = lexeme $ label "section" $ do pSection = lexeme $ label "section" $ do
eat "section" pKeyword "section"
secLabel <- pIdentifier secLabel <- pIdentifier
body <- concat <$> many pIRDef body <- concat <$> many pIRDef
eat "end" pKeyword "end"
void $ lexeme $ chunk secLabel void $ lexeme $ string secLabel
pure $ Section secLabel body pure $ Section secLabel body
pIRDef :: Parser [IRSectionDef] pIRDef :: Parser [IRSectionDef]