drastically sped up parser

This commit is contained in:
William Ball 2024-12-08 21:42:13 -08:00
parent 6f34793ba2
commit 95a4d822b6

View file

@ -2,13 +2,14 @@
module Parser where
import Control.Monad.Combinators (option)
import Control.Monad.Except
import Data.List (foldl, foldl1)
import qualified Data.Text as T
import Errors (Error (..))
import IR
import Preprocessor
import Text.Megaparsec (Parsec, ShowErrorComponent (..), between, choice, chunk, errorBundlePretty, label, runParser, try)
import Text.Megaparsec (MonadParsec (..), Parsec, ShowErrorComponent (..), between, choice, errorBundlePretty, label, runParser, try)
import Text.Megaparsec.Char
import qualified Text.Megaparsec.Char.Lexer as L
@ -30,8 +31,11 @@ skipSpace =
lexeme :: Parser a -> Parser a
lexeme = L.lexeme skipSpace
eat :: Text -> Parser ()
eat = void . lexeme . chunk
symbol :: Text -> Parser ()
symbol = void . L.symbol skipSpace
pKeyword :: Text -> Parser ()
pKeyword keyword = void $ lexeme (string keyword <* notFollowedBy alphaNumChar)
keywords :: [Text]
keywords = ["forall", "let", "in", "end", "fun", "def", "axiom", "section", "variable", "hypothesis"]
@ -41,21 +45,16 @@ pIdentifier = try $ label "identifier" $ lexeme $ do
firstChar <- letterChar <|> char '_'
rest <- many $ alphaNumChar <|> char '_'
let ident = T.pack (firstChar : rest)
when (ident `elem` keywords) $
fail $
"Reserved word: " ++ T.unpack ident
guard (ident `notElem` keywords)
pure ident
pVar :: Parser IRExpr
pVar = label "variable" $ lexeme $ Var <$> pIdentifier
defChoice :: NonEmpty Text -> Parser ()
defChoice options = lexeme $ label (T.unpack $ head options) $ void $ choice $ fmap chunk options
pParamGroup :: Parser [Param]
pParamGroup = lexeme $ label "parameter group" $ between (char '(') (char ')') $ do
idents <- some pIdentifier
eat ":"
symbol ":"
ty <- pIRExpr
pure $ map (,ty) idents
@ -73,31 +72,31 @@ mkPi ascription (param, typ) = Pi param typ ascription
pLAbs :: Parser IRExpr
pLAbs = lexeme $ label "λ-abstraction" $ do
_ <- defChoice $ "λ" :| ["fun"]
_ <- pKeyword "fun" <|> symbol "λ"
params <- pSomeParams
ascription <- optional pAscription
_ <- defChoice $ "=>" :| [""]
_ <- symbol "=>" <|> symbol ""
body <- pIRExpr
pure $ foldr (mkAbs ascription) body params
pPAbs :: Parser IRExpr
pPAbs = lexeme $ label "Π-abstraction" $ do
_ <- defChoice $ "" :| ["forall", ""]
_ <- pKeyword "forall" <|> symbol "" <|> symbol ""
params <- pSomeParams
ascription <- optional pAscription
eat ","
symbol ","
body <- pIRExpr
pure $ foldr (mkPi ascription) body params
pBinding :: Parser (Text, Maybe IRExpr, IRExpr)
pBinding = lexeme $ label "binding" $ do
eat "("
symbol "("
ident <- pIdentifier
params <- pManyParams
ascription <- optional pAscription
eat ":="
symbol ":="
value <- pIRExpr
eat ")"
symbol ")"
pure
( ident
, flip (foldr (mkPi Nothing)) params <$> ascription
@ -106,54 +105,45 @@ pBinding = lexeme $ label "binding" $ do
pLet :: Parser IRExpr
pLet = lexeme $ label "let expression" $ do
eat "let"
pKeyword "let"
bindings <- some pBinding
eat "in"
body <- try pIRExpr
eat "end"
pKeyword "in"
body <- pIRExpr
pKeyword "end"
pure $ foldr letTuple body bindings
where
letTuple :: (Text, Maybe IRExpr, IRExpr) -> IRExpr -> IRExpr
letTuple (name, ascription, value) = Let name ascription value
pArrow :: Parser IRExpr
pArrow = lexeme $ label "->" $ do
a <- pAppTerm
_ <- defChoice $ "->" :| [""]
Pi "" a Nothing <$> pIRExpr
pApp :: Parser IRExpr
pApp = lexeme $ foldl1 App <$> some pTerm
pStar :: Parser IRExpr
pStar = lexeme $ Star <$ eat "*"
pSquare :: Parser IRExpr
pSquare = lexeme $ Level 0 <$ defChoice ("" :| ["[]"])
pStar = lexeme $ Star <$ symbol "*"
subscriptDigit :: Parser Integer
subscriptDigit =
choice
[ chunk "" >> pure 0
, chunk "" >> pure 1
, chunk "" >> pure 2
, chunk "" >> pure 3
, chunk "" >> pure 4
, chunk "" >> pure 5
, chunk "" >> pure 6
, chunk "" >> pure 7
, chunk "" >> pure 8
, chunk "" >> pure 9
[ symbol "" >> pure 0
, symbol "" >> pure 1
, symbol "" >> pure 2
, symbol "" >> pure 3
, symbol "" >> pure 4
, symbol "" >> pure 5
, symbol "" >> pure 6
, symbol "" >> pure 7
, symbol "" >> pure 8
, symbol "" >> pure 9
]
subscriptInt :: Parser Integer
subscriptInt = foldl (\acc d -> acc * 10 + d) 0 <$> some subscriptDigit
pNumSort :: Parser IRExpr
pNumSort = lexeme $ pSquare >> Level <$> (L.decimal <|> subscriptInt)
pSquare :: Parser IRExpr
pSquare = lexeme $ (symbol "" <|> symbol "[]") >> option (Level 0) (Level <$> (L.decimal <|> subscriptInt))
pSort :: Parser IRExpr
pSort = lexeme $ pStar <|> try pNumSort <|> pSquare
pSort = lexeme $ pStar <|> pSquare
pTerm :: Parser IRExpr
pTerm = lexeme $ label "term" $ choice [pSort, pVar, between (char '(') (char ')') pIRExpr]
@ -162,45 +152,49 @@ pAppTerm :: Parser IRExpr
pAppTerm = lexeme $ choice [pLAbs, pPAbs, pLet, pApp]
pIRExpr :: Parser IRExpr
pIRExpr = lexeme $ try pArrow <|> pAppTerm
pIRExpr = lexeme $ do
e <- pAppTerm
option e $ do
_ <- symbol "->" <|> symbol ""
Pi "" e Nothing <$> pIRExpr
pAscription :: Parser IRExpr
pAscription = lexeme $ try $ eat ":" >> label "type" pIRExpr
pAscription = lexeme $ try $ symbol ":" >> label "type" pIRExpr
pAxiom :: Parser IRDef
pAxiom = lexeme $ label "axiom" $ do
eat "axiom"
pKeyword "axiom"
ident <- pIdentifier
params <- pManyParams
ascription <- fmap (flip (foldr (mkPi Nothing)) params) pAscription
eat ";"
symbol ";"
pure $ Axiom ident ascription
pVariable :: Parser [IRSectionDef]
pVariable = lexeme $ label "variable declaration" $ do
eat "variable" <|> eat "hypothesis"
pKeyword "variable" <|> pKeyword "hypothesis"
vars <- pManyParams
eat ";"
symbol ";"
pure $ uncurry Variable <$> vars
pDef :: Parser IRDef
pDef = lexeme $ label "definition" $ do
eat "def"
pKeyword "def"
ident <- pIdentifier
params <- pManyParams
ascription <- fmap (flip (foldr (mkPi Nothing)) params) <$> optional pAscription
eat ":="
symbol ":="
body <- pIRExpr
eat ";"
symbol ";"
pure $ Def ident ascription $ foldr (mkAbs Nothing) body params
pSection :: Parser IRSectionDef
pSection = lexeme $ label "section" $ do
eat "section"
pKeyword "section"
secLabel <- pIdentifier
body <- concat <$> many pIRDef
eat "end"
void $ lexeme $ chunk secLabel
pKeyword "end"
void $ lexeme $ string secLabel
pure $ Section secLabel body
pIRDef :: Parser [IRSectionDef]