perga/lib/Parser.hs
2024-12-08 21:57:17 -08:00

215 lines
6.3 KiB
Haskell

{-# LANGUAGE TupleSections #-}
module Parser where
import Control.Monad.Combinators (option)
import Control.Monad.Except
import Data.List (foldl, foldl1)
import qualified Data.Text as T
import Errors (Error (..))
import IR
import Preprocessor
import Text.Megaparsec (MonadParsec (..), Parsec, ShowErrorComponent (..), between, choice, errorBundlePretty, label, runParser, try)
import Text.Megaparsec.Char
import qualified Text.Megaparsec.Char.Lexer as L
newtype TypeError = TE Error
deriving (Eq, Ord)
type Parser = Parsec TypeError Text
instance ShowErrorComponent TypeError where
showErrorComponent (TE e) = toString e
skipSpace :: Parser ()
skipSpace =
L.space
space1
(L.skipLineComment "--")
(L.skipBlockCommentNested "[*" "*]")
lexeme :: Parser a -> Parser a
lexeme = L.lexeme skipSpace
symbol :: Text -> Parser ()
symbol = void . L.symbol skipSpace
pKeyword :: Text -> Parser ()
pKeyword keyword = void $ lexeme (string keyword <* notFollowedBy alphaNumChar)
keywords :: [Text]
keywords = ["forall", "let", "in", "end", "fun", "def", "axiom", "section", "variable", "hypothesis"]
pIdentifier :: Parser Text
pIdentifier = try $ label "identifier" $ lexeme $ do
ident <- T.pack <$> ((:) <$> (letterChar <|> char '_') <*> many (alphaNumChar <|> char '_'))
guard $ ident `notElem` keywords
pure ident
pVar :: Parser IRExpr
pVar = label "variable" $ lexeme $ Var <$> pIdentifier
pParamGroup :: Parser [Param]
pParamGroup = lexeme $ label "parameter group" $ between (char '(') (char ')') $ do
idents <- some pIdentifier
symbol ":"
ty <- pIRExpr
pure $ map (,ty) idents
pSomeParams :: Parser [Param]
pSomeParams = lexeme $ concat <$> some pParamGroup
pManyParams :: Parser [Param]
pManyParams = lexeme $ concat <$> many pParamGroup
mkAbs :: Maybe IRExpr -> (Text, IRExpr) -> IRExpr -> IRExpr
mkAbs ascription (param, typ) = Abs param typ ascription
mkPi :: Maybe IRExpr -> (Text, IRExpr) -> IRExpr -> IRExpr
mkPi ascription (param, typ) = Pi param typ ascription
pLAbs :: Parser IRExpr
pLAbs = lexeme $ label "λ-abstraction" $ do
_ <- pKeyword "fun" <|> symbol "λ"
params <- pSomeParams
ascription <- optional pAscription
_ <- symbol "=>" <|> symbol ""
body <- pIRExpr
pure $ foldr (mkAbs ascription) body params
pPAbs :: Parser IRExpr
pPAbs = lexeme $ label "Π-abstraction" $ do
_ <- pKeyword "forall" <|> symbol "" <|> symbol ""
params <- pSomeParams
ascription <- optional pAscription
symbol ","
body <- pIRExpr
pure $ foldr (mkPi ascription) body params
pBinding :: Parser (Text, Maybe IRExpr, IRExpr)
pBinding = lexeme $ label "binding" $ do
symbol "("
ident <- pIdentifier
params <- pManyParams
ascription <- optional pAscription
symbol ":="
value <- pIRExpr
symbol ")"
pure
( ident
, flip (foldr (mkPi Nothing)) params <$> ascription
, foldr (mkAbs Nothing) value params
)
pLet :: Parser IRExpr
pLet = lexeme $ label "let expression" $ do
pKeyword "let"
bindings <- some pBinding
pKeyword "in"
body <- pIRExpr
pKeyword "end"
pure $ foldr letTuple body bindings
where
letTuple :: (Text, Maybe IRExpr, IRExpr) -> IRExpr -> IRExpr
letTuple (name, ascription, value) = Let name ascription value
pApp :: Parser IRExpr
pApp = lexeme $ foldl1 App <$> some pTerm
pStar :: Parser IRExpr
pStar = lexeme $ Star <$ symbol "*"
subscriptDigit :: Parser Integer
subscriptDigit =
choice
[ symbol "" >> pure 0
, symbol "" >> pure 1
, symbol "" >> pure 2
, symbol "" >> pure 3
, symbol "" >> pure 4
, symbol "" >> pure 5
, symbol "" >> pure 6
, symbol "" >> pure 7
, symbol "" >> pure 8
, symbol "" >> pure 9
]
subscriptInt :: Parser Integer
subscriptInt = foldl (\acc d -> acc * 10 + d) 0 <$> some subscriptDigit
pSquare :: Parser IRExpr
pSquare = lexeme $ (symbol "" <|> symbol "[]") >> option (Level 0) (Level <$> (L.decimal <|> subscriptInt))
pSort :: Parser IRExpr
pSort = lexeme $ pStar <|> pSquare
pTerm :: Parser IRExpr
pTerm = lexeme $ label "term" $ choice [pSort, pVar, between (char '(') (char ')') pIRExpr]
pAppTerm :: Parser IRExpr
pAppTerm = lexeme $ choice [pLAbs, pPAbs, pLet, pApp]
pIRExpr :: Parser IRExpr
pIRExpr = lexeme $ do
e <- pAppTerm
option e $ (symbol "->" <|> symbol "") >> Pi "" e Nothing <$> pIRExpr
pAscription :: Parser IRExpr
pAscription = lexeme $ try $ symbol ":" >> label "type" pIRExpr
pAxiom :: Parser IRDef
pAxiom = lexeme $ label "axiom" $ do
pKeyword "axiom"
ident <- pIdentifier
params <- pManyParams
ascription <- fmap (flip (foldr (mkPi Nothing)) params) pAscription
symbol ";"
pure $ Axiom ident ascription
pVariable :: Parser [IRSectionDef]
pVariable = lexeme $ label "variable declaration" $ do
pKeyword "variable" <|> pKeyword "hypothesis"
vars <- pManyParams
symbol ";"
pure $ uncurry Variable <$> vars
pDef :: Parser IRDef
pDef = lexeme $ label "definition" $ do
pKeyword "def"
ident <- pIdentifier
params <- pManyParams
ascription <- fmap (flip (foldr (mkPi Nothing)) params) <$> optional pAscription
symbol ":="
body <- pIRExpr
symbol ";"
pure $ Def ident ascription $ foldr (mkAbs Nothing) body params
pSection :: Parser IRSectionDef
pSection = lexeme $ label "section" $ do
pKeyword "section"
secLabel <- pIdentifier
body <- concat <$> many pIRDef
pKeyword "end"
void $ lexeme $ string secLabel
pure $ Section secLabel body
pIRDef :: Parser [IRSectionDef]
pIRDef = (pure . IRDef <$> pAxiom) <|> (pure . IRDef <$> pDef) <|> pVariable <|> (pure <$> pSection)
pIRProgram :: Parser IRProgram
pIRProgram = skipSpace >> concat <$> some pIRDef
parserWrapper :: Parser a -> String -> Text -> Either String a
parserWrapper p filename input = first errorBundlePretty $ runParser p filename input
parseProgram :: String -> Text -> Either String IRProgram
parseProgram = parserWrapper pIRProgram
parseDef :: String -> Text -> Either String IRDef
parseDef = parserWrapper (pAxiom <|> pDef)
parseExpr :: String -> Text -> Either String IRExpr
parseExpr = parserWrapper pIRExpr
handleFile :: String -> ExceptT String IO IRProgram
handleFile filename = toString `withExceptT` runPreprocessor filename >>= hoistEither . parseProgram filename