perga/lib/Parser.hs

282 lines
8.1 KiB
Haskell
Raw Normal View History

2024-11-30 20:34:09 -08:00
{-# LANGUAGE TupleSections #-}
2024-11-17 01:57:53 -08:00
2024-11-30 20:34:09 -08:00
module Parser where
2024-10-05 13:31:09 -07:00
2024-12-08 21:42:13 -08:00
import Control.Monad.Combinators (option)
import Control.Monad.Except
import Data.List (foldl, foldl1)
2024-12-10 20:31:53 -08:00
import qualified Data.Map.Strict as M
2024-11-14 22:02:04 -08:00
import qualified Data.Text as T
2024-11-17 18:33:14 -08:00
import Errors (Error (..))
2024-11-30 20:34:09 -08:00
import IR
import Preprocessor
import Text.Megaparsec (MonadParsec (..), ParsecT, ShowErrorComponent (..), between, choice, errorBundlePretty, label, runParserT, try)
2024-10-05 13:31:09 -07:00
import Text.Megaparsec.Char
import qualified Text.Megaparsec.Char.Lexer as L
2024-10-05 16:04:13 -07:00
2024-11-17 18:33:14 -08:00
newtype TypeError = TE Error
2024-11-22 19:44:31 -08:00
deriving (Eq, Ord)
2024-11-17 01:57:53 -08:00
2024-12-10 20:31:53 -08:00
data Fixity
= InfixL Int
| InfixR Int
deriving (Eq, Show)
type Operators = Map Text Fixity
type Parser = ParsecT TypeError Text (State Operators)
2024-10-05 16:04:13 -07:00
2024-11-17 01:57:53 -08:00
instance ShowErrorComponent TypeError where
2024-11-22 19:44:31 -08:00
showErrorComponent (TE e) = toString e
2024-10-05 13:31:09 -07:00
skipSpace :: Parser ()
skipSpace =
2024-11-11 16:38:46 -08:00
L.space
space1
(L.skipLineComment "--")
(L.skipBlockCommentNested "[*" "*]")
2024-10-05 13:31:09 -07:00
lexeme :: Parser a -> Parser a
lexeme = L.lexeme skipSpace
2024-12-10 23:36:34 -08:00
parens :: Parser a -> Parser a
parens = between (char '(') (char ')')
2024-12-08 21:42:13 -08:00
symbol :: Text -> Parser ()
symbol = void . L.symbol skipSpace
2024-12-10 20:31:53 -08:00
symbols :: String
symbols = "!@#$%^&*-+=<>,./?[]{}\\|`~'\"∧∨⊙×≅"
2024-12-08 21:42:13 -08:00
pKeyword :: Text -> Parser ()
pKeyword keyword = void $ lexeme (string keyword <* notFollowedBy alphaNumChar)
2024-11-22 19:44:31 -08:00
2024-11-23 09:16:32 -08:00
keywords :: [Text]
2024-12-05 20:11:38 -08:00
keywords = ["forall", "let", "in", "end", "fun", "def", "axiom", "section", "variable", "hypothesis"]
2024-11-23 09:16:32 -08:00
2024-11-14 22:02:04 -08:00
pIdentifier :: Parser Text
2024-11-23 09:16:32 -08:00
pIdentifier = try $ label "identifier" $ lexeme $ do
2024-12-08 21:57:17 -08:00
ident <- T.pack <$> ((:) <$> (letterChar <|> char '_') <*> many (alphaNumChar <|> char '_'))
guard $ ident `notElem` keywords
2024-11-23 09:16:32 -08:00
pure ident
2024-10-05 13:31:09 -07:00
2024-12-10 20:31:53 -08:00
pSymbol :: Parser Text
pSymbol = lexeme $ takeWhile1P (Just "symbol") (`elem` symbols)
2024-11-30 20:34:09 -08:00
pVar :: Parser IRExpr
pVar = label "variable" $ lexeme $ Var <$> pIdentifier
2024-10-05 13:31:09 -07:00
pPureVar :: Parser IRExpr
pPureVar = label "variable" $ lexeme $ symbol "#" >> PureVar <$> pIdentifier
2024-12-13 22:45:37 -08:00
pParamGroup :: Parser [Param]
pParamGroup = lexeme $ label "parameter group" $ parens $ do
idents <- some $ pIdentifier <|> pSymbol
2024-12-08 21:42:13 -08:00
symbol ":"
2024-11-30 20:34:09 -08:00
ty <- pIRExpr
pure $ map (,ty) idents
2024-11-11 16:38:46 -08:00
2024-12-13 22:45:37 -08:00
pSomeParams :: Parser [Param]
pSomeParams = lexeme $ concat <$> some pParamGroup
2024-11-17 18:33:14 -08:00
2024-12-13 22:45:37 -08:00
pManyParams :: Parser [Param]
pManyParams = lexeme $ concat <$> many pParamGroup
2024-10-05 13:31:09 -07:00
mkAbs :: (Text, IRExpr) -> IRExpr -> IRExpr
mkAbs (param, typ) = Abs param typ
mkPi :: (Text, IRExpr) -> IRExpr -> IRExpr
mkPi (param, typ) = Pi param typ
2024-11-30 20:34:09 -08:00
pLAbs :: Parser IRExpr
pLAbs = lexeme $ label "λ-abstraction" $ do
2024-12-08 21:42:13 -08:00
_ <- pKeyword "fun" <|> symbol "λ"
2024-12-13 22:45:37 -08:00
params <- pSomeParams
2024-12-08 21:42:13 -08:00
_ <- symbol "=>" <|> symbol ""
2024-11-30 20:34:09 -08:00
body <- pIRExpr
pure $ foldr mkAbs body params
pALAbs :: Parser IRExpr
pALAbs = lexeme $ label "λ-abstraction" $ do
_ <- symbol "["
args <- some pIdentifier
_ <- symbol ":"
typ <- pIRExpr
_ <- symbol "]"
body <- pIRExpr
pure $ foldr (mkAbs . (,typ)) body args
2024-10-05 13:31:09 -07:00
2024-11-30 20:34:09 -08:00
pPAbs :: Parser IRExpr
pPAbs = lexeme $ label "Π-abstraction" $ do
2024-12-08 21:42:13 -08:00
_ <- pKeyword "forall" <|> symbol "" <|> symbol ""
2024-12-13 22:45:37 -08:00
params <- pSomeParams
2024-12-08 21:42:13 -08:00
symbol ","
2024-11-30 20:34:09 -08:00
body <- pIRExpr
pure $ foldr mkPi body params
2024-11-11 16:38:46 -08:00
2024-11-30 20:34:09 -08:00
pBinding :: Parser (Text, Maybe IRExpr, IRExpr)
2024-11-23 09:16:32 -08:00
pBinding = lexeme $ label "binding" $ do
2024-12-08 21:42:13 -08:00
symbol "("
2024-11-23 09:16:32 -08:00
ident <- pIdentifier
2024-12-13 22:45:37 -08:00
params <- pManyParams
2024-11-30 21:05:07 -08:00
ascription <- optional pAscription
2024-12-08 21:42:13 -08:00
symbol ":="
2024-11-30 20:34:09 -08:00
value <- pIRExpr
2024-12-08 21:42:13 -08:00
symbol ")"
2024-11-30 20:34:09 -08:00
pure
( ident
, flip (foldr mkPi) params <$> ascription
, foldr mkAbs value params
2024-11-30 20:34:09 -08:00
)
pLet :: Parser IRExpr
pLet = lexeme $ label "let expression" $ do
2024-12-08 21:42:13 -08:00
pKeyword "let"
2024-11-23 09:16:32 -08:00
bindings <- some pBinding
2024-12-08 21:42:13 -08:00
pKeyword "in"
body <- pIRExpr
pKeyword "end"
2024-11-30 20:34:09 -08:00
pure $ foldr letTuple body bindings
where
letTuple :: (Text, Maybe IRExpr, IRExpr) -> IRExpr -> IRExpr
letTuple (name, ascription, value) = Let name ascription value
2024-11-23 09:16:32 -08:00
2024-11-30 20:34:09 -08:00
pApp :: Parser IRExpr
2024-11-17 18:33:14 -08:00
pApp = lexeme $ foldl1 App <$> some pTerm
2024-10-05 13:31:09 -07:00
2024-11-30 20:34:09 -08:00
pStar :: Parser IRExpr
2024-12-10 20:31:53 -08:00
pStar = lexeme $ Star <$ symbol ""
subscriptDigit :: Parser Integer
subscriptDigit =
choice
2024-12-08 21:42:13 -08:00
[ symbol "" >> pure 0
, symbol "" >> pure 1
, symbol "" >> pure 2
, symbol "" >> pure 3
, symbol "" >> pure 4
, symbol "" >> pure 5
, symbol "" >> pure 6
, symbol "" >> pure 7
, symbol "" >> pure 8
, symbol "" >> pure 9
]
subscriptInt :: Parser Integer
subscriptInt = foldl (\acc d -> acc * 10 + d) 0 <$> some subscriptDigit
2024-10-05 13:31:09 -07:00
2024-12-08 21:42:13 -08:00
pSquare :: Parser IRExpr
pSquare = lexeme $ (symbol "" <|> symbol "[]") >> option (Level 0) (Level <$> (L.decimal <|> subscriptInt))
2024-11-30 20:34:09 -08:00
pSort :: Parser IRExpr
2024-12-08 21:42:13 -08:00
pSort = lexeme $ pStar <|> pSquare
2024-11-17 18:33:14 -08:00
2024-12-10 23:36:34 -08:00
pOpSection :: Parser IRExpr
pOpSection = lexeme $ parens $ Var <$> pSymbol
2024-12-04 17:46:50 -08:00
pTerm :: Parser IRExpr
pTerm = lexeme $ label "term" $ choice [pSort, pPureVar, pVar, try pOpSection, parens pIRExpr]
2024-12-04 17:46:50 -08:00
2024-12-10 20:31:53 -08:00
pInfix :: Parser IRExpr
pInfix = parseWithPrec 0
where
parseWithPrec :: Int -> Parser IRExpr
parseWithPrec prec = pApp >>= continue prec
continue :: Int -> IRExpr -> Parser IRExpr
continue prec lhs = option lhs $ do
op <- lookAhead pSymbol
operators <- get
case M.lookup op operators of
Just fixity -> do
let (opPrec, nextPrec) = case fixity of
InfixL p -> (p, p)
InfixR p -> (p, p + 1)
if opPrec <= prec
then pure lhs
else do
_ <- pSymbol
rhs <- parseWithPrec nextPrec
continue prec (App (App (Var op) lhs) rhs)
Nothing -> fail $ "unknown operator '" ++ toString op ++ "'"
2024-12-04 17:46:50 -08:00
pAppTerm :: Parser IRExpr
pAppTerm = lexeme $ choice [pLAbs, pALAbs, pPAbs, pLet, pInfix]
2024-12-04 17:46:50 -08:00
pIRExpr :: Parser IRExpr
2024-12-08 21:42:13 -08:00
pIRExpr = lexeme $ do
e <- pAppTerm
option e $ (symbol "->" <|> symbol "") >> Pi "" e <$> pIRExpr
2024-12-04 17:46:50 -08:00
pAscription :: Parser IRExpr
2024-12-08 21:42:13 -08:00
pAscription = lexeme $ try $ symbol ":" >> label "type" pIRExpr
2024-12-04 17:46:50 -08:00
2024-11-30 21:05:07 -08:00
pAxiom :: Parser IRDef
pAxiom = lexeme $ label "axiom" $ do
2024-12-08 21:42:13 -08:00
pKeyword "axiom"
2024-11-30 21:05:07 -08:00
ident <- pIdentifier
2024-12-13 22:45:37 -08:00
params <- pManyParams
ascription <- fmap (flip (foldr mkPi) params) pAscription
2024-12-08 21:42:13 -08:00
symbol ";"
pure $ Axiom ident ascription
2024-11-22 19:44:31 -08:00
2024-12-06 13:36:14 -08:00
pVariable :: Parser [IRSectionDef]
2024-12-04 17:46:50 -08:00
pVariable = lexeme $ label "variable declaration" $ do
2024-12-08 21:42:13 -08:00
pKeyword "variable" <|> pKeyword "hypothesis"
2024-12-13 22:45:37 -08:00
vars <- pManyParams
2024-12-08 21:42:13 -08:00
symbol ";"
2024-12-06 13:36:14 -08:00
pure $ uncurry Variable <$> vars
2024-12-04 17:46:50 -08:00
2024-11-30 21:05:07 -08:00
pDef :: Parser IRDef
pDef = lexeme $ label "definition" $ do
2024-12-08 21:42:13 -08:00
pKeyword "def"
2024-12-10 20:31:53 -08:00
ident <- pIdentifier <|> pSymbol
2024-12-13 22:45:37 -08:00
params <- pManyParams
ascription <- fmap (flip (foldr mkPi) params) <$> optional pAscription
2024-12-08 21:42:13 -08:00
symbol ":="
2024-11-30 20:34:09 -08:00
body <- pIRExpr
2024-12-08 21:42:13 -08:00
symbol ";"
pure $ Def ident ascription $ foldr mkAbs body params
2024-10-05 13:31:09 -07:00
2024-12-10 20:31:53 -08:00
pFixityDec :: Parser ()
pFixityDec = do
_ <- string "infix"
fixity <-
choice
[ InfixL <$> (lexeme (char 'l') >> lexeme L.decimal)
, InfixR <$> (lexeme (char 'r') >> lexeme L.decimal)
]
ident <- pSymbol
modify (M.insert ident fixity)
symbol ";"
2024-12-06 13:36:14 -08:00
pSection :: Parser IRSectionDef
2024-12-04 17:46:50 -08:00
pSection = lexeme $ label "section" $ do
2024-12-08 21:42:13 -08:00
pKeyword "section"
2024-12-04 17:46:50 -08:00
secLabel <- pIdentifier
2024-12-06 13:36:14 -08:00
body <- concat <$> many pIRDef
2024-12-08 21:42:13 -08:00
pKeyword "end"
void $ lexeme $ string secLabel
2024-12-04 17:46:50 -08:00
pure $ Section secLabel body
2024-11-17 18:33:14 -08:00
2024-12-06 13:36:14 -08:00
pIRDef :: Parser [IRSectionDef]
2024-12-10 20:31:53 -08:00
pIRDef = (pure . IRDef <$> pAxiom) <|> (pure . IRDef <$> pDef) <|> pVariable <|> (pure <$> pSection) <|> ([] <$ pFixityDec)
2024-11-22 19:44:31 -08:00
2024-11-30 20:34:09 -08:00
pIRProgram :: Parser IRProgram
2024-12-06 13:36:14 -08:00
pIRProgram = skipSpace >> concat <$> some pIRDef
2024-11-17 18:33:14 -08:00
2024-11-30 20:34:09 -08:00
parserWrapper :: Parser a -> String -> Text -> Either String a
2024-12-10 20:31:53 -08:00
parserWrapper p filename input = first errorBundlePretty $ evalState (runParserT p filename input) M.empty
2024-11-17 18:33:14 -08:00
2024-11-30 20:34:09 -08:00
parseProgram :: String -> Text -> Either String IRProgram
parseProgram = parserWrapper pIRProgram
2024-11-18 14:33:21 -08:00
2024-11-30 20:34:09 -08:00
parseDef :: String -> Text -> Either String IRDef
2024-12-06 13:36:14 -08:00
parseDef = parserWrapper (pAxiom <|> pDef)
2024-11-17 18:33:14 -08:00
2024-11-30 20:34:09 -08:00
parseExpr :: String -> Text -> Either String IRExpr
parseExpr = parserWrapper pIRExpr
2024-11-30 20:34:09 -08:00
handleFile :: String -> ExceptT String IO IRProgram
handleFile filename = toString `withExceptT` runPreprocessor filename >>= hoistEither . parseProgram filename