2024-11-30 20:34:09 -08:00
|
|
|
{-# LANGUAGE TupleSections #-}
|
2024-11-17 01:57:53 -08:00
|
|
|
|
2024-11-30 20:34:09 -08:00
|
|
|
module Parser where
|
2024-10-05 13:31:09 -07:00
|
|
|
|
2024-12-08 21:42:13 -08:00
|
|
|
import Control.Monad.Combinators (option)
|
2024-11-22 10:36:51 -08:00
|
|
|
import Control.Monad.Except
|
2024-12-02 20:39:56 -08:00
|
|
|
import Data.List (foldl, foldl1)
|
2024-12-10 20:31:53 -08:00
|
|
|
import qualified Data.Map.Strict as M
|
2024-11-14 22:02:04 -08:00
|
|
|
import qualified Data.Text as T
|
2024-11-17 18:33:14 -08:00
|
|
|
import Errors (Error (..))
|
2024-11-30 20:34:09 -08:00
|
|
|
import IR
|
2024-11-22 10:36:51 -08:00
|
|
|
import Preprocessor
|
2024-12-11 14:12:56 -08:00
|
|
|
import Text.Megaparsec (MonadParsec (..), ParsecT, ShowErrorComponent (..), between, choice, errorBundlePretty, label, runParserT, try)
|
2024-10-05 13:31:09 -07:00
|
|
|
import Text.Megaparsec.Char
|
|
|
|
|
import qualified Text.Megaparsec.Char.Lexer as L
|
2024-10-05 16:04:13 -07:00
|
|
|
|
2024-11-17 18:33:14 -08:00
|
|
|
newtype TypeError = TE Error
|
2024-11-22 19:44:31 -08:00
|
|
|
deriving (Eq, Ord)
|
2024-11-17 01:57:53 -08:00
|
|
|
|
2024-12-10 20:31:53 -08:00
|
|
|
data Fixity
|
|
|
|
|
= InfixL Int
|
|
|
|
|
| InfixR Int
|
|
|
|
|
deriving (Eq, Show)
|
|
|
|
|
|
|
|
|
|
type Operators = Map Text Fixity
|
|
|
|
|
|
|
|
|
|
type Parser = ParsecT TypeError Text (State Operators)
|
2024-10-05 16:04:13 -07:00
|
|
|
|
2024-11-17 01:57:53 -08:00
|
|
|
instance ShowErrorComponent TypeError where
|
2024-11-22 19:44:31 -08:00
|
|
|
showErrorComponent (TE e) = toString e
|
2024-10-05 13:31:09 -07:00
|
|
|
|
|
|
|
|
skipSpace :: Parser ()
|
|
|
|
|
skipSpace =
|
2024-11-11 16:38:46 -08:00
|
|
|
L.space
|
|
|
|
|
space1
|
|
|
|
|
(L.skipLineComment "--")
|
2024-11-20 12:23:41 -08:00
|
|
|
(L.skipBlockCommentNested "[*" "*]")
|
2024-10-05 13:31:09 -07:00
|
|
|
|
|
|
|
|
lexeme :: Parser a -> Parser a
|
|
|
|
|
lexeme = L.lexeme skipSpace
|
|
|
|
|
|
2024-12-10 23:36:34 -08:00
|
|
|
parens :: Parser a -> Parser a
|
|
|
|
|
parens = between (char '(') (char ')')
|
|
|
|
|
|
2024-12-08 21:42:13 -08:00
|
|
|
symbol :: Text -> Parser ()
|
|
|
|
|
symbol = void . L.symbol skipSpace
|
|
|
|
|
|
2024-12-10 20:31:53 -08:00
|
|
|
symbols :: String
|
|
|
|
|
symbols = "!@#$%^&*-+=<>,./?[]{}\\|`~'\"∧∨⊙×≅"
|
|
|
|
|
|
2024-12-08 21:42:13 -08:00
|
|
|
pKeyword :: Text -> Parser ()
|
|
|
|
|
pKeyword keyword = void $ lexeme (string keyword <* notFollowedBy alphaNumChar)
|
2024-11-22 19:44:31 -08:00
|
|
|
|
2024-11-23 09:16:32 -08:00
|
|
|
keywords :: [Text]
|
2024-12-05 20:11:38 -08:00
|
|
|
keywords = ["forall", "let", "in", "end", "fun", "def", "axiom", "section", "variable", "hypothesis"]
|
2024-11-23 09:16:32 -08:00
|
|
|
|
2024-11-14 22:02:04 -08:00
|
|
|
pIdentifier :: Parser Text
|
2024-11-23 09:16:32 -08:00
|
|
|
pIdentifier = try $ label "identifier" $ lexeme $ do
|
2024-12-08 21:57:17 -08:00
|
|
|
ident <- T.pack <$> ((:) <$> (letterChar <|> char '_') <*> many (alphaNumChar <|> char '_'))
|
|
|
|
|
guard $ ident `notElem` keywords
|
2024-11-23 09:16:32 -08:00
|
|
|
pure ident
|
2024-10-05 13:31:09 -07:00
|
|
|
|
2024-12-10 20:31:53 -08:00
|
|
|
pSymbol :: Parser Text
|
|
|
|
|
pSymbol = lexeme $ takeWhile1P (Just "symbol") (`elem` symbols)
|
|
|
|
|
|
2024-11-30 20:34:09 -08:00
|
|
|
pVar :: Parser IRExpr
|
|
|
|
|
pVar = label "variable" $ lexeme $ Var <$> pIdentifier
|
2024-10-05 13:31:09 -07:00
|
|
|
|
2024-12-10 20:31:53 -08:00
|
|
|
pParamGroup :: Parser Text -> Parser [Param]
|
2024-12-10 23:36:34 -08:00
|
|
|
pParamGroup ident = lexeme $ label "parameter group" $ parens $ do
|
2024-12-10 20:31:53 -08:00
|
|
|
idents <- some ident
|
2024-12-08 21:42:13 -08:00
|
|
|
symbol ":"
|
2024-11-30 20:34:09 -08:00
|
|
|
ty <- pIRExpr
|
|
|
|
|
pure $ map (,ty) idents
|
2024-11-11 16:38:46 -08:00
|
|
|
|
2024-12-10 20:31:53 -08:00
|
|
|
pSomeParams :: Parser Text -> Parser [Param]
|
|
|
|
|
pSomeParams ident = lexeme $ concat <$> some (pParamGroup ident)
|
2024-11-17 18:33:14 -08:00
|
|
|
|
2024-12-10 20:31:53 -08:00
|
|
|
pManyParams :: Parser Text -> Parser [Param]
|
|
|
|
|
pManyParams ident = lexeme $ concat <$> many (pParamGroup ident)
|
2024-10-05 13:31:09 -07:00
|
|
|
|
2024-12-11 14:12:56 -08:00
|
|
|
mkAbs :: (Text, IRExpr) -> IRExpr -> IRExpr
|
|
|
|
|
mkAbs (param, typ) = Abs param typ
|
2024-12-01 21:43:15 -08:00
|
|
|
|
2024-12-11 14:12:56 -08:00
|
|
|
mkPi :: (Text, IRExpr) -> IRExpr -> IRExpr
|
|
|
|
|
mkPi (param, typ) = Pi param typ
|
2024-12-01 21:43:15 -08:00
|
|
|
|
2024-11-30 20:34:09 -08:00
|
|
|
pLAbs :: Parser IRExpr
|
|
|
|
|
pLAbs = lexeme $ label "λ-abstraction" $ do
|
2024-12-08 21:42:13 -08:00
|
|
|
_ <- pKeyword "fun" <|> symbol "λ"
|
2024-12-10 20:31:53 -08:00
|
|
|
params <- pSomeParams pIdentifier
|
2024-12-08 21:42:13 -08:00
|
|
|
_ <- symbol "=>" <|> symbol "⇒"
|
2024-11-30 20:34:09 -08:00
|
|
|
body <- pIRExpr
|
2024-12-11 14:12:56 -08:00
|
|
|
pure $ foldr mkAbs body params
|
|
|
|
|
|
|
|
|
|
pALAbs :: Parser IRExpr
|
|
|
|
|
pALAbs = lexeme $ label "λ-abstraction" $ do
|
|
|
|
|
_ <- symbol "["
|
|
|
|
|
args <- some pIdentifier
|
|
|
|
|
_ <- symbol ":"
|
|
|
|
|
typ <- pIRExpr
|
|
|
|
|
_ <- symbol "]"
|
|
|
|
|
body <- pIRExpr
|
|
|
|
|
pure $ foldr (mkAbs . (,typ)) body args
|
2024-10-05 13:31:09 -07:00
|
|
|
|
2024-11-30 20:34:09 -08:00
|
|
|
pPAbs :: Parser IRExpr
|
|
|
|
|
pPAbs = lexeme $ label "Π-abstraction" $ do
|
2024-12-08 21:42:13 -08:00
|
|
|
_ <- pKeyword "forall" <|> symbol "∏" <|> symbol "∀"
|
2024-12-10 20:31:53 -08:00
|
|
|
params <- pSomeParams pIdentifier
|
2024-12-08 21:42:13 -08:00
|
|
|
symbol ","
|
2024-11-30 20:34:09 -08:00
|
|
|
body <- pIRExpr
|
2024-12-11 14:12:56 -08:00
|
|
|
pure $ foldr mkPi body params
|
2024-11-11 16:38:46 -08:00
|
|
|
|
2024-11-30 20:34:09 -08:00
|
|
|
pBinding :: Parser (Text, Maybe IRExpr, IRExpr)
|
2024-11-23 09:16:32 -08:00
|
|
|
pBinding = lexeme $ label "binding" $ do
|
2024-12-08 21:42:13 -08:00
|
|
|
symbol "("
|
2024-11-23 09:16:32 -08:00
|
|
|
ident <- pIdentifier
|
2024-12-10 20:31:53 -08:00
|
|
|
params <- pManyParams pIdentifier
|
2024-11-30 21:05:07 -08:00
|
|
|
ascription <- optional pAscription
|
2024-12-08 21:42:13 -08:00
|
|
|
symbol ":="
|
2024-11-30 20:34:09 -08:00
|
|
|
value <- pIRExpr
|
2024-12-08 21:42:13 -08:00
|
|
|
symbol ")"
|
2024-11-30 20:34:09 -08:00
|
|
|
pure
|
|
|
|
|
( ident
|
2024-12-11 14:12:56 -08:00
|
|
|
, flip (foldr mkPi) params <$> ascription
|
|
|
|
|
, foldr mkAbs value params
|
2024-11-30 20:34:09 -08:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
pLet :: Parser IRExpr
|
|
|
|
|
pLet = lexeme $ label "let expression" $ do
|
2024-12-08 21:42:13 -08:00
|
|
|
pKeyword "let"
|
2024-11-23 09:16:32 -08:00
|
|
|
bindings <- some pBinding
|
2024-12-08 21:42:13 -08:00
|
|
|
pKeyword "in"
|
|
|
|
|
body <- pIRExpr
|
|
|
|
|
pKeyword "end"
|
2024-11-30 20:34:09 -08:00
|
|
|
pure $ foldr letTuple body bindings
|
|
|
|
|
where
|
|
|
|
|
letTuple :: (Text, Maybe IRExpr, IRExpr) -> IRExpr -> IRExpr
|
|
|
|
|
letTuple (name, ascription, value) = Let name ascription value
|
2024-11-23 09:16:32 -08:00
|
|
|
|
2024-11-30 20:34:09 -08:00
|
|
|
pApp :: Parser IRExpr
|
2024-11-17 18:33:14 -08:00
|
|
|
pApp = lexeme $ foldl1 App <$> some pTerm
|
2024-10-05 13:31:09 -07:00
|
|
|
|
2024-11-30 20:34:09 -08:00
|
|
|
pStar :: Parser IRExpr
|
2024-12-10 20:31:53 -08:00
|
|
|
pStar = lexeme $ Star <$ symbol "★"
|
2024-12-02 20:39:56 -08:00
|
|
|
|
|
|
|
|
subscriptDigit :: Parser Integer
|
|
|
|
|
subscriptDigit =
|
|
|
|
|
choice
|
2024-12-08 21:42:13 -08:00
|
|
|
[ symbol "₀" >> pure 0
|
|
|
|
|
, symbol "₁" >> pure 1
|
|
|
|
|
, symbol "₂" >> pure 2
|
|
|
|
|
, symbol "₃" >> pure 3
|
|
|
|
|
, symbol "₄" >> pure 4
|
|
|
|
|
, symbol "₅" >> pure 5
|
|
|
|
|
, symbol "₆" >> pure 6
|
|
|
|
|
, symbol "₇" >> pure 7
|
|
|
|
|
, symbol "₈" >> pure 8
|
|
|
|
|
, symbol "₉" >> pure 9
|
2024-12-02 20:39:56 -08:00
|
|
|
]
|
|
|
|
|
|
|
|
|
|
subscriptInt :: Parser Integer
|
|
|
|
|
subscriptInt = foldl (\acc d -> acc * 10 + d) 0 <$> some subscriptDigit
|
2024-10-05 13:31:09 -07:00
|
|
|
|
2024-12-08 21:42:13 -08:00
|
|
|
pSquare :: Parser IRExpr
|
|
|
|
|
pSquare = lexeme $ (symbol "□" <|> symbol "[]") >> option (Level 0) (Level <$> (L.decimal <|> subscriptInt))
|
2024-11-28 13:39:23 -08:00
|
|
|
|
2024-11-30 20:34:09 -08:00
|
|
|
pSort :: Parser IRExpr
|
2024-12-08 21:42:13 -08:00
|
|
|
pSort = lexeme $ pStar <|> pSquare
|
2024-11-17 18:33:14 -08:00
|
|
|
|
2024-12-10 23:36:34 -08:00
|
|
|
pOpSection :: Parser IRExpr
|
|
|
|
|
pOpSection = lexeme $ parens $ Var <$> pSymbol
|
|
|
|
|
|
2024-12-04 17:46:50 -08:00
|
|
|
pTerm :: Parser IRExpr
|
2024-12-10 23:36:34 -08:00
|
|
|
pTerm = lexeme $ label "term" $ choice [pSort, pVar, try pOpSection, parens pIRExpr]
|
2024-12-04 17:46:50 -08:00
|
|
|
|
2024-12-10 20:31:53 -08:00
|
|
|
pInfix :: Parser IRExpr
|
|
|
|
|
pInfix = parseWithPrec 0
|
|
|
|
|
where
|
|
|
|
|
parseWithPrec :: Int -> Parser IRExpr
|
|
|
|
|
parseWithPrec prec = pApp >>= continue prec
|
|
|
|
|
|
|
|
|
|
continue :: Int -> IRExpr -> Parser IRExpr
|
|
|
|
|
continue prec lhs = option lhs $ do
|
|
|
|
|
op <- lookAhead pSymbol
|
|
|
|
|
operators <- get
|
|
|
|
|
case M.lookup op operators of
|
|
|
|
|
Just fixity -> do
|
|
|
|
|
let (opPrec, nextPrec) = case fixity of
|
|
|
|
|
InfixL p -> (p, p)
|
|
|
|
|
InfixR p -> (p, p + 1)
|
|
|
|
|
if opPrec <= prec
|
|
|
|
|
then pure lhs
|
|
|
|
|
else do
|
|
|
|
|
_ <- pSymbol
|
|
|
|
|
rhs <- parseWithPrec nextPrec
|
|
|
|
|
continue prec (App (App (Var op) lhs) rhs)
|
|
|
|
|
Nothing -> fail $ "unknown operator '" ++ toString op ++ "'"
|
|
|
|
|
|
2024-12-04 17:46:50 -08:00
|
|
|
pAppTerm :: Parser IRExpr
|
2024-12-11 14:12:56 -08:00
|
|
|
pAppTerm = lexeme $ choice [pLAbs, pALAbs, pPAbs, pLet, pInfix]
|
2024-12-04 17:46:50 -08:00
|
|
|
|
|
|
|
|
pIRExpr :: Parser IRExpr
|
2024-12-08 21:42:13 -08:00
|
|
|
pIRExpr = lexeme $ do
|
|
|
|
|
e <- pAppTerm
|
2024-12-11 14:12:56 -08:00
|
|
|
option e $ (symbol "->" <|> symbol "→") >> Pi "" e <$> pIRExpr
|
2024-12-04 17:46:50 -08:00
|
|
|
|
|
|
|
|
pAscription :: Parser IRExpr
|
2024-12-08 21:42:13 -08:00
|
|
|
pAscription = lexeme $ try $ symbol ":" >> label "type" pIRExpr
|
2024-12-04 17:46:50 -08:00
|
|
|
|
2024-11-30 21:05:07 -08:00
|
|
|
pAxiom :: Parser IRDef
|
|
|
|
|
pAxiom = lexeme $ label "axiom" $ do
|
2024-12-08 21:42:13 -08:00
|
|
|
pKeyword "axiom"
|
2024-11-30 21:05:07 -08:00
|
|
|
ident <- pIdentifier
|
2024-12-10 20:31:53 -08:00
|
|
|
params <- pManyParams (pIdentifier <|> pSymbol)
|
2024-12-11 14:12:56 -08:00
|
|
|
ascription <- fmap (flip (foldr mkPi) params) pAscription
|
2024-12-08 21:42:13 -08:00
|
|
|
symbol ";"
|
2024-11-30 23:43:17 -08:00
|
|
|
pure $ Axiom ident ascription
|
2024-11-22 19:44:31 -08:00
|
|
|
|
2024-12-06 13:36:14 -08:00
|
|
|
pVariable :: Parser [IRSectionDef]
|
2024-12-04 17:46:50 -08:00
|
|
|
pVariable = lexeme $ label "variable declaration" $ do
|
2024-12-08 21:42:13 -08:00
|
|
|
pKeyword "variable" <|> pKeyword "hypothesis"
|
2024-12-10 20:31:53 -08:00
|
|
|
vars <- pManyParams (pIdentifier <|> pSymbol)
|
2024-12-08 21:42:13 -08:00
|
|
|
symbol ";"
|
2024-12-06 13:36:14 -08:00
|
|
|
pure $ uncurry Variable <$> vars
|
2024-12-04 17:46:50 -08:00
|
|
|
|
2024-11-30 21:05:07 -08:00
|
|
|
pDef :: Parser IRDef
|
|
|
|
|
pDef = lexeme $ label "definition" $ do
|
2024-12-08 21:42:13 -08:00
|
|
|
pKeyword "def"
|
2024-12-10 20:31:53 -08:00
|
|
|
ident <- pIdentifier <|> pSymbol
|
|
|
|
|
params <- pManyParams pIdentifier
|
2024-12-11 14:12:56 -08:00
|
|
|
ascription <- fmap (flip (foldr mkPi) params) <$> optional pAscription
|
2024-12-08 21:42:13 -08:00
|
|
|
symbol ":="
|
2024-11-30 20:34:09 -08:00
|
|
|
body <- pIRExpr
|
2024-12-08 21:42:13 -08:00
|
|
|
symbol ";"
|
2024-12-11 14:12:56 -08:00
|
|
|
pure $ Def ident ascription $ foldr mkAbs body params
|
2024-10-05 13:31:09 -07:00
|
|
|
|
2024-12-10 20:31:53 -08:00
|
|
|
pFixityDec :: Parser ()
|
|
|
|
|
pFixityDec = do
|
|
|
|
|
_ <- string "infix"
|
|
|
|
|
fixity <-
|
|
|
|
|
choice
|
|
|
|
|
[ InfixL <$> (lexeme (char 'l') >> lexeme L.decimal)
|
|
|
|
|
, InfixR <$> (lexeme (char 'r') >> lexeme L.decimal)
|
|
|
|
|
]
|
|
|
|
|
ident <- pSymbol
|
|
|
|
|
modify (M.insert ident fixity)
|
|
|
|
|
symbol ";"
|
|
|
|
|
|
2024-12-06 13:36:14 -08:00
|
|
|
pSection :: Parser IRSectionDef
|
2024-12-04 17:46:50 -08:00
|
|
|
pSection = lexeme $ label "section" $ do
|
2024-12-08 21:42:13 -08:00
|
|
|
pKeyword "section"
|
2024-12-04 17:46:50 -08:00
|
|
|
secLabel <- pIdentifier
|
2024-12-06 13:36:14 -08:00
|
|
|
body <- concat <$> many pIRDef
|
2024-12-08 21:42:13 -08:00
|
|
|
pKeyword "end"
|
|
|
|
|
void $ lexeme $ string secLabel
|
2024-12-04 17:46:50 -08:00
|
|
|
pure $ Section secLabel body
|
2024-11-17 18:33:14 -08:00
|
|
|
|
2024-12-06 13:36:14 -08:00
|
|
|
pIRDef :: Parser [IRSectionDef]
|
2024-12-10 20:31:53 -08:00
|
|
|
pIRDef = (pure . IRDef <$> pAxiom) <|> (pure . IRDef <$> pDef) <|> pVariable <|> (pure <$> pSection) <|> ([] <$ pFixityDec)
|
2024-11-22 19:44:31 -08:00
|
|
|
|
2024-11-30 20:34:09 -08:00
|
|
|
pIRProgram :: Parser IRProgram
|
2024-12-06 13:36:14 -08:00
|
|
|
pIRProgram = skipSpace >> concat <$> some pIRDef
|
2024-11-17 18:33:14 -08:00
|
|
|
|
2024-11-30 20:34:09 -08:00
|
|
|
parserWrapper :: Parser a -> String -> Text -> Either String a
|
2024-12-10 20:31:53 -08:00
|
|
|
parserWrapper p filename input = first errorBundlePretty $ evalState (runParserT p filename input) M.empty
|
2024-11-17 18:33:14 -08:00
|
|
|
|
2024-11-30 20:34:09 -08:00
|
|
|
parseProgram :: String -> Text -> Either String IRProgram
|
|
|
|
|
parseProgram = parserWrapper pIRProgram
|
2024-11-18 14:33:21 -08:00
|
|
|
|
2024-11-30 20:34:09 -08:00
|
|
|
parseDef :: String -> Text -> Either String IRDef
|
2024-12-06 13:36:14 -08:00
|
|
|
parseDef = parserWrapper (pAxiom <|> pDef)
|
2024-11-17 18:33:14 -08:00
|
|
|
|
2024-11-30 20:34:09 -08:00
|
|
|
parseExpr :: String -> Text -> Either String IRExpr
|
|
|
|
|
parseExpr = parserWrapper pIRExpr
|
2024-11-22 10:36:51 -08:00
|
|
|
|
2024-11-30 20:34:09 -08:00
|
|
|
handleFile :: String -> ExceptT String IO IRProgram
|
2024-11-30 23:43:17 -08:00
|
|
|
handleFile filename = toString `withExceptT` runPreprocessor filename >>= hoistEither . parseProgram filename
|