perga/lib/Parser.hs

315 lines
9 KiB
Haskell
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{-# LANGUAGE TupleSections #-}
module Parser where
import Control.Monad.Combinators (option)
import Control.Monad.Except
import Data.List (foldl, foldl1)
import qualified Data.Map.Strict as M
import qualified Data.Text as T
import Errors (Error (..))
import IR
import Preprocessor
import Text.Megaparsec (MonadParsec (..), ParsecT, ShowErrorComponent (..), between, choice, errorBundlePretty, label, runParserT, try)
import Text.Megaparsec.Char
import qualified Text.Megaparsec.Char.Lexer as L
newtype TypeError = TE Error
deriving (Eq, Ord)
data InfixDef = InfixDef
{ infixFixity :: Fixity
, infixOp :: Text -> IRExpr -> IRExpr -> IRExpr
}
data Fixity
= InfixL Int
| InfixR Int
deriving (Eq, Show)
type Operators = Map Text InfixDef
initialOps :: Operators
initialOps =
M.fromAscList
[ ("", InfixDef (InfixR 2) (const $ Pi ""))
, ("->", InfixDef (InfixR 2) (const $ Pi ""))
, ("×", InfixDef (InfixL 10) (const Prod))
]
type Parser = ParsecT TypeError Text (State Operators)
instance ShowErrorComponent TypeError where
showErrorComponent (TE e) = toString e
skipSpace :: Parser ()
skipSpace =
L.space
space1
(L.skipLineComment "--")
(L.skipBlockCommentNested "[*" "*]")
lexeme :: Parser a -> Parser a
lexeme = L.lexeme skipSpace
parens :: Parser a -> Parser a
parens = between (char '(') (char ')')
symbol :: Text -> Parser ()
symbol = void . L.symbol skipSpace
symbols :: String
symbols = "→!@#$%^&*-+=<>,./?[]{}\\|`~'\"∧∨⊙×≅"
pKeyword :: Text -> Parser ()
pKeyword keyword = void $ lexeme (string keyword <* notFollowedBy alphaNumChar)
keywords :: [Text]
keywords = ["forall", "let", "in", "end", "fun", "def", "axiom", "section", "variable", "hypothesis"]
pIdentifier :: Parser Text
pIdentifier = try $ label "identifier" $ lexeme $ do
ident <- T.pack <$> ((:) <$> (letterChar <|> char '_') <*> many (alphaNumChar <|> char '_'))
guard $ ident `notElem` keywords
pure ident
pSymbol :: Parser Text
pSymbol = lexeme $ takeWhile1P (Just "symbol") (`elem` symbols)
pVar :: Parser IRExpr
pVar = label "variable" $ lexeme $ Var <$> pIdentifier
pPureVar :: Parser IRExpr
pPureVar = label "variable" $ lexeme $ symbol "#" >> PureVar <$> pIdentifier
pParamGroup :: Parser [Param]
pParamGroup = lexeme $ label "parameter group" $ parens $ do
idents <- some $ pIdentifier <|> pSymbol
symbol ":"
ty <- pIRExpr
pure $ map (,ty) idents
pSomeParams :: Parser [Param]
pSomeParams = lexeme $ concat <$> some pParamGroup
pManyParams :: Parser [Param]
pManyParams = lexeme $ concat <$> many pParamGroup
mkAbs :: (Text, IRExpr) -> IRExpr -> IRExpr
mkAbs (param, typ) = Abs param typ
mkPi :: (Text, IRExpr) -> IRExpr -> IRExpr
mkPi (param, typ) = Pi param typ
pLAbs :: Parser IRExpr
pLAbs = lexeme $ label "λ-abstraction" $ do
_ <- pKeyword "fun" <|> symbol "λ"
params <- pSomeParams
_ <- symbol "=>" <|> symbol ""
body <- pIRExpr
pure $ foldr mkAbs body params
pALAbs :: Parser IRExpr
pALAbs = lexeme $ label "λ-abstraction" $ do
_ <- symbol "["
args <- some pIdentifier
_ <- symbol ":"
typ <- pIRExpr
_ <- symbol "]"
body <- pIRExpr
pure $ foldr (mkAbs . (,typ)) body args
pPAbs :: Parser IRExpr
pPAbs = lexeme $ label "Π-abstraction" $ do
_ <- pKeyword "forall" <|> symbol "" <|> symbol ""
params <- pSomeParams
symbol ","
body <- pIRExpr
pure $ foldr mkPi body params
pBinding :: Parser (Text, Maybe IRExpr, IRExpr)
pBinding = lexeme $ label "binding" $ do
symbol "("
ident <- pIdentifier
params <- pManyParams
ascription <- optional pAscription
symbol ":="
value <- pIRExpr
symbol ")"
pure
( ident
, flip (foldr mkPi) params <$> ascription
, foldr mkAbs value params
)
pLet :: Parser IRExpr
pLet = lexeme $ label "let expression" $ do
pKeyword "let"
bindings <- some pBinding
pKeyword "in"
body <- pIRExpr
pKeyword "end"
pure $ foldr letTuple body bindings
where
letTuple :: (Text, Maybe IRExpr, IRExpr) -> IRExpr -> IRExpr
letTuple (name, ascription, value) = Let name ascription value
pApp :: Parser IRExpr
pApp = lexeme $ foldl1 App <$> some pTerm
pStar :: Parser IRExpr
pStar = lexeme $ Star <$ symbol ""
subscriptDigit :: Parser Integer
subscriptDigit =
choice
[ symbol "" >> pure 0
, symbol "" >> pure 1
, symbol "" >> pure 2
, symbol "" >> pure 3
, symbol "" >> pure 4
, symbol "" >> pure 5
, symbol "" >> pure 6
, symbol "" >> pure 7
, symbol "" >> pure 8
, symbol "" >> pure 9
]
subscriptInt :: Parser Integer
subscriptInt = foldl (\acc d -> acc * 10 + d) 0 <$> some subscriptDigit
pSquare :: Parser IRExpr
pSquare = lexeme $ (symbol "" <|> symbol "[]") >> option (Level 0) (Level <$> (L.decimal <|> subscriptInt))
pSort :: Parser IRExpr
pSort = lexeme $ pStar <|> pSquare
pOpSection :: Parser IRExpr
pOpSection = lexeme $ parens $ Var <$> pSymbol
pProd :: Parser IRExpr
pProd = lexeme $ between (char '{') (char '}') $ do
left <- pIRExpr
_ <- symbol "×"
Prod left <$> pIRExpr
pPair :: Parser IRExpr
pPair = lexeme $ between (char '<') (char '>') $ do
left <- pIRExpr
_ <- symbol ","
Pair left <$> pIRExpr
pPi1 :: Parser IRExpr
pPi1 = lexeme $ symbol "π₁" >> Pi1 <$> pIRExpr
pPi2 :: Parser IRExpr
pPi2 = lexeme $ symbol "π₂" >> Pi2 <$> pIRExpr
pTerm :: Parser IRExpr
pTerm = lexeme $ label "term" $ choice [pSort, pPi1, pPi2, pPureVar, pVar, pProd, pPair, try pOpSection, parens pIRExpr]
pInfix :: Parser IRExpr
pInfix = parseWithPrec 0
where
parseWithPrec :: Int -> Parser IRExpr
parseWithPrec prec = pApp >>= continue prec
continue :: Int -> IRExpr -> Parser IRExpr
continue prec lhs = option lhs $ do
op <- lookAhead pSymbol
operators <- get
case M.lookup op operators of
Just (InfixDef fixity opFun) -> do
let (opPrec, nextPrec) = case fixity of
InfixL p -> (p, p)
InfixR p -> (p, p + 1)
if opPrec <= prec
then pure lhs
else do
_ <- pSymbol
rhs <- parseWithPrec nextPrec
continue prec $ opFun op lhs rhs
Nothing -> fail $ "unknown operator '" ++ toString op ++ "'"
pIRExpr :: Parser IRExpr
pIRExpr = lexeme $ choice [pLAbs, pALAbs, pPAbs, pLet, pInfix]
-- pAppTerm :: Parser IRExpr
-- pAppTerm = lexeme $ choice [pLAbs, pALAbs, pPAbs, pLet, pInfix]
--
-- pIRExpr :: Parser IRExpr
-- pIRExpr = lexeme $ do
-- e <- pAppTerm
-- option e $ (symbol "->" <|> symbol "→") >> Pi "" e <$> pIRExpr
pAscription :: Parser IRExpr
pAscription = lexeme $ try $ symbol ":" >> label "type" pIRExpr
pAxiom :: Parser IRDef
pAxiom = lexeme $ label "axiom" $ do
pKeyword "axiom"
ident <- pIdentifier <|> pSymbol
params <- pManyParams
ascription <- fmap (flip (foldr mkPi) params) pAscription
symbol ";"
pure $ Axiom ident ascription
pVariable :: Parser [IRSectionDef]
pVariable = lexeme $ label "variable declaration" $ do
pKeyword "variable" <|> pKeyword "hypothesis"
vars <- pManyParams
symbol ";"
pure $ uncurry Variable <$> vars
pDef :: Parser IRDef
pDef = lexeme $ label "definition" $ do
pKeyword "def"
ident <- pIdentifier <|> pSymbol
params <- pManyParams
ascription <- fmap (flip (foldr mkPi) params) <$> optional pAscription
symbol ":="
body <- pIRExpr
symbol ";"
pure $ Def ident ascription $ foldr mkAbs body params
pFixityDec :: Parser ()
pFixityDec = do
_ <- string "infix"
fixity <-
choice
[ InfixL <$> (lexeme (char 'l') >> lexeme L.decimal)
, InfixR <$> (lexeme (char 'r') >> lexeme L.decimal)
]
ident <- pSymbol
modify $ M.insert ident $ InfixDef fixity $ (App .) . App . Var
symbol ";"
pSection :: Parser IRSectionDef
pSection = lexeme $ label "section" $ do
pKeyword "section"
secLabel <- pIdentifier
body <- concat <$> many pIRDef
pKeyword "end"
void $ lexeme $ string secLabel
pure $ Section secLabel body
pIRDef :: Parser [IRSectionDef]
pIRDef = (pure . IRDef <$> pAxiom) <|> (pure . IRDef <$> pDef) <|> pVariable <|> (pure <$> pSection) <|> ([] <$ pFixityDec)
pIRProgram :: Parser IRProgram
pIRProgram = skipSpace >> concat <$> some pIRDef
parserWrapper :: Parser a -> String -> Text -> Either String a
parserWrapper p filename input = first errorBundlePretty $ evalState (runParserT p filename input) initialOps
parseProgram :: String -> Text -> Either String IRProgram
parseProgram = parserWrapper pIRProgram
parseDef :: String -> Text -> Either String IRDef
parseDef = parserWrapper (pAxiom <|> pDef)
parseExpr :: String -> Text -> Either String IRExpr
parseExpr = parserWrapper pIRExpr
handleFile :: String -> ExceptT String IO IRProgram
handleFile filename = toString `withExceptT` runPreprocessor filename >>= hoistEither . parseProgram filename