{-# LANGUAGE TupleSections #-} module Parser where import Control.Monad.Combinators (option) import Control.Monad.Except import Data.List (foldl, foldl1) import qualified Data.Map.Strict as M import qualified Data.Text as T import Errors (Error (..)) import IR import Preprocessor import Text.Megaparsec (MonadParsec (..), ParsecT, ShowErrorComponent (..), between, choice, errorBundlePretty, label, runParserT, try) import Text.Megaparsec.Char import qualified Text.Megaparsec.Char.Lexer as L newtype TypeError = TE Error deriving (Eq, Ord) data Fixity = InfixL Int | InfixR Int deriving (Eq, Show) type Operators = Map Text Fixity type Parser = ParsecT TypeError Text (State Operators) instance ShowErrorComponent TypeError where showErrorComponent (TE e) = toString e skipSpace :: Parser () skipSpace = L.space space1 (L.skipLineComment "--") (L.skipBlockCommentNested "[*" "*]") lexeme :: Parser a -> Parser a lexeme = L.lexeme skipSpace parens :: Parser a -> Parser a parens = between (char '(') (char ')') symbol :: Text -> Parser () symbol = void . L.symbol skipSpace symbols :: String symbols = "!@#$%^&*-+=<>,./?[]{}\\|`~'\"∧∨⊙×≅" pKeyword :: Text -> Parser () pKeyword keyword = void $ lexeme (string keyword <* notFollowedBy alphaNumChar) keywords :: [Text] keywords = ["forall", "let", "in", "end", "fun", "def", "axiom", "section", "variable", "hypothesis"] pIdentifier :: Parser Text pIdentifier = try $ label "identifier" $ lexeme $ do ident <- T.pack <$> ((:) <$> (letterChar <|> char '_') <*> many (alphaNumChar <|> char '_')) guard $ ident `notElem` keywords pure ident pSymbol :: Parser Text pSymbol = lexeme $ takeWhile1P (Just "symbol") (`elem` symbols) pVar :: Parser IRExpr pVar = label "variable" $ lexeme $ Var <$> pIdentifier pPureVar :: Parser IRExpr pPureVar = label "variable" $ lexeme $ symbol "#" >> PureVar <$> pIdentifier pParamGroup :: Parser [Param] pParamGroup = lexeme $ label "parameter group" $ parens $ do idents <- some $ pIdentifier <|> pSymbol symbol ":" ty <- pIRExpr pure $ map (,ty) idents pSomeParams :: Parser [Param] pSomeParams = lexeme $ concat <$> some pParamGroup pManyParams :: Parser [Param] pManyParams = lexeme $ concat <$> many pParamGroup mkAbs :: (Text, IRExpr) -> IRExpr -> IRExpr mkAbs (param, typ) = Abs param typ mkPi :: (Text, IRExpr) -> IRExpr -> IRExpr mkPi (param, typ) = Pi param typ pLAbs :: Parser IRExpr pLAbs = lexeme $ label "λ-abstraction" $ do _ <- pKeyword "fun" <|> symbol "λ" params <- pSomeParams _ <- symbol "=>" <|> symbol "⇒" body <- pIRExpr pure $ foldr mkAbs body params pALAbs :: Parser IRExpr pALAbs = lexeme $ label "λ-abstraction" $ do _ <- symbol "[" args <- some pIdentifier _ <- symbol ":" typ <- pIRExpr _ <- symbol "]" body <- pIRExpr pure $ foldr (mkAbs . (,typ)) body args pPAbs :: Parser IRExpr pPAbs = lexeme $ label "Π-abstraction" $ do _ <- pKeyword "forall" <|> symbol "∏" <|> symbol "∀" params <- pSomeParams symbol "," body <- pIRExpr pure $ foldr mkPi body params pBinding :: Parser (Text, Maybe IRExpr, IRExpr) pBinding = lexeme $ label "binding" $ do symbol "(" ident <- pIdentifier params <- pManyParams ascription <- optional pAscription symbol ":=" value <- pIRExpr symbol ")" pure ( ident , flip (foldr mkPi) params <$> ascription , foldr mkAbs value params ) pLet :: Parser IRExpr pLet = lexeme $ label "let expression" $ do pKeyword "let" bindings <- some pBinding pKeyword "in" body <- pIRExpr pKeyword "end" pure $ foldr letTuple body bindings where letTuple :: (Text, Maybe IRExpr, IRExpr) -> IRExpr -> IRExpr letTuple (name, ascription, value) = Let name ascription value pApp :: Parser IRExpr pApp = lexeme $ foldl1 App <$> some pTerm pStar :: Parser IRExpr pStar = lexeme $ Star <$ symbol "★" subscriptDigit :: Parser Integer subscriptDigit = choice [ symbol "₀" >> pure 0 , symbol "₁" >> pure 1 , symbol "₂" >> pure 2 , symbol "₃" >> pure 3 , symbol "₄" >> pure 4 , symbol "₅" >> pure 5 , symbol "₆" >> pure 6 , symbol "₇" >> pure 7 , symbol "₈" >> pure 8 , symbol "₉" >> pure 9 ] subscriptInt :: Parser Integer subscriptInt = foldl (\acc d -> acc * 10 + d) 0 <$> some subscriptDigit pSquare :: Parser IRExpr pSquare = lexeme $ (symbol "□" <|> symbol "[]") >> option (Level 0) (Level <$> (L.decimal <|> subscriptInt)) pSort :: Parser IRExpr pSort = lexeme $ pStar <|> pSquare pOpSection :: Parser IRExpr pOpSection = lexeme $ parens $ Var <$> pSymbol pTerm :: Parser IRExpr pTerm = lexeme $ label "term" $ choice [pSort, pPureVar, pVar, try pOpSection, parens pIRExpr] pInfix :: Parser IRExpr pInfix = parseWithPrec 0 where parseWithPrec :: Int -> Parser IRExpr parseWithPrec prec = pApp >>= continue prec continue :: Int -> IRExpr -> Parser IRExpr continue prec lhs = option lhs $ do op <- lookAhead pSymbol operators <- get case M.lookup op operators of Just fixity -> do let (opPrec, nextPrec) = case fixity of InfixL p -> (p, p) InfixR p -> (p, p + 1) if opPrec <= prec then pure lhs else do _ <- pSymbol rhs <- parseWithPrec nextPrec continue prec (App (App (Var op) lhs) rhs) Nothing -> fail $ "unknown operator '" ++ toString op ++ "'" pAppTerm :: Parser IRExpr pAppTerm = lexeme $ choice [pLAbs, pALAbs, pPAbs, pLet, pInfix] pIRExpr :: Parser IRExpr pIRExpr = lexeme $ do e <- pAppTerm option e $ (symbol "->" <|> symbol "→") >> Pi "" e <$> pIRExpr pAscription :: Parser IRExpr pAscription = lexeme $ try $ symbol ":" >> label "type" pIRExpr pAxiom :: Parser IRDef pAxiom = lexeme $ label "axiom" $ do pKeyword "axiom" ident <- pIdentifier params <- pManyParams ascription <- fmap (flip (foldr mkPi) params) pAscription symbol ";" pure $ Axiom ident ascription pVariable :: Parser [IRSectionDef] pVariable = lexeme $ label "variable declaration" $ do pKeyword "variable" <|> pKeyword "hypothesis" vars <- pManyParams symbol ";" pure $ uncurry Variable <$> vars pDef :: Parser IRDef pDef = lexeme $ label "definition" $ do pKeyword "def" ident <- pIdentifier <|> pSymbol params <- pManyParams ascription <- fmap (flip (foldr mkPi) params) <$> optional pAscription symbol ":=" body <- pIRExpr symbol ";" pure $ Def ident ascription $ foldr mkAbs body params pFixityDec :: Parser () pFixityDec = do _ <- string "infix" fixity <- choice [ InfixL <$> (lexeme (char 'l') >> lexeme L.decimal) , InfixR <$> (lexeme (char 'r') >> lexeme L.decimal) ] ident <- pSymbol modify (M.insert ident fixity) symbol ";" pSection :: Parser IRSectionDef pSection = lexeme $ label "section" $ do pKeyword "section" secLabel <- pIdentifier body <- concat <$> many pIRDef pKeyword "end" void $ lexeme $ string secLabel pure $ Section secLabel body pIRDef :: Parser [IRSectionDef] pIRDef = (pure . IRDef <$> pAxiom) <|> (pure . IRDef <$> pDef) <|> pVariable <|> (pure <$> pSection) <|> ([] <$ pFixityDec) pIRProgram :: Parser IRProgram pIRProgram = skipSpace >> concat <$> some pIRDef parserWrapper :: Parser a -> String -> Text -> Either String a parserWrapper p filename input = first errorBundlePretty $ evalState (runParserT p filename input) M.empty parseProgram :: String -> Text -> Either String IRProgram parseProgram = parserWrapper pIRProgram parseDef :: String -> Text -> Either String IRDef parseDef = parserWrapper (pAxiom <|> pDef) parseExpr :: String -> Text -> Either String IRExpr parseExpr = parserWrapper pIRExpr handleFile :: String -> ExceptT String IO IRProgram handleFile filename = toString `withExceptT` runPreprocessor filename >>= hoistEither . parseProgram filename