From c4262a10e256c220c678fae6088631c53858d214 Mon Sep 17 00:00:00 2001 From: William Ball Date: Wed, 14 Aug 2024 14:42:08 -0700 Subject: [PATCH] needs a LOT of cleaning, but basics of proof parser working --- .../wball/proof-checker/parser/ParseUtils.sml | 11 ++- .../proof-checker/parser/ProofParser.sml | 83 +++++++++++++++++++ .../wball/proof-checker/parser/Scanner.sml | 15 ++-- .../wball/proof-checker/parser/TOKEN.sig | 51 ------------ .../wball/proof-checker/parser/Token.sml | 39 +++++++-- .../wball/proof-checker/parser/parser.mlb | 2 +- .../wball/proof-checker/parser/repl.sml | 46 ++++------ 7 files changed, 153 insertions(+), 94 deletions(-) create mode 100644 lib/forgejo.ballcloud.cc/wball/proof-checker/parser/ProofParser.sml delete mode 100644 lib/forgejo.ballcloud.cc/wball/proof-checker/parser/TOKEN.sig diff --git a/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/ParseUtils.sml b/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/ParseUtils.sml index 91a548c..155b27c 100644 --- a/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/ParseUtils.sml +++ b/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/ParseUtils.sml @@ -2,6 +2,7 @@ signature PARSEUTILS = sig include PARSE val >>= : 'a p * ('a -> 'b p) -> 'b p + val >>=? : 'a p * ('a -> 'a p) -> 'a p end structure ParseUtils: PARSEUTILS = @@ -10,10 +11,18 @@ struct Parse (type token = Token.t val pp_token = Token.pp_token) open ParseUtilsTemp - infix >>= + infix >>= >>=? fun (p >>= q) ts = case p ts of OK (v, r, ts') => q v ts' | NO e => NO e + + fun (p >>=? q) ts = + case p ts of + s1 as (OK (v, r, ts')) => + (case q v ts' of + OK s2 => OK s2 + | NO _ => s1) + | NO e => NO e end diff --git a/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/ProofParser.sml b/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/ProofParser.sml new file mode 100644 index 0000000..a389842 --- /dev/null +++ b/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/ProofParser.sml @@ -0,0 +1,83 @@ +structure ProofParser: PARSER = +struct + type result = Proof.t + + structure StringSet = Utils.StringSet + structure ArityMap = Utils.ArityMap + structure T = Token + + type state = + {constants: StringSet.t, functions: ArityMap.t, relations: ArityMap.t} + + open ParseUtils + infix >>> ->> >>- ?? ??? || oo oor ??* >>= >>=? + + open Proof + + fun number_p nil = + NO (Region.botloc, fn () => "expecting number but reached the end") + | number_p ((Token.NUMBER num, r) :: rest) = OK (num, r, rest) + | number_p ((t, r) :: rest) = + NO (#1 r, fn () => + "expecting number but found token " ^ Token.pp_token t) + + val const = Utils.const + val label_p = + (eat (T.KEYWORD T.Axiom) oo const Axiom) + || (eat (T.KEYWORD T.AndI) oo const AndI) + || (eat (T.KEYWORD T.AndE) oo const AndE) + || (eat (T.KEYWORD T.OrI) oo const OrI) + || (eat (T.KEYWORD T.OrE) oo const OrE) + || (eat (T.KEYWORD T.ArrI) oo const ArrI) + || (eat (T.KEYWORD T.ArrE) oo const ArrE) + || (eat (T.KEYWORD T.NegI) oo const NegI) + || (eat (T.KEYWORD T.NegE) oo const NegE) + || (eat (T.KEYWORD T.RAA) oo const RAA) + || (eat (T.KEYWORD T.IffI) oo const IffI) + || (eat (T.KEYWORD T.IffE) oo const IffE) + || (eat (T.KEYWORD T.EqI) oo const EqI) + || (eat (T.KEYWORD T.EqE) oo const EqE) + || (eat (T.KEYWORD T.ForallI) oo const ForallI) + || (eat (T.KEYWORD T.ForallE) oo const ForallE) + || (eat (T.KEYWORD T.ExistsI) oo const ExistsI) + || (eat (T.KEYWORD T.ExistsE) oo const ExistsE) + + fun fold_lines lines (_, NO l) = NO l + | fold_lines lines (line, OK (children, r, rest)) = + OK (List.nth (lines, line - 1) :: children, r, rest) + handle Subscript => + NO (#1 r, fn () => "Invalid line reference " ^ (Int.toString line)) + + fun line_refs_p lines ts = + case (((number_p oo (fn x => [x])) ??* number_p) (Utils.flip op::)) ts of + OK (nums, r, rest) => foldl (fold_lines lines) (OK ([], r, rest)) nums + | NO l => NO l + + fun refless_tag_p ts = + ((eat T.LBRACKET ->> label_p >>- eat T.RBRACKET) + oo (fn label => (label, []))) ts + + fun ref_tag_p lines = + eat T.LBRACKET ->> label_p >>> line_refs_p lines >>- eat T.RBRACKET + + fun tag_p lines = refless_tag_p || ref_tag_p lines + + fun line_p (state, lines) : Proof.t p = + ((number_p >>- eat T.DOT oo const ()) + ->> + (FormulaParser.parse + { binders = [] + , constants = #constants state + , functions = #functions state + , relations = #relations state + }) >>> (tag_p lines)) + oo + (fn (phi, (label, children)) => + Proof {formula = phi, label = label, children = children}) + + fun all_lines_p (state, lines) : Proof.t p = + line_p (state, lines) >>=? (fn line => all_lines_p (state, line :: lines)) + + fun parse state ts = + all_lines_p (state, []) ts +end diff --git a/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/Scanner.sml b/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/Scanner.sml index 0ced7d1..950706f 100644 --- a/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/Scanner.sml +++ b/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/Scanner.sml @@ -5,6 +5,8 @@ struct fun map_token (Symb "(") = LPAREN | map_token (Symb ")") = RPAREN + | map_token (Symb "[") = LBRACKET + | map_token (Symb "]") = RBRACKET | map_token (Symb ".") = DOT | map_token (Symb ",") = COMMA | map_token (Symb "#") = HASH @@ -19,17 +21,20 @@ struct (case KeywordMap.get (keywordMap, id) of SOME keyword => KEYWORD keyword | NONE => IDENT id) - | map_token (Num _) = ERR + | map_token (Num num) = + NUMBER (valOf (Int.fromString num)) - fun const k _ = k + val is_num = (List.all Char.isDigit) o String.explode + fun is_id s = + ((List.all Char.isAlphaNum) (String.explode s)) andalso not (is_num s) val scan = map (fn (t, r) => (map_token t, r)) o tokenise - { sep_chars = "(),&|.#~" + { sep_chars = "[](),&|.#~" , symb_chars = "<=>" - , is_id = (List.all Char.isAlphaNum) o String.explode - , is_num = const false + , is_id = is_id + , is_num = is_num } end diff --git a/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/TOKEN.sig b/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/TOKEN.sig deleted file mode 100644 index 7b711db..0000000 --- a/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/TOKEN.sig +++ /dev/null @@ -1,51 +0,0 @@ -signature TOKEN = -sig - datatype keyword = - (* labels *) - Axiom - | AndI - | AndE - | OrI - | OrE - | ArrI - | ArrE - | NegI - | NegE - | RAA - | IffI - | IffE - | EqI - | EqE - | ForallI - | ForallE - | ExistsI - | ExistsE - - (* formulas *) - | Forall - | Exists - - structure KeywordMap: MAP where type key = string and type value = keyword - val keywordMap: KeywordMap.t - - datatype t = - LPAREN - | RPAREN - | NEG - | AND - | OR - | EQ - | RARROW - | LRARROW - | DOT - | COMMA - | HASH - | KEYWORD of keyword - | IDENT of string - | ERR - - val pp_token: t -> string - - val infix_connective: t -> bool - val prefix_connective: t -> bool -end diff --git a/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/Token.sml b/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/Token.sml index 166238d..12042a5 100644 --- a/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/Token.sml +++ b/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/Token.sml @@ -1,4 +1,4 @@ -structure Token: TOKEN = +structure Token = struct datatype keyword = Axiom @@ -25,6 +25,8 @@ struct datatype t = LPAREN | RPAREN + | LBRACKET + | RBRACKET | NEG | AND | OR @@ -36,6 +38,7 @@ struct | HASH | KEYWORD of keyword | IDENT of string + | NUMBER of int | ERR structure KeywordMap = Map (structure Ord = StringOrd type value = keyword) @@ -58,13 +61,37 @@ struct , ("ForallE", ForallE) , ("ExistsI", ExistsI) , ("ExistsE", ExistsE) - , ("Forall", Forall) - , ("Exists", Exists) + , ("forall", Forall) + , ("exists", Exists) ] + val keywordToString = + fn Axiom => "Axiom" + | AndI => "AndI" + | AndE => "AndE" + | OrI => "OrI" + | OrE => "OrE" + | ArrI => "ArrI" + | ArrE => "ArrE" + | NegI => "NegI" + | NegE => "NegE" + | RAA => "RAA" + | IffI => "IffI" + | IffE => "IffE" + | EqI => "EqI" + | EqE => "EqE" + | ForallI => "ForallI" + | ForallE => "ForallE" + | ExistsI => "ExistsI" + | ExistsE => "ExistsE" + | Forall => "forall" + | Exists => "exists" + val pp_token = fn LPAREN => "(" | RPAREN => ")" + | LBRACKET => "[" + | RBRACKET => "]" | NEG => "~" | AND => "&" | OR => "|" @@ -73,11 +100,11 @@ struct | LRARROW => "<=>" | DOT => "." | COMMA => "," - | FORALL => "forall" - | EXISTS => "exists" - | ERR => "unk" | HASH => "#" + | KEYWORD keyword => keywordToString keyword | IDENT s => s + | NUMBER num => Int.toString num + | ERR => "unk" val infix_connective = fn AND => true | OR => true | RARROW => true | LRARROW => true | _ => false diff --git a/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/parser.mlb b/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/parser.mlb index 570e6aa..ae1dc74 100644 --- a/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/parser.mlb +++ b/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/parser.mlb @@ -3,7 +3,6 @@ $(SML_LIB)/basis/basis.mlb ../utils/utils.mlb ../../../../github.com/diku-dk/sml-parse/parse.mlb ../../../../github.com/diku-dk/sml-parse/simple_token.mlb -TOKEN.sig Token.sml SCANNER.sig Scanner.sml @@ -11,4 +10,5 @@ ParseUtils.sml PARSER.sig TermParser.sml FormulaParser.sml +ProofParser.sml repl.sml diff --git a/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/repl.sml b/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/repl.sml index f058689..4c27638 100644 --- a/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/repl.sml +++ b/lib/forgejo.ballcloud.cc/wball/proof-checker/parser/repl.sml @@ -1,11 +1,11 @@ open Scanner -structure Testing: -sig - val parse_repl: string -> unit -end = +structure Testing = +(* sig *) +(* val parse_repl: string -> unit *) +(* end = *) struct - open FormulaParser + open ProofParser open Utils open ParseUtils open Scanner @@ -14,31 +14,17 @@ struct val constants = StringSet.fromList ["zero"] val relations = ArityMap.fromList [("lt", 2)] val state = - { binders = [] - , functions = functions - , constants = constants - , relations = relations - } + {functions = functions, constants = constants, relations = relations} fun parse_repl s = - let - val toks = scan {input = s, srcname = "repl"} - in - case parse state toks of - OK (t, _, []) => print ("Parsed successfully: " ^ Formula.pp t ^ "\n") - | OK (t, r, rest) => - print - ("Parsed '" ^ Formula.pp t ^ "' up to " ^ Region.pp r ^ ", but " - ^ Int.toString (length rest) ^ " tokens remain unparsed\n") - | NO (r, err) => print ("ERROR: " ^ Region.ppLoc r ^ ", " ^ err () ^ "\n") + let val toks = scan {input = s, srcname = "repl"} + in parse state toks + (* case parse state toks of *) + (* OK (t, _, []) => print ("Parsed successfully: " ^ Formula.pp t ^ "\n") *) + (* | OK (t, r, rest) => *) + (* print *) + (* ("Parsed '" ^ Formula.pp t ^ "' up to " ^ Region.pp r ^ ", but " *) + (* ^ Int.toString (length rest) ^ " tokens remain unparsed\n") *) + (* | NO (r, err) => print ("ERROR: " ^ Region.ppLoc r ^ ", " ^ err () ^ "\n") *) end -end - -fun repl () = - ( print "> " - ; case (TextIO.inputLine TextIO.stdIn) of - SOME s => (Testing.parse_repl s; repl ()) - | NONE => () - ) - -fun main () = repl () (* val () = main () *) +end (* fun repl () = *) (* ( print "> " *) (* ; case (TextIO.inputLine TextIO.stdIn) of *) (* SOME s => (Testing.parse_repl s; repl ()) *) (* | NONE => () *) (* ) *) (* fun main () = repl () (1* val () = main () *1) *)