-- Warning: this is an htmlized version!
-- The original is here, and the conversion rules are here.
-- Taken from:
--   (find-dn5 "gab.lua" "old-parser")
--
--  _ __   __ _ _ __ ___  ___
-- | '_ \ / _` | '__/ __|/ _ \
-- | |_) | (_| | |  \__ \  __/
-- | .__/ \__,_|_|  |___/\___|
-- |_|
--
-- «precedence-table»  (to ".precedence-table")
-- «precedence»  (to ".precedence")
-- (find-dn5 "gab-tests.lua" "parse-2")
-- (find-dn5 "gab-tests.lua" "parse-3")

-- Operator table, indexed by the operator token.
-- Each entry is {kind=..., top=..., l=..., r=...}:
--   kind  "binop" or "prefix" (tested by la_op);
--   top   compared against the current bound `b' by `under';
--   r     the bound passed to pa_expr when parsing the right operand;
--   l     stored for binops, but not read anywhere in this file.
-- NOTE(review): because `under' accepts `b <= top', a binop whose `r'
-- equals its own `top' is absorbed by its right operand, so for example
-- "2 - 3 - 4" groups as "2 - (3 - 4)". Confirm that this is intended.
ops = {}

-- Register a binary operator in `ops'.
binop = function (op, top, l, r)
    ops[op] = {kind="binop", top=top, l=l, r=r}
  end

-- Register a prefix operator in `ops'.
prefix = function (op, top, r)
    ops[op] = {kind="prefix", top=top, r=r}
  end

binop("*",  8, 7, 8)
binop("/",  8, 7, 8)
binop("+",  7, 6, 7)
binop("-",  7, 6, 7)
binop("==", 6, 6, 6)
binop("<=", 6, 6, 6)
binop(">=", 6, 6, 6)
binop("<",  6, 6, 6)
binop(">",  6, 6, 6)
binop("!=", 6, 6, 6)
binop("&",  4, 3, 4)
binop("|",  3, 2, 3)
binop("->", 2, 2, 2)
prefix("u-", 9, 100)
prefix("nt", 5, 4)
binop("in", 6, 6, 6)
binop("<-", 6, 6, 6)

-- «recursive-descent»  (to ".recursive-descent")

-- Parser state: `subj' is the array of tokens, `pos' is the index of
-- the next token to be read.
subj = split "2 + 3 * 4 + 5 * 6 * 7 (eof)"
pos  = 1

-- Tokenize `str' (tokens are separated by spaces), reset the parser
-- state and parse one expression. `b' is the optional bound passed on
-- to pa_expr.
parse = function (str, b)
    subj = split(str)
    pos = 1
    return pa_expr(b)
  end

-- Parse `str', print the result with :tolisp() and :torect(), and
-- return the parsed expression.
pparse = function (str, b)
    local e = parse(str, b)
    print(e:tolisp())
    print(e:torect())
    return e
  end

-- Like pparse, but also calls :peval() on the result.
-- `b' is forwarded to pparse (it used to be dropped).
pparsee = function (str, b)
    pparse(str, b):peval()
  end

-- Abbreviations:
-- "la"  = "looking at"
-- "pa"  = "parse"
-- "wos" = "without suffixes"

-- «parser-grammar»  (to ".parser-grammar")
-- We use a very simple recursive parser with one token of lookahead.
-- It operates on the array "subj", that at the moment is just an
-- array of strings obtained by running "split" on a given string -
-- to avoid lexing. Our grammar is:
--
--   commaexpr   ::= , expr
--   parenexpr   ::= ( expr commaexpr^* )
--   setexpr     ::= { }
--                 | { expr commaexpr^* }
--                 | { expr | expr commaexpr^* }
--   qlexpr      ::= Fa expr . expr
--                 | Ex expr . expr
--                 | \\ expr . expr
--   exprwos     ::= parenexpr
--                 | setexpr
--                 | qlexpr
--                 | prefixop expr
--                 | number
--                 | var
--   complement1 ::= suffixop
--                 | parenexpr
--                 | binop expr
--   expr        ::= exprwos complement1^*
--
-- Note that each alternative starts with a different token - we use
-- the "la_*" functions to choose which alternative to follow.

-- Raise a parse error that shows the current lookahead token.
-- tostring() keeps the message usable when the input is exhausted and
-- la() is nil (a plain concatenation would fail on nil).
perror = function (str)
    error("(la = `"..tostring(la()).."') "..str, 2)
  end

-- Return the current token without consuming it (nil past the end).
la = function () return subj[pos] end

-- Consume the current token and return it.
eat = function ()
    pos = pos + 1
    return subj[pos-1]
  end

-- Consume the current token if it is exactly `token'; otherwise raise
-- a parse error.
pa_forced = function (token)
    if la() ~= token then perror("Expected `"..token.."'") end
    return eat()
  end

-- True when there is no bound `b', or when `b' does not exceed the
-- `top' of the operator `op'.
under = function (b, op) return (not b) or b <= ops[op].top end

-- Lookahead tests for operators of a given kind, optionally bounded.
la_op       = function (k)    return ops[la()] and ops[la()].kind == k end
la_op_under = function (k, b) return la_op(k) and under(b, la()) end
la_binop    = function (b)    return la_op_under("binop",  b) end
la_prefix   = function (b)    return la_op_under("prefix", b) end
la_suffix   = function (b)    return la_op_under("suffix", b) end
--
-- Numbers are tokens made only of decimal digits. The nil guard makes
-- the test return nil at end of input instead of indexing nil.
la_number = function ()
    local tok = la()
    return tok and (tok:match"^[0-9]+$")
  end
pa_number = function () return Num(eat()+0) end

-- Variables are tokens made only of ASCII letters (nil-safe, as above).
la_var = function ()
    local tok = la()
    return tok and (tok:match"^[A-Za-z]+$")
  end
pa_var = function () return Var(eat()) end

la_paren      = function () return la() == "(" end
pa_parenclose = function () return pa_forced(")") end

-- Parse "( expr commaexpr^* )". Returns the inner expression when
-- there are no commas, and Tuple(e, ...) otherwise.
-- The tokens are consumed in explicit sequential statements, so the
-- parse order does not depend on how a multiple assignment evaluates
-- its expression list.
pa_parenexpr = function ()
    eat()                            -- the "("
    local e  = pa_expr()
    local es = pa_commaexprs()
    pa_parenclose()
    if #es == 0 then return e end
    return Tuple(e, unpack(es))
  end

-- True when the current token can start an expression. It tests the
-- same alternatives that pa_exprwos dispatches on.
la_expr = function ()
    return la_number() or la_var() or la_paren() or la_set()
        or la_ql() or la_prefix()
  end

-- Parse an expression "without suffixes" (see the grammar above).
-- The keyword tests come before la_var because la_var's pattern also
-- matches alphabetic tokens such as "Fa", "Ex" and "nt".
pa_exprwos = function ()
    -- PP("pa_exprwos with pos = "..pos)
    if     la_paren()  then return pa_parenexpr()
    elseif la_set()    then return pa_set()
    elseif la_ql()     then return pa_qlexpr()
    elseif la_prefix() then
      local op = eat()
      local e  = pa_expr(ops[op].r)
      return Expr {[0]=op, e}
    elseif la_number() then return pa_number()
    elseif la_var()    then return pa_var()
    end
    perror("Not an expr (in exprwos)")
  end

-- True when the current token can continue an expression: a suffix
-- operator or a binop allowed under `b', or an open parenthesis (the
-- parenthesis test does not look at `b').
la_complement = function (b)
    return la_suffix(b) or la_paren() or la_binop(b)
  end

-- Parse an expression: an exprwos followed by as many complements as
-- the bound `b' allows. `b' may be nil, meaning "no bound".
pa_expr = function (b)
    -- PP("pa_expr with pos = "..pos)
    local e = pa_exprwos()
    while la_complement(b) do
      if la_suffix(b) then
        e = Expr {[0]=eat(), e}
      elseif la_paren() then
        e = App(e, pa_parenexpr())
      elseif la_binop(b) then
        local op = eat()
        local re = pa_expr(ops[op].r)
        e = Expr {[0]=op, e, re}
      else
        perror("Not an expr")
      end
    end
    return e
  end

-- (find-angg "LUA/lua50init.lua" "pack-and-unpack")

la_set      = function () return la() == "{" end
pa_setclose = function () return pa_forced("}") end

-- Parse zero or more ", expr" and return the expressions as an array.
pa_commaexprs = function ()
    local es = {}
    while la() == "," do
      eat()
      table.insert(es, pa_expr())
    end
    return es
  end

-- Parse a set expression; the current token must be "{".
--   { }                           -> Set()
--   { e , e2 ... }                -> Set(e, e2, ...)
--   { e | er , er2 ... }          -> Setof(er, er2, ...) with e appended
--   { x <- A | er , er2 ... }     -> Subset(e, er, er2, ...) with e[1]
--                                    appended, where e is "x <- A"
pa_set = function ()
    eat()                            -- the "{"
    if la() == "}" then
      eat()
      return Set()
    elseif la_expr() then
      -- Bound 4 keeps "|" (top 3) from being read as a binary operator.
      local e = pa_expr(4)
      if la() == "|" then
        eat()                        -- the "|"
        local er  = pa_expr()
        local ers = pa_commaexprs()
        pa_setclose()
        if e[0] == "<-" then
          local s = Subset(e, er, unpack(ers))
          table.insert(s, e[1])
          return s
        else
          local s = Setof(er, unpack(ers))
          table.insert(s, e)
          return s
        end
      else
        local es = pa_commaexprs()
        pa_setclose()
        return Set(e, unpack(es))
      end
    end
    perror("Not a set")
  end

-- Quantifier / lambda expressions start with "Fa", "Ex" or a backslash.
la_ql  = function () return la() == "Fa" or la() == "Ex" or la() == "\\" end
pa_dot = function () return pa_forced(".") end

-- Parse "Fa expr . expr", "Ex expr . expr" or "\ expr . expr".
-- A backslash yields Lambda(vare, bodye). For "Fa" and "Ex" the first
-- expression must be `_ <- _' or `_ in _', and the result is
-- Expr {[0]=ql, vare[1], vare[2], bodye}.
pa_qlexpr = function ()
    local ql    = eat()
    local vare  = pa_expr()
    pa_dot()
    local bodye = pa_expr()
    if ql == "\\" then return Lambda(vare, bodye) end
    if vare[0] == "<-" or vare[0] == "in" then
      return Expr {[0]=ql, vare[1], vare[2], bodye}
    else
      perror("varexpr must be `_ <- _' or `_ in _'")
    end
  end

-- Local Variables:
-- coding:               raw-text-unix
-- ee-anchor-format:     "«%s»"
-- End: