Warning: this is an htmlized version!
The original is here, and the conversion rules are here. |
-- gab-lpeg.lua: an explanation for the LPeg tricks used in gab.lua. -- Latest version of this file: -- http://angg.twu.net/gab/gab-lpeg.lua.html -- (find-gab "gab-lpeg.lua") -- Author: Eduardo Ochs <eduardoochs@gmail.com> -- Varsion: 2012may15 -- Public domain. -- -- I sent the code below to lua-l in 2012may14. See: -- http://article.gmane.org/gmane.comp.lang.lua.general/91233 -- See also: -- http://angg.twu.net/gab/README.html -- --[==[ * (eepitch-lua51) * (eepitch-kill) * (eepitch-lua51) loadlpeg() --]==] require "lpeg" -- A nicer syntax for local variables in LPeg patterns. set = function (name, o) if getmetatable(o) == getmetatable(lpeg.P"") -- if o is a pattern then return o:Cg(name) -- then name := eval(o) else return lpeg.Cc(o):Cg(name) -- else name := o (constant) end end get = function (name) return lpeg.Cb(name) end group = function (cmds) return lpeg.Cg(cmds) end -- A trivial example: patt = set("x", "2") * get("x") * group(set("x", "3") * get("x") ) * get("x") print(patt:match"") --> 2 3 2 -- A bigger example: -- binary operators, with precedence. -- -- Suppose that E is an expression in tree form - for example, this: -- -- + -- / \ -- E = * < -- / \ / \ -- 2 3 4 5 -- -- Then to find out how to print E in infix notation we need to -- determine, for each of its "wires", whether we need or not to add -- parentheses around the subexpression under it. The best way to do -- that is to associate to each operator three numbers, that we will -- call "top", "left" and "right" (op.t, op.l, or.r). We get his: -- ___ -- /19 \ -- / + \ -- |_18_20_| -- / \() -- ___ ___ -- /21 \ /17 \ -- / * \ / < \ -- |_20_22_| |_18_18_| -- / \ / \ -- 2 3 4 5 -- -- The wires that need parentheses are the ones in which the number -- above is higher than the number below, and we get: -- -- 2 * 3 + (4 < 5) -- -- This idea can be easily adapted to parsing. After reading the "*" -- we need to parse its second argument - the longest "expression -- under the `*'" possible. Let's look at a bigger example: -- -- &______________. -- | | -- <=_. >__. -- | | | | -- 2 +__. 7 8 -- /------------------------\ | | -- /----------------\ 3 *_____. -- /-----------\ | | -- /-------\ *__. 6 -- /---\ /---\ | | -- 2 <= 3 + 4 * 5 * 6 & 7 > 8 4 5 -- -- The second argument for the `+' in that expression is the longest -- "expression under the `+'" possible, starting at the "4". In the -- grammar below, this will be an "expression without complements" - -- just the "4" - followed by two "complements", "* 5" and "* 6", -- which we will be added to the "4" using lpeg.Cf: -- -- http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html#cap-f -- -- Note: I took the idea of op.t, op.l and op.r from Andrej Bauer's -- PLZoo (but he uses that only for printing, not for parsing)... -- See: -- -- http://andrej.com/plzoo/ -- http://andrej.com/plzoo/html/calc.html -- http://www.gnu.org/software/bison/manual/html_node/Infix-Calc.html ops = {} newbinop = function (l, t, r, str) for op in str:gmatch("[^ ]+") do ops[op] = {l=l, t=t, r=r} end end newbinop(10, 11, 12, "->") newbinop(12, 13, 14, "or") newbinop(14, 15, 16, "&") newbinop(18, 17, 18, "== <= >= < > != <-") newbinop(18, 19, 20, "+ -") newbinop(20, 21, 22, "* /") pack_bin = function (e1, op, e2) return "("..op.." "..e1.." "..e2..")" end apply_all = function (e, T) return T[#T](e, unpack(T, 1, #T-1)) end above = function (lop, rop) return lop == "" or ops[lop].r < ops[rop].t end is_binop = function (subj, pos, c) if ops[c] then return true, c end end is_under = function (subj, pos, lop, op) if above(lop, op) then return true, op end end P = lpeg.P V = lpeg.V Cc = lpeg.Cc sp = lpeg.S" \t\n"^0 sP = function (str) return sp * P(str) end Expr_grammar = { "Expr", -- initial rule name Num = sp * (lpeg.R"09"^1):C(), Parenexpr = sP"(" * V"Expr" * sP")", -- Binop_any = sp * ((lpeg.P(2):Cmt(is_binop) + lpeg.P(1):Cmt(is_binop))), Binop_under = (get"lop" * V"Binop_any"):Cmt(is_under), Binop = set("lop", V"Binop_under") * get("lop"), -- Complement = group(V"Binop" * V"Expr_under" * Cc(pack_bin)):Ct(), -- Expr_woc = V"Num" + V"Parenexpr", Expr_under = (V"Expr_woc" * V"Complement"^0):Cf(apply_all), Expr = group(set("lop", "") * V"Expr_under"), } Expr_pattern = lpeg.P(Expr_grammar) print(Expr_pattern:match"2 <= 3 + 4 * 5 * 6 & 7 > 8") --> (& (<= 2 (+ 3 (* (* 4 5) 6))) (> 7 8)) -- Local Variables: -- coding: raw-text-unix -- ee-anchor-format: "«%s»" -- End: