-- Warning: this is an htmlized version!
-- The original is here, and the conversion rules are here.
-- elisp.lua: parse and interpret sexp hyperlinks.
-- This file:
--   http://angg.twu.net/blogme4/elisp.lua.html
--   http://angg.twu.net/blogme4/elisp.lua
--                (find-blogme4 "elisp.lua")
-- Author: Eduardo Ochs <eduardoochs@gmail.com>
-- Version: 2011aug01
-- License: GPL3
--
-- The docs below are a mess!!!
--
-- Let me start by supposing that you know what a sexp is. Then you
-- know what a "sexp one-liner" is, and I will say that a line "has an
-- elink" if it is made of some "prefix characters" (possibly zero),
-- then a sexp (a list), then optional spaces.
-- The "sexp hyperlinks" used by eev are elinks.
-- See: (find-eevarticlesection "hyperlinks")
--      http://en.wikipedia.org/wiki/S-expression
--
-- One of the hardest parts of htmlizing the material in
-- http://angg.twu.net/ is that many of the files there require
-- htmlizing "sexp hyperlinks", like this one:
--
--   (find-blogme4 "def.lua" "undollar")
--
-- the htmlization makes the "find-blogme4" into a link to a section
-- of the documentation about eev, and makes the two last chars of the
-- sexp, '")', behave somehow like what the sexp would do when run in
-- Emacs...
--   (find-blogme4 "hyperlinks")

-- Here is a rough sketch of what we need to do on each line that may
-- end with a sexp ("rough sketch" means "the details are below,
-- scattered around")... We need to:
--
--   1) detect whether that line ends with a sexp,
--   2) split each line that ends with a sexp into what comes before
--      the sexp (we call that the "pre"), the hyperlink itself (the
--      "sexp") and the optional spaces after the sexp ("the spaces"),
--   3) split the sexp into its "elements",
--   4) check whether the first "element" (the "head") is a symbol,
--   5) check whether the "head" has an entry in the table "ewords",
--   6) if it has, we need to run sexp:sexphtml(), that usually:
--   7) splits the sexp into an "opening parenthesis" (the "o"),
--      the "word" (the "w"), the "rest" ("r") and the "close"
--      (usually the two last chars - '")'),
--   8) determines the "help url" that will be associated to the
--      "word" and the "target url" that will be associated to the
--      "close",
--   9) composes "o", "w", "r", "c" and the help url and the target
--      url to build an htmlization of the sexp.
--
-- I found a nice hackish way to detect if a line "has an elink".
-- The algorithm is non-recursive, does not backtrack, runs very
-- quickly, and can be implemented in Lua using just string.gsub,
-- string.reverse and string.match. And it doesn't need Lpeg!...
--
-- The rough idea is:
--   1) first simplify all literal strings - like "foo bar" - by:
--     1a) replacing all backslash-char pairs by "__"s, and then
--     1b) replacing all chars inside double-quotes by "_"s;
--   2) then, starting from the right, use Lua's "%b" pattern to find
--      matching "()"s.
--
-- Part of the trick is that we use string.reverse judiciously at the
-- right points of the algorithm to perform pattern matches "starting
-- from the right". Also, we produce a "simplified string" and work on
-- it, but we keep the original string (that has the same length as
-- the simplified one), and after doing all the parsing and
-- discovering where the sexp and all its "elements" start and end we
-- go back to the original string.
--
-- Here's an example that illustrates how the algorithm works.
--   line     = [[ # (foo "a") (bar "plic: \"ploc\"") ]]
--
--   skel     = [[ # (foo "a") (bar "plic: __ploc__") ]]
--   leks     = [[ # (foo "_") (bar "______________") ]]:reverse()
--   secaps   = [[ ]]:reverse()
--   lekspxes = [[(bar "______________")]]:reverse()
--   sexpskel = [[(bar "______________")]]
--   erp      = [[ # (foo "_") ]]:reverse()
--
--   pre      = [[ # (foo "a") ]]
--   sexp     = [[(bar "plic: \"ploc\"")]]
--   spaces   = [[ ]]
--   1        = {0=[[bar]], 2, 5},
--   2        = {0=[["plic: \"ploc\""]], 6, 22}
--
--   o        = [[(]]
--   w        = [[bar]]
--   r        = [[ "plic: \"ploc\"]]
--   c        = [[")]]

require "eoo"       -- (find-blogme4 "eoo.lua")
require "common"    -- (find-blogme4 "common.lua")

-- Q is the default htmlizer for plain text; when nothing else defined
-- it, fall back to "id" (presumably the identity function from
-- common.lua - confirm there).
Q = Q or id         -- (find-blogme4 "anggdefs.lua" "Q")

-- Some utility functions

-- Return the last char of "str" when it is not a "/", nil otherwise.
-- Note that "" yields nil, i.e., "" counts as a directory.
notdir = function (str) return str:match "[^/]$" end

-- Append "ext" to "fname" unless "fname" looks like a directory
-- (see notdir). A nil "fname" or "ext" leaves "fname" untouched.
addfileext = function (fname, ext)
  if fname and ext and notdir(fname) then return fname..ext end
  return fname
end

-- Append "#anchor" to "url" when both are given; else return "url".
addanchor = function (url, anchor)
  if url and anchor then return url.."#"..anchor end
  return url
end

-- addfileext followed by addanchor.
addextanchor = function (fname, ext, anchor)
  return addanchor(addfileext(fname, ext), anchor)
end

-- Wrap "text" in an <a href="url"> element; with a nil/false "url"
-- return "text" unchanged. "url" is inserted verbatim (not escaped).
href_ = function (url, text)
  if url then return "<a href=\""..url.."\">"..text.."</a>" end
  return text
end

-- Build base..offset, then add "ext" (only when the result is not a
-- directory) and "#anchor". Returns nothing when "offset" is nil.
buildurl_ = function (base, offset, ext, anchor)
  if not offset then return end
  local url = base..offset
  if notdir(url) and ext then url = url..ext end
  if anchor then url = url.."#"..anchor end
  return url
end

-- A Sexpline is a table with a "line" field; the methods below add
-- the derived fields (pre, sexp, spaces, word, eword, o, w, r, c,
-- helpurl, targeturl, sexphtml, linehtml) step by step. Each "step"
-- method returns a true value on success so that the steps can be
-- chained with "and" (see sexphtml_).
-- "Class" comes from eoo.lua; presumably it makes "Sexpline {...}"
-- construct an object whose methods are looked up in __index.
Sexpline = Class {
  type    = "Sexpline",
  __index = {
    -- Two functions to split fields, calculating new fields.

    -- This one splits "line" into "pre", "sexp", and "spaces",
    -- and, as a bonus, it obtains the "elements" of the sexp
    -- (stored in integer-indexed positions).
    -- Returns true when the line ends with a sexp, nothing otherwise.
    presexpspaces_ = function (sexpline)
      local line = sexpline.line
      -- 1a) backslash-char pairs become "__" (same length as before)
      local skel = line:gsub("\\.", "__")
      -- 1b) working on the reversed string, blank out string contents
      local blank = function (s) return '"'..("_"):rep(#s)..'"' end
      local leks = skel:reverse():gsub('"([^"]-)"', blank)
      -- 2) in the reversed string the sexp is a balanced ")...(",
      --    preceded only by (reversed) trailing blanks
      local secaps, lekspxes, erp = leks:match("^([ \t]*)(%b)()(.*)")
      if not erp then return end
      -- go back to the original string using the lengths found above
      local pre      = line:sub(1, #erp)
      local sexpskel = lekspxes:reverse()
      local sexp     = line:sub(1+#pre, #pre+#sexpskel)
      local spaces   = secaps:reverse()
      -- bonus: split the "sexp" into its "elements" and store them
      -- as tables in integer-indexed fields in the sexpline structure.
      -- Each element is {s, e, [0]=text}: "s" is its first position in
      -- "sexp" and "e" is the position just after its last char.
      local n, pos = 0, 2            -- pos=2: skip the opening "("
      local parseelement = function (pat)
        local s, e = sexpskel:match(pat, pos)
        if s then
          n = n + 1
          sexpline[n] = {s, e, [0]=sexp:sub(s, e-1)}
          pos = e
          return true
        end
      end
      while parseelement "^[ \t]*()[^ \t\"()]+()"    -- symbol or number
         or parseelement "^[ \t]*()\"_*\"()"         -- string
         or parseelement "^[ \t]*()%b()()" do        -- list
      end
      sexpline.pre    = pre
      sexpline.sexp   = sexp
      sexpline.spaces = spaces
      return true
    end,

    -- This one splits the "sexp" field into "o", "w", "r", "c"
    -- (for the standard way of htmlizing sexp hyperlinks).
    -- Returns true only when the sexp really has the shape
    -- "(word...)"; a sexp like "( word ...)" is split into elements
    -- by presexpspaces_ but does not match here, and reporting success
    -- in that case would make sexphtml__ concatenate nil fields.
    owrc_ = function (sexpline)
      if not sexpline.sexp then return end
      local pat = "^(%()([-!$%&*+,/:<=>?@^_0-9A-Za-z]+)(.-)(\"?%))$"
      local o, w, r, c = sexpline.sexp:match(pat)  -- open, word, rest, close
      sexpline.o = o
      sexpline.w = w
      sexpline.r = r
      sexpline.c = c
      if o then return true end
    end,

    -- Notice the logical gap here! "sexphtml__" uses the fields
    -- "helpurl" and "targeturl", that are set by "sexpurls_" (below).
    -- About specials (like images): they're not supported yet!
    -- Sets "sexphtml" and returns true when at least one url is set.
    sexphtml__ = function (sexpline)
      if sexpline.helpurl or sexpline.targeturl then
        sexpline.sexphtml =
          sexpline.o ..
          href_(sexpline.helpurl,   Q(sexpline.w)) ..
          Q(sexpline.r) ..
          href_(sexpline.targeturl, sexpline.c)
        return true
      end
    end,

    -- Sets "linehtml": pre + sexphtml + spaces when "sexphtml" exists,
    -- else the whole line. "htmlizer" (default: Q) is applied to the
    -- text outside the sexp. Returns the sexpline.
    linehtml__ = function (sexpline, htmlizer)
      htmlizer = htmlizer or Q
      if sexpline.sexphtml then
        sexpline.linehtml =
          htmlizer(sexpline.pre) ..
          sexpline.sexphtml ..
          sexpline.spaces
      else
        sexpline.linehtml = htmlizer(sexpline.line)
      end
      return sexpline
    end,
    --
    -- Run all the steps up to "sexphtml"; stops at the first step
    -- that fails and returns its (false/nil) result.
    sexphtml_ = function (sexpline, htmlizer)
      return sexpline:presexpspaces_()
         and sexpline:eword_()
         and sexpline:sexpurls_()       -- defined below
         and sexpline:owrc_()
         and sexpline:sexphtml__(htmlizer)
    end,
    -- Run everything: compute "sexphtml" if possible, then
    -- "linehtml". The "htmlizer" argument is forwarded to both steps
    -- (it used to be dropped here, so custom htmlizers were ignored).
    linehtml_ = function (sexpline, htmlizer)
      sexpline:sexphtml_(htmlizer)
      sexpline:linehtml__(htmlizer)
      return sexpline
    end,
    --
    -- Two functions to extract the "elements" of the sexp, as strings.
    -- Like this, but 1-based and typed: (find-elnode "List Elements" "nth")
    -- symbol(n): text of element n unless it starts with (, ) or ".
    symbol = function (sexpline, n)
      return sexpline[n] and sexpline[n][0]:match"^([^()\"].*)$"
    end,
    -- string(n): text between the outer double quotes of element n
    -- (backslash escapes are kept as they are); nil for non-strings.
    string = function (sexpline, n)
      return sexpline[n] and sexpline[n][0]:match"^\"(.*)\"$"
    end,
    --
    -- Set "word" (the head, if it is a symbol) and "eword" (its entry
    -- in the global table "ewords"); returns the eword or nil.
    eword_ = function (sexpline)
      sexpline.word  = sexpline:symbol(1)
      sexpline.eword = ewords[sexpline.word]
      return sexpline.eword
    end,
    -- Ask the eword for the help url and the target url, passing the
    -- second and third elements (when they are strings) as "a", "b".
    sexpurls_ = function (sexpline)
      local eword = sexpline.eword
      if eword then
        local a, b = sexpline:string(2), sexpline:string(3) -- no specials yet
        sexpline.helpurl   = eword:helpurl_()
        sexpline.targeturl = eword:targeturl_(a, b)
        -- return sexpline.helpurl, sexpline.targeturl
        return true
      end
    end,
  },
}

-- An Eword describes how to htmlize sexps with a given head.
-- Fields used by the default methods: "help", "base", "ext".
-- An eword may carry its own "targeturl_" field to replace the
-- default below (see ewords["to"]).
Eword = Class {
  type    = "Eword",
  __index = {
    helpurl_   = function (eword) return eword.help end,
    -- No "base" means no target url.
    targeturl_ = function (eword, a, b)
      return eword.base and eword:f(a, b)
    end,
    -- Default target: base..a, plus "ext" and "#"..b when applicable.
    f = function (eword, a, b)
      return addextanchor(a and eword.base..a, eword.ext, b)
    end,
  },
}

-- The table of known heads: ewords["find-foo"] = Eword {...}.
ewords = {}

-- htmlizeline_ returns the whole Sexpline object (for debugging);
-- htmlizeline returns just the htmlized line.
htmlizeline_ = function (line, htmlizer)
  return (Sexpline {line=line}):linehtml_(htmlizer)
end
htmlizeline = function (line, htmlizer)
  return (Sexpline {line=line}):linehtml_(htmlizer).linehtml
end
-- Htmlize each line of "bigstr". Note: this returns both results of
-- string.gsub (the new string and the number of matches).
htmlizelines = function (bigstr, htmlizer)
  local f = function (line) return htmlizeline(line, htmlizer) end
  return bigstr:gsub("[^\n]*", f)
end

--
--   __ _ _ __   __ _  __ _
--  / _` | '_ \ / _` |/ _` |
-- | (_| | | | | (_| | (_| |
--  \__,_|_| |_|\__, |\__, |
--              |___/ |___/

-- A "targeturl_" method that ignores the anchor argument.
targeturl_base_a = function (eword, a, b)
  return a and eword.base..a   -- use just the a
end
-- A "targeturl_" method for (to "anchor"): a link inside the page.
targeturl_to = function (eword, a, b)
  return a and "#"..a
end

eevarticle = eevarticle or "http://angg.twu.net/eev-article.html"

--[[
ewords["to"] = Eword {
  help = eevarticle.."#anchors",
  -- base = "",
  -- targeturl = function (eword, sexp)
  --     local anchor = sexp:string(2)
  --     if anchor then return "#"..anchor end
  --   end,
}
--]]

-- Ew: build an Eword from the table "ew", filling in the default
-- help url. Note that "ew" itself is modified.
Ew = function (ew)
  ew.help = ew.help or eevarticle.."#shorter-hyperlinks"
  return Eword(ew)
end
-- Ewa: like Ew, but the target url uses only the first string
-- argument (targeturl_base_a). The method that sexpurls_ calls is
-- "targeturl_", so the override has to be stored under that name;
-- storing it only as "targeturl" left the default method in effect.
-- The "targeturl" field is still set in case something else reads it.
Ewa = function (ew)
  ew.targeturl_ = targeturl_base_a
  ew.targeturl  = targeturl_base_a
  return Ew(ew)
end

ewords["to"] = Ew {
  help       = eevarticle.."#anchors",
  targeturl_ = targeturl_to,
}

-- Register the ewords "find-<c>file", "find-<c>" and "find-<c>w3m"
-- with base url "d". code_c_d_angg first passes "d" through "pathto"
-- (defined elsewhere - presumably it makes "d" relative to the
-- current page; confirm there).
code_c_d_angg = function (c, d)
  code_c_d_remote(c, pathto(d))
end
code_c_d_remote = function (c, d)
  ewords["find-"..c.."file"] = Ewa {base = d}
  ewords["find-"..c]         = Ew  {base = d, ext = ".html"}
  ewords["find-"..c.."w3m"]  = Ewa {base = d}
end

code_c_d_angg("angg",      "")             -- (find-angg "blogme4/")
code_c_d_angg("es",        "e/")           -- (find-es "lua5")
code_c_d_angg("dednat4",   "dednat4/")     -- (find-dednat4 "")
code_c_d_angg("dn4",       "dednat4/")
code_c_d_angg("dn4ex",     "dednat4/examples/")
code_c_d_angg("dn5",       "dednat5/")
code_c_d_angg("blogme",    "blogme/")
code_c_d_angg("blogme3",   "blogme3/")
code_c_d_angg("blogme4",   "blogme4/")
code_c_d_angg("eev",       "eev-current/")
code_c_d_angg("flua",      "flua/")
code_c_d_angg("rubyforth", "rubyforth/")
code_c_d_angg("vtutil",    "vtutil/")
code_c_d_angg("vtutil4",   "vtutil4/")
code_c_d_angg("RETRO",     "RETRO/")
ewords["find-es"].ext = ".e.html"

-- dump-to: tests

-- (find-blogme4 "angglisp.lua")

--  _____         _      __                  _   _
-- |_   _|__  ___| |_   / _|_   _ _ __   ___| |_(_) ___  _ __  ___
--   | |/ _ \/ __| __| | |_| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
--   | |  __/\__ \ |_  |  _| |_| | | | | (__| |_| | (_) | | | \__ \
--   |_|\___||___/\__| |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
--

-- Build a "range dump" for "line": a string with one char per
-- position, "p" for the pre, "-" for the sexp, "s" for the spaces,
-- and the element number over each element of the sexp.
-- NOTE(review): "elinksplit_" is not defined in this file - confirm
-- that it still exists somewhere before relying on this function.
elinksplittest1 = function (line)
  local pre, sexp, spaces, elements = elinksplit_(line)
  if not pre then return end
  local chars = {}
  -- fill chars[s..e-1], positions relative to the whole line
  local absrange = function (s, e, char)
    for i=s,e-1 do chars[i]=char end
  end
  -- same, with positions relative to the start of the sexp
  local range = function (s, e, char)
    absrange(#pre+s, #pre+e, char)
  end
  absrange(1,            1+#pre,               "p")
  absrange(1+#pre,       1+#pre+#sexp,         "-")
  absrange(1+#pre+#sexp, 1+#pre+#sexp+#spaces, "s")
  for i,elt in ipairs(elements) do range(elt[1], elt[2], i) end
  return table.concat(chars)
end

-- Print each line of "bigstr" followed by its range dump (if any).
elinksplittest = function (bigstr)
  for _,line in ipairs(splitlines(bigstr)) do
    print(" -- [["..line.."]]")
    local ranges = elinksplittest1(line)
    if ranges then print(" -- "..ranges) end
  end
end

-- Swap keys and values: transpose {"a", "b"} is {a=1, b=2}.
transpose = function (T)
  local TT = {}
  for k,v in pairs(T) do TT[v] = k end
  return TT
end

-- Return a list of {key=k, val=T[k]} pairs: first the keys listed in
-- "K" (a list, or a string that is passed to "split"), in that order,
-- then all the other keys of T in the order given by
-- "tos_sorted_pairs" (defined elsewhere).
sortedpairs = function (T, K)
  local P = {}
  local add = function (k) table.insert(P, {key=k, val=T[k]}) end
  K = (type(K) == "string" and split(K)) or K or {}
  local KT = transpose(K)
  for _,k in ipairs(K) do add(k) end
  for _,pair in ipairs(tos_sorted_pairs(T)) do
    if not KT[pair.key] then add(pair.key) end
  end
  return P
end

-- The fields of a Sexpline, in the order in which they are computed.
sexppairs = function (sexp)
  return sortedpairs(sexp, [[ line pre sexp spaces word eword o w r c helpurl targeturl sexphtml linehtml ]])
end
isexppairs_ = function (sexp) return ipairs(sexppairs(sexp)) end
isexppairs  = function (line) return ipairs(sexppairs(htmlizeline_(line))) end

-- Htmlize "line" and print all the non-false fields of the result.
isp = function (line)
  for _,kv in isexppairs(line) do
    if kv.val then print(kv.key.." = "..tos(kv.val)) end
  end
end

--  _____         _
-- |_   _|__  ___| |_ ___
--   | |/ _ \/ __| __/ __|
--   | |  __/\__ \ |_\__ \
--   |_|\___||___/\__|___/
--
--[==[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
eevarticle = "eev-article.html"
dofile "elisp.lua"
li = [[ foo (to "plic") ]]
PP(htmlizeline_(li))

se = Sexpline {line = li}
PP(se:presexpspaces_());    PP(se)
PP(se:eword_()         );   PP(se)
PP(se:sexpurls_()      );   PP(se)
PP(se:owrc_()          );   PP(se)
PP(se:sexphtml_(htmlizer)); PP(se)

isp [[ foo (to "plic") ]]

for _,kv in isexppairs(" foo (bar plic) ") do PP(kv) end
for _,kv in isexppairs([[ foo (to "bar") ]]) do
  if kv.val then print(kv.key.." = "..tos(kv.val)) end
end

PP(elinksplit " foo (bar plic) ")
--> {
--    line  =" foo (bar plic) ",
--    pre   =" foo "           ,
--    sexp  =     "(bar plic)" ,
--    spaces=               " ",
--    1={0="bar",  2, 5},
--    2={0="plic", 6, 10},
--  }

elinksplittest [[
  For lines with elinks, like the one below,
  # (foo "a") (bar "plic: \"ploc\"")
  the test function shows a "range dump".
]]
--> [[  For lines with elinks, like the one below,]]
--  [[  # (foo "a") (bar "plic: \"ploc\"")]]
--      pppppppppppppp-111-2222222222222222-
--  [[  the test function shows a "range dump".]]


* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
require "elisp"
for li in io.lines("build.lua") do
  local sexp = elinksplit(li)
  local symbol = sexp:symbol(1)
  if symbol then
    local a, b = sexp:string(2), sexp:string(3)
    print(sexp.sexp)
    PP(a, b)
  end
end

-- (find-fline "build.lua")

-- for _,li in ipairs(splitlines(readfile "dednat5/README")) do tt(li) end
-- map(tt, splitlines(readfile "dednat5/README"))
-- tt [[ # (find-fline "foo") (find-fline "bar") ]]


* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "elisp.lua"
el = elinksplit [[# (find-angg "blogme3/elisp.lua")]]
PP(el)
PP(el:sexpsplit())
el = elinksplit [[# (find-angg "blogme3/")]]
print(el:sexphtml())
el = elinksplit [[# (find-angg "blogme3/elisp.lua")]]
print(el:sexphtml())
el = elinksplit [[# (find-angg "blogme3/elisp.lua" "foo")]]
print(el:sexphtml())
el = elinksplit [[# (find-image "foo.jpg")]]
print(el:sexphtml())


* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
require "elisp"
s = Sexpline { line = [[ # (to "targ") ]] }
= s:get_linehtml()
PP(s)
mykeys = split [[ line pre sexp spaces word eword o w r c helpurl targeturl sexphtml linehtml ]]
mykeyst = transpose(mykeys)
for _,k in ipairs(mykeys) do
  print(" "..k.."="..tos(s[k])..",")
end
for _,p in ipairs(tos_sorted_pairs(s)) do
  if not mykeyst[p.key] then
    print(" "..p.key.."="..tos(p.val)..",")
  end
end

PP(s.eword)
PP(s.eword:get_targeturl("targ", "b"))

  line=" # (to \"targ\") ",
  pre =" # ",
  sexp=   "(to \"targ\")",
  spaces=              " ",
  1=      {0="to", 1=2, 2=4},
  2=         {0="\"targ\"", 1=5, 2=11},
  word=    "to",
  eword={"help"="http://angg.twu.net/eev-article.html#anchors", "targeturl"=<function: 0x8f45f78>},
  helpurl="http://angg.twu.net/eev-article.html#anchors",
  o=      "(",
  w=       "to",
  r=         " \"targ",
  c=                "\")",
  sexphtml="(<a href=\"http://angg.twu.net/eev-article.html#anchors\">to</a> \"targ\")",
  linehtml=" # (<a href=\"http://angg.twu.net/eev-article.html#anchors\">to</a> \"targ\") ",

--]==]

-- Local Variables:
-- coding:             raw-text-unix
-- ee-anchor-format:   "«%s»"
-- End: