blogme4/etc.lua (htmlized)

Warning: this is an htmlized version!
The original is here, and
the conversion rules are here.
-- Scratch code that is not being used but that I do not want to
-- delete (yet).

--- Low-level parsing functions for blogme4 (for parsing arglists).
-- To explain these functions I need to explain what they are designed
-- to parse, and introduce some terminology. Here it goes.
--
-- The Language
-- ============
-- In the blogme "language" we have only three character classes:
--   "brackets"    ('[' and ']'),
--   "space chars" (SPC, TAB, CR, NL, maybe FF), and
--   "word char"   (everything else).
-- The "regular chars" are all those that are not brackets; i.e.,
-- the word chars plus the space chars.
-- A "block" is a sequence of chars that matches the lua pattern
-- "%b[]".
-- A "word" is a sequence of one or more word chars.
-- A "spaces" is a sequence of one or more space chars.
-- A "regulars" is a sequence of one or more regular chars.
-- A "long word" is a sequence of one or more (word | block)s.
-- A "very long word" is a sequence of one or more (regulars | block)s.
--
-- Evaluation
-- ==========
-- Let's start with an example.
-- If we define the blogme word HREF as this,
--   def [[ HREF 2 url,body "<a href=\"$url\">$body</a>" ]]
-- then evaluating this
--   [HREF http://foo/ bar plic]
-- calls
--   HREF("http://foo/", "bar plic"),
-- which returns:
--   '<a href="http://foo/">bar plic</a>'.
-- What matters to us now is that the "2" in "def [[ HREF 2 ... ]]"
-- says that "HREF", as a blogme word, is a function of two arguments,
-- parsed in the default way; this means that to build an arglist from
--   " http://foo/ bar plic"
-- we first skip spaces, then we parse a long word, then skip spaces
-- again, then we parse a "very long word" - because the default is
-- that the last argument is always a very long word.
--
-- Here is a more realistic example. Evaluating
--   [HREF http://[+ 1 2]/  bar  [* [+ 1 2] [+ 3 4]][* 10 10]plic]
-- calls, after all the "+"s and "*"s, this:
--   HREF("http://3/", "bar  21100plic")
-- That is, apparently
--   [HREF http://[+ 1 2]/  bar  [* [+ 1 2] [+ 3 4]][* 10 10]plic]
-- gets replaced by
--   [HREF http://3/  bar  [* 3 7]100plic]
-- and that by:
--   [HREF http://3/  bar  21100plic]
-- More precisely: to obtain the "result" of parsing a long word, like
--   "http://[+ 1 2]/"
-- or a very long word, like
--   "bar  [* [+ 1 2] [+ 3 4]][* 10 10]plic"
-- we leave the "words" and the "regulars" in them unchanged, we
-- evaluate the blocks in them, and we concatenate all the results...
--
-- Doing that fully involves a nasty recursion, and the best way that
-- I have found to factor the complexity out is to parse "tlongwords"
-- and "tverylongwords". The result of parsing this:
--   "http://[+ 1 2]/"
--    ^      ^     ^
--    1      8     14
-- as a tlongword is:
--   {"http://", {8, 14}, "/"}
-- The "literal" parts that do not require evaluation are stored as
-- strings, the other ones are stored as pairs - the position of the
-- '[' and the position after the corresponding ']'.

-- See: (find-angg "LUA/canvas2.lua" "Class")
--      (find-blogme3file "definers.lua")
-- require "eoo"  -- (find-blogme4 "eoo.lua")
-- (find-blogme3 "brackets.lua" "parsers")
-- (find-blogme4 "brackets.lua")
-- (find-es "lua5" "lpeg-quickref")
-- (find-luamanualw3m "#5.4.1" "Patterns")



-- (find-es "lua5" "lpeg-quickref")
-- (find-luamanualw3m "#5.4.1" "Patterns")




-- Scratch test: load the "brackets" module, then call two of its helpers
-- on the global `str`.
-- NOTE(review): `str` is not assigned anywhere in the visible part of this
-- file; presumably it is set interactively before these lines are run -
-- confirm.
-- NOTE(review): printbracketstructure, bracketstructure and PP are defined
-- outside this file; presumably the first prints a diagram of the bracket
-- nesting and PP is a pretty-printer - confirm.
-- NOTE(review): the hyperlink on the next line points to "eoo.lua" although
-- the module being required is "brackets" - possibly a copy-paste leftover.
require "brackets"  -- (find-blogme4 "eoo.lua")
printbracketstructure(str)
PP(bracketstructure(str))
-- (find-angg "LUA/canvas2.lua")
--- Overwrite part of a string with another string at a given position.
-- The result is the first `pos - 1` characters of `str` (right-padded with
-- spaces when `str` is shorter than that), followed by `other`, followed
-- by whatever remains of `str` after the overwritten span.
-- @param str    the string to write into
-- @param other  the string to place at position `pos`
-- @param pos    1-based position in `str` where `other` starts
-- @return the resulting string
string.replace = function (str, other, pos)
    local prefix_len = pos - 1
    local prefix     = str:sub(1, prefix_len)
    -- How many columns `str` falls short of reaching `pos`.
    local missing    = prefix_len - #prefix
    if missing > 0 then
      prefix = prefix .. string.rep(" ", missing)
    end
    local suffix = str:sub(pos + #other)
    return prefix .. other .. suffix
  end

* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
-- Environment setup for the lpeg experiments that follow.
-- NOTE(review): userocks and loadlpeg are defined outside this file;
-- presumably they set up the LuaRocks search paths and make the global
-- `lpeg` table available - confirm.
userocks()
loadlpeg()
--- Wrap a Lua string pattern as a function of (subj, pos).
-- The returned function runs `subj:match(pat, pos)` and returns whatever
-- that call returns. It is used further down as `Block`, combined with
-- lpeg patterns via `+`.
-- NOTE(review): this presumably relies on lpeg treating a function operand
-- as a match-time function called with (subject, position) - confirm
-- against the lpeg manual.
-- @param pat  a Lua pattern string (see string.match)
-- @return a function (subj, pos) -> results of string.match
lpeg.pattern = function (pat)
    local function matcher (subj, pos)
      return subj:match(pat, pos)
    end
    return matcher
  end
-- lpeg definitions for the character classes and "words" of the blogme
-- "language" described in the header comment of this file.

-- Single-character classes.
BracketChar  = lpeg.S("[]")
SpaceChar    = lpeg.S(" \t\n\r")
RegularChar  = lpeg.P(1) - BracketChar                  -- anything but a bracket
WordChar     = lpeg.P(1) - (BracketChar + SpaceChar)    -- neither bracket nor space

-- Runs of characters.
Spaces       = SpaceChar^1
OptSpaces    = SpaceChar^0
Word         = WordChar^1
RegularChars = RegularChar^1

-- A balanced "[...]" block, matched with the Lua pattern "%b[]". Block is
-- a plain function (built by lpeg.pattern), not an lpeg pattern object;
-- the `+` in the two definitions below combines it with lpeg patterns.
Block        = lpeg.pattern("^%b[]()")

-- One or more (block | word)s, and one or more (block | regulars)s.
LongWord     = (Block + Word)^1
VeryLongWord = (Block + RegularChars)^1







-- Local Variables:
-- coding:             raw-text-unix
-- ee-anchor-format:   "«%s»"
-- End: