Warning: this is an htmlized version!
The original is here, and
the conversion rules are here.
--- flua-comp.lua - compile a flua program into an engine file in C
--- and a bytecode file in Nasm.
--
-- Copyright (C) 2001 Eduardo Ochs.
-- Author:     Eduardo Ochs <edrx@mat.puc-rio.br>
-- Version:    0.02 (2001apr29).
--
-- This program was copylefted to prevent against patent psychopaths;
-- if you want a version with any other license you'll have to write
-- it yourself. More formally,
--
-- This program is free software; you can redistribute it and/or
-- modify it under the terms of the GNU General Public License as
-- published by the Free Software Foundation; either version 2 of the
-- License, or (at your option) any later version.


-- Flua is an experimental Forth-like language, built on top of Lua, C
-- and Nasm. It is an implementation of the ideas of my (old) "Crim"
-- project.
--
-- Related files:
-- (find-flua "flua.lua")
-- (find-flua "flua-prims.lua")
-- (find-flua "flua-demos.lua")
-- (find-flua "flua-lua.lua")
-- (find-flua "inc.lua")


-- «.nasmnames»			(to "nasmnames")
-- «.xprims_structs»		(to "xprims_structs")
-- «.asm»			(to "asm")
-- «.lbl_glbl_tolbl»		(to "lbl_glbl_tolbl")
-- «.add_xxxprims»		(to "add_xxxprims")
-- «.strings»			(to "strings")
-- «.build_engine_strings»	(to "build_engine_strings")
-- «.skeleton_files»		(to "skeleton_files")
-- «.basic_parsing»		(to "basic_parsing")
--   «.getword»			(to "getword")
--   «.getrestofline»		(to "getrestofline")
--   «.getupto»			(to "getupto")
--   «.f»			(to "f")
--   «.tick»			(to "tick")
--   «.gtick»			(to "gtick")
--   «.dbstuff»			(to "dbstuff")
--   «.cnword»			(to "cnword")
--   «.c1word»			(to "c1word")
-- «.lua_code_in_flua»		(to "lua_code_in_flua")
--   «.getluaargs»		(to "getluaargs")
--   «.immed_lua»		(to "immed_lua")
--   «.word:lua»		(to "word:lua")
--   «.word:nasm»		(to "word:nasm")
--   «.word:dbstuff»		(to "word:dbstuff")
--   «.word:function»		(to "word:function")
-- «.tmplabels»			(to "tmplabels")
--   «.word:lbl:»		(to "word:lbl:")
--   «.word:tolbl»		(to "word:tolbl")
--   «.word:if»			(to "word:if")
--   «.word:then»		(to "word:then")
--   «.word:else»		(to "word:else")



--%%%%%
--%
--% «nasmnames»  (to ".nasmnames")
--%
--%%%%%

function nasmify(str)
  return gsub(str, "([^0-9A-Za-z_])",
    function (c)
      return format("x%02x", strbyte(c))
    end)
end
function names2cname(arr)
  if getn(arr) == 0 then print("Tried to cname an empty array") end
  local cname = {}
  foreachi(arr,
    function(_, name)
      if not strfind(name, "[^0-9A-Za-z_]") then %cname[1] = name end
    end)
  return cname[1] or nasmify(arr[1])
end

-- px(names2cname(split("TIMES *")))
-- px(names2cname(split("/ *")))



--%%%%%
--%
--% «xprims_structs»  (to ".xprims_structs")
--%
--%%%%%

-- We define this separately because SFprims uses it in a weird way.
function basic_assert_used(xprims, cname)
  if not xprims.cname2usedp[cname] then
    xprims.cname2usedp[cname] = "used"
    tinsert(xprims.cnames_used, cname)
    xprims.cname2n[cname] = getn(xprims.cnames_used)
  end
end

function new_xprims_struct(opcode_first, opcode_step, use_all)
  local xprims = {}
  xprims.cnames = {}
  xprims.cname2code = {}
  xprims.cname2usedp = {}
  xprims.cnames_used = {}
  xprims.use_all = use_all
  xprims.assert_used = basic_assert_used
  xprims.add_cname_code = function(self, cname, code)
    if not self.cname2code[cname] then
      tinsert(self.cnames, cname)
    end
    self.cname2code[cname] = code
    if self.use_all then self:assert_used(cname) end
  end
  xprims.cname2n = {}
  xprims.n = function(self) return getn(self.cnames_used) end
  xprims.opcode_first = opcode_first
  xprims.opcode_step = opcode_step
  xprims.opcode = function(self, cname)
    if not self.cname2n[cname] then printf("No opcode for %s\n", cname) end
    return self.opcode_first + (self.cname2n[cname] - 1) * self.opcode_step
  end
  xprims.opcode_last = function(self)
    return self.opcode_first + (self:n() - 1) * self.opcode_step
  end
  xprims.forall_used = function(self, f)
    foreachi(self.cnames_used, function (i, cname)
        %f(cname, %self.cname2code[cname], %self.cname2n[cname])
      end)
  end
  return xprims
end

  Hprims = new_xprims_struct(0, 1)
FIPprims = new_xprims_struct(65535, -1, "use_all")
  Fprims = new_xprims_struct(4095, -1)     -- the "4096" will be adjusted later
 SFprims = new_xprims_struct(255, -1)      -- and also some fields of SFprims

function SFprims:assert_used(SFcname)
  Fprims:assert_used(strsub(SFcname, 2))   -- use the "F_xxx" from the "SF_xxx"
  basic_assert_used(SFprims, SFcname)
end

action = {}



--%%%%%
--%
--% «asm»  (to ".asm")
--%
--%%%%%

function nasm(nbytes, ...)
  strings.nasmbytecode = strings.nasmbytecode .. call(format, arg)
  -- call(printf, arg)
end

function db_prim(symbol)
  nasm(1, "\tdb %s\n", symbol)
end
function dhl_prim(symbol)
  nasm(2, "\tdhl %s\n", symbol)
end
function db_forth(symbol)
  nasm(2, "\tdhl %s\n", symbol)
end

-- «lbl_glbl_tolbl»  (to ".lbl_glbl_tolbl")
function lbl(labelname)
  nasm(0, "%s:\n", labelname)
end
function glbl(labelname)
  nasm(0, "    global %s\n", labelname)
  nasm(0, "%s:\n", labelname)
end
function tolbl(labelname)
  nasm(2, format("\tdw %s -_f0\n", labelname))
end




--%%%%%
--%
--% «add_xxxprims»  (to ".add_xxxprims")
--%
--%%%%%

function foreach2(arr, f)
  local i=1
  while i<=getn(arr) do
    f(arr[i], arr[i+1])
    i=i+2
  end
end

function add_Hprims(...)
  foreach2(arg, function(names_str, code)
    local cname = "H_"..names2cname(split(names_str))
    Hprims:add_cname_code(cname, code)
    foreachi(split(names_str), function(i, name)
      local cname = %cname
      action[name] = function ()
        Hprims:assert_used(%cname)
	db_prim(%cname)
      end
      action["H_"..name] = action[name]
      action[name..":"] = action[name]
    end)
  end)
end

Fprims.name2cname = {}

function add_Fprims(...)
  foreach2(arg, function(names_str, code)
    local cname = "F_"..names2cname(split(names_str))
    Fprims:add_cname_code(cname, code)
    foreachi(split(names_str), function(i, name)
      local cname = %cname
      Fprims.name2cname[name] = cname
      action[name] = function ()
          Fprims:assert_used(%cname)
	  dhl_prim(%cname)
        end
      action["F_"..name] = action[name]
    end)
  end)
end

function add_FIPprims(...)
  foreach2(arg, function(names_str, code)
    local cname = "FIP_"..names2cname(split(names_str))
    FIPprims:add_cname_code(cname, code)
  end)
end


function add_SFprims(names_str, mark_as_used)
  foreachi(split(names_str),
    function(i, name)
      local Fcname = Fprims.name2cname[name]
      local SFcname = "S"..Fcname
      SFprims:add_cname_code(SFcname, Fcname)   -- too skeletal?
      if %mark_as_used then SFprims:assert_used(SFcname) end
      action[name] =
	function ()
          SFprims:assert_used(%SFcname)
	  db_prim(%SFcname)
        end
      action["SF_"..name] = action[name]
    end)
end




--%%%%%
--%
--% «strings»  (to ".strings")
--%
--%%%%%

strings = {}
strings.nasmbytecode = ""
strings.Cextras1=""

-- «build_engine_strings»  (to ".build_engine_strings")
function build_engine_strings()
  strings.Hprims=""
  strings.Fprims=""
  strings.SFprims=""
  strings.FIPprims=""
  strings.Cdefs=""
  strings.Cdefs_LAST=""
  Hprims:forall_used(function (cname, code, n)
    strings.Hprims = strings.Hprims .. format("case %s: %s\n", cname, code)
    strings.Cdefs = strings.Cdefs ..
      format("#define %-16s 0x%02X\n", cname, Hprims:opcode(cname))
  end)
  FIPprims:forall_used(function (cname, code, n)
    strings.FIPprims = strings.FIPprims .. format("case %s: %s\n", cname, code)
    strings.Cdefs = strings.Cdefs ..
      format("#define %-16s 0x%02X\n", cname, FIPprims:opcode(cname))
  end)
  SFprims:forall_used(function (cname, code, n)
    --strings.SFprims = strings.SFprims .. format("(SF) %s: %s\n", cname, code)
    strings.SFprims = strings.SFprims .. format("%s, ", code)
    strings.Cdefs = strings.Cdefs ..
      format("#define %-16s 0x%02X\n", cname, SFprims:opcode(cname))
  end)
  Fprims.opcode_first = SFprims:opcode_last() * 256 - 1
  Fprims:forall_used(function (cname, code, n)
    strings.Fprims = strings.Fprims .. format("case %s: %s\n", cname, code)
    strings.Cdefs = strings.Cdefs ..
      format("#define %-16s 0x%04X\n", cname, Fprims:opcode(cname))
  end)
  strings.Cdefs_LAST =
    format("#define %-16s 0x%02X\n",   "H_LAST",   Hprims:opcode_last()) ..
    format("#define %-16s 0x%04X\n", "FIP_LAST", FIPprims:opcode_last()) ..
    format("#define %-16s 0x%02X\n",  "SF_LAST",  SFprims:opcode_last()) ..
    format("#define %-16s 0x%04X\n",   "F_LAST",   Fprims:opcode_last())
end

-- dump_engine_strings(): for debugging.
function dump_engine_strings()
  printf("\n/* Cdefs: */\n%s",        strings.Cdefs)
  printf("\n/* Cdefs_LAST: */\n%s",   strings.Cdefs_LAST)
  printf("\n/* Cextras1: */\n%s",     strings.Cextras1)
  printf("\n/* Hprims: */\n%s",       strings.Hprims)
  printf("\n/* FIPprims: */\n%s",     strings.FIPprims)
  printf("\n/* Fprims: */\n%s",       strings.Fprims)
  printf("\n/* SFprims: */\n%s",      strings.SFprims)
  printf("\n/* nasmbytecode: */\n%s", strings.nasmbytecode)
-- print(strings.nasmbytecode)
end



--%%%%%
--%
--% «skeleton_files»  (to ".skeleton_files")
--%
--%%%%%

function process_skeleton(skelstr, delim1, delim2, f)
  local rest, result = skelstr, ""
  local pre, d1, post, mid, d2
  while 1 do
    pre, d1, post = split_at_first(rest, delim1)
    if not post then break end
    mid, d2, rest = split_at_first(post, delim2)
    -- result = result .. pre .. "/* [lua" .. mid .. "lua] */" .. dostring(mid)
    result = result .. pre .. f(mid)
  end
  return result .. rest
end

function process_C_skeleton(fnamein, fnameout)
  local str = process_skeleton(readfile(fnamein),
    "/**[lua", "lua]**/",
    function (mid)
      return "/*[lua" .. mid .. "lua]*/" .. dostring(mid)
    end)
  if fnameout then writefile(fnameout, str) end
  return str
end
function process_nasm_skeleton(fnamein, fnameout)
  local str = process_skeleton(readfile(fnamein), "\n;;; lua:", "\n",
    function (mid)
      return "\n;; lua:" .. mid .. "\n" .. dostring(mid)
    end)
  if fnameout then writefile(fnameout, str) end
  return str
end




--%%%%%
--%
--% «basic_parsing»  (to ".basic_parsing")
--%
--%%%%%

rest = ""
word = ""
restofline = ""

-- «getword»  (to ".getword")
function getword()
  local _, tmprest
  _, _, word, tmprest = strfind(rest, "^%s*([^%s]+)(.*)")
  if word then
    if DBG then
      printf("getword: %q\n", word)
    end
    rest = tmprest
    return word
  end
end

-- «getrestofline»  (to ".getrestofline")
function getrestofline()
  local _
  restofline, _, rest = split_at_first(rest, "\n", rest, "", "")
  return restofline
end

-- «getupto»  (to ".getupto")
function getupto(delim, canthave)
  local _, inner, tmprest
  inner, delim, tmprest = split_at_first(rest, delim)
  if not inner then
    printf("getupto(%q, %q), when rest = %q: no closing delim!\n",
           delim, canthave, rest)
  end
  if canthave and strfind(inner, canthave) then
    printf("getupto(%q, %q), when rest = %q: captured too much!\n",
           delim, canthave, rest)
  end
  if DBG then printf("getupto: inner=%q, delim=%q\n", inner, delim) end
  rest = tmprest
  return inner
end

-- «f»  (to ".f")
function f(str)
  local oldrest = rest
  rest = str
  while getword() do
    action[word]()
  end
  rest = oldrest
end




-- «tick»  (to ".tick")
function tick(name)
  local nasmname = "ADR_" .. nasmify(name)
  lbl(nasmname)
  action[name] = function () db_forth(%nasmname .. " -_f0") end
end

-- «gtick»  (to ".gtick")
function gtick(name)
  local nasmname = "ADR_" .. nasmify(name)
  glbl(nasmname)
  action[name] = function () db_forth(%nasmname .. " -_f0") end
end

-- «dbstuff»  (to ".dbstuff")
function dbstuff(...)
  local dbs, len = {}, 0
  local i, obj, rest, _, str, c
  for i = 1, getn(arg) do
    obj = arg[i]
    if type(obj) == "string" then
      rest = obj
      while rest ~= "" do
        _, _, str, c, rest = strfind(rest, "^([ -&(-~]*)(.?)(.*)")
        if str ~= "" then
          tinsert(dbs, "'" .. str .. "'")
          len = len + strlen(str)
        end
        if c ~= "" then
          tinsert(dbs, strbyte(c))
          len = len + 1
        end
      end
    elseif type(obj) == "number" then
      tinsert(dbs, obj)
      len = len + 1
    else
      error("weird type for db!")
    end
  end
  nasm(len, format("\tdb %s\n", join(dbs, ",")))
end

-- «cnword»  (to ".cnword")
function cnword(wordname, heads, cname)
  cname = cname or wordname
  tick(wordname)
  f(heads)
  nasm(4, format("    extern %s\n\tdd %s\n", cname, cname))
end

-- «c1word»  (to ".c1word")
function c1word(name, cname)
  cnword(name, "C1:", cname)
end




--%%%%%
--%
--% «lua_code_in_flua»  (to ".lua_code_in_flua")
--%
--%%%%%

-- «word:lua»  (to ".word:lua")
-- The "lua" word implements a way (the simplest possible, I think) to
-- execute lua code from Flua; for example, in Flua,
--
--   lua <luacode> end;
--
-- executes dostring(<luacode>>) immediately. Newlines are allowed in
-- <luacode>.
--
action["lua"] = function() dostring(getupto("end;")) end


-- «getluaargs»  (to ".getluaargs")
function getluaargs()
  return getupto(");", "\n")
end

-- «immed_lua»  (to ".immed_lua")
function immed_lua(str)
  foreachi(split(str), function (i, funname)
      action[funname.."("] = function()
          dostring(%funname.."("..getluaargs()..")")
        end
    end)
end

-- «word:nasm»  (to ".word:nasm")
-- «word:dbstuff»  (to ".word:dbstuff")
immed_lua("nasm dbstuff")

-- immed_lua implements a simple way to call Lua functions from Flua
-- code... An example: running immed_lua("nasm dbstuff") is equivalent
-- to running:
--
--  action["nasm("]    = function() dostring("nasm("..getluaargs()..")") end
--  action["dbstuff("] = function() dostring("dbstuff("..getluaargs()..")") end
--
-- and after that things like the following Flua block will work
-- (i.e., making "f" run that block will add a definition for the word
-- "2" to the nasm bytecode; a stack diagram for "2" is at the right):
--
--   % 2 CON: nasm( 4, "\tdd 2\n");   -- 2 ( -- 2 )
--
-- Note that the word "nasm(" is executed "immediately" (in the Forth
-- sense of "immediate"!), by Lua; action["nasm("]() will call
-- getluaargs() to parse everything up to the ");", and then execute
-- 'nasm(4, "\tdd 2\n")' with dostring.



-- «word:function»  (to ".word:function")
action["function"] = function()
    local funname = getword()
    local body = getupto("end;")
    -- We define the function <funname> (callable from Lua):
    dostring(format("function %s %s end", funname, body))
    -- Now we define the word "funname(", that calls the function <funname>.
    action[funname.."("] = function()
        dostring(%funname.."("..getluaargs()..")")
      end
  end

-- The word "function" provides a way to actually define Lua (and
-- Flua!) functions from inside Flua code. A slightly artificial
-- example: in this block of flua code
--
--   function printf (...) write(call(format, arg)) end;
--   printf( "%s: %q\n", "rest", rest);
--
-- the first line will work exactly as
--
--   function printf (...) write(call(format, arg)) end
--   action["printf("] = function()
--     dostring("printf("..getluaargs()..")")
--   end
--
-- redefining the "printf" function of inc.lua, and the second line
-- will execute immediately (in the Forth sense)
--
--   printf("%s: %q\n", "rest", rest)
--
-- as the "getluaargs()" will have returned [["%s: %q\n", "rest", rest]].
--
-- Note that in action["function"] "getupto" is called without the
-- "canthave" argument; a call to "function" allows newlines before
-- the "end;", while an invocation of a word created with immed_lua
-- will complain if a newline is found before the ");".





--%%%%%
--%
--% «tmplabels»  (to ".tmplabels")
--%
--%%%%%

function d2n(stack, depth)		-- convert depth to n (top is depth=1)
  return getn(stack)-((depth or 1)-1)	-- to do: check bounds
end

function newstack()
  local stack = {}
  local s_push, s_pop, s_pick, s_pock, s_pluck
  s_push  = function (value) tinsert(%stack, value) end
-- s_pop is currently a copy of s_pluck
  s_pick  = function (depth) return %stack[d2n(%stack, depth)] end
  s_pock  = function (depth, value) %stack[d2n(%stack, depth)] = value end
  s_pluck = function (depth) return tremove(%stack, d2n(%stack, depth)) end
-- return stack, s_push, s_pop, s_pick, s_pock, s_pluck
  return stack, s_push, s_pluck, s_pick, s_pock, s_pluck
end

-- (find-node "(lua)tremove")
-- (find-node "(lua)tinsert")

ntmplabels = 0
tmplabelstack,
  tmplabelpush,
  tmplabelpop = newstack()

function tmplabelname(str)
  local labelname, _
  if str == ">" then			-- push a new tmplabel on the stack
    ntmplabels = ntmplabels + 1
    labelname = "LBL_"..ntmplabels
    tmplabelpush(labelname)
    return labelname
  else
    _, _, n = strfind(str, "^<([0-9]*)$")
    if n then				-- pop some tmplabel from the stack
      return tmplabelpop(tonumber(n))
    end
  end
  return "LBL_"..nasmify(str)
end
function gettmplabelname()
  return tmplabelname(getword())
end

-- «word:lbl:»  (to ".word:lbl:")
-- «word:tolbl»  (to ".word:tolbl")
action["lbl:"] = function() lbl(gettmplabelname()) end
action["tolbl"] = function() tolbl(gettmplabelname()) end

-- «word:if»  (to ".word:if")
-- «word:then»  (to ".word:then")
-- «word:else»  (to ".word:else")

-- «words:if_then_else»  (to ".words:if_then_else")
-- (find-flua "flua-demos.lua" "flua_demo4")
--
-- Some standard Forth control words. Note that we are not defining
-- 0BRANCH and BRANCH now; the "user" will have to define them as Flua
-- words, primitive or not -- and will have to follow the conventions
-- of lbl and tolbl: the destination is stored as dw labelname-_f0.
--
action["if"]   = function() f("0BRANCH tolbl >") end
action["else"] = function() f(" BRANCH tolbl > lbl: <2") end
action["then"] = function() f("lbl: <") end





--
-- Local Variables:
-- coding:               no-conversion
-- ee-anchor-format:     "«%s»"
-- ee-charset-indicator: "Ñ"
-- ee-comment-format:    "-- %s\n"
-- End: