-- Warning: this is an htmlized version!
-- The original is here, and
-- the conversion rules are here.
-- This file:
--   http://anggtwu.net/blogme3/htmlize-utf8.lua.html
--   http://anggtwu.net/blogme3/htmlize-utf8.lua
--           (find-angg "blogme3/htmlize-utf8.lua")
-- Author: Eduardo Ochs <eduardoochs@gmail.com>
--
-- Until sep/2021 Blogme3 was only able to parse and htmlize files in
-- utf-8 by converting them to unibyte first...

-- Based on:
-- (find-blogme3 "escripts.lua" "anchor")
-- (find-blogme3 "charset.lua" "sgmlify")

loadlpeg()

-- SpecialChar
--
-- Single characters that cannot be copied verbatim into the HTML output.
--   SpecialChars_table: maps each special character to its HTML replacement.
--   SpecialChar0:       matches one special character, produces no capture.
--   SpecialChar:        matches one special character and yields its
--                       replacement, looked up in SpecialChars_table using
--                       the matched text as the key.
--
SpecialChars_table = {
  ["&"] = '&amp;',
  ["<"] = '&lt;',
  [">"] = '&gt;',
  ["*"] = '<font color="red"><strong>*</strong></font>',
  ["«"] = '<font color="green">&laquo;</font>',
  ["»"] = '<font color="green">&raquo;</font>',
}

-- lpeg.S builds a set out of the individual characters of its argument, so
-- the guillemets are written as separate literal patterns instead of being
-- added to the set.
SpecialChar0 = lpeg.S("&<>*") + lpeg.P("«") + lpeg.P("»")
SpecialChar  = SpecialChar0 / SpecialChars_table


-- SpecialSeq
--
-- Multi-character sequences that get a fixed HTML replacement.
--   SpecialSeq_table: maps each sequence to its replacement.
--   SpecialSeq0:      matches one of the sequences, produces no capture.
--   SpecialSeq:       matches one of the sequences and yields its
--                     replacement, looked up in SpecialSeq_table using the
--                     matched text as the key.
--
-- NOTE(review): the first key below is the empty string, and its value has
-- an empty <strong></strong>. It looks like a non-printing character was
-- lost from both places - confirm against the pristine file and restore it
-- with an explicit escape sequence. The entry is kept in the table so that
-- nothing is lost, but it is deliberately NOT an alternative of SpecialSeq0:
-- an empty-string alternative always succeeds, which
--   (a) makes every alternative after it unreachable (ordered choice),
--   (b) makes any negative lookahead on SpecialSeq0 fail at every position,
--   (c) makes LPeg reject a "^0" loop whose body contains SpecialSeq
--       ("loop body may accept empty string").
--
SpecialSeq_table = {
  [""]   = "<font color=\"red\"><strong></strong></font>",
  ["|&"]  = '<a href="http://anggtwu.net/e/bash.e.html#pipe_stdout_stderr">|&amp;</a>',
  ["$S/"] = '<a href="http://anggtwu.net/eev-intros/find-psne-intro.html">$S/</a>',
  ["<<'%%%'"] = '<a href="http://en.wikipedia.org/wiki/Here-document">&lt;&lt;\'%%%\'</a>',
}
SpecialSeq0 = (lpeg.P("|&") + "$S/" + "<<'%%%'")
SpecialSeq  = SpecialSeq0 / SpecialSeq_table


-- Anchor
--
-- An anchor is a non-empty run of AnchorChars between "«" and "»".
--   AlphaNumeric: one ASCII letter or digit.
--   AnchorChar:   one character allowed inside an anchor name.
--   Anchor0:      matches a whole anchor and captures the name between the
--                 guillemets.
--   Anchor:       matches a whole anchor and yields an <a name="..."> tag
--                 whose visible text is the name between green guillemets.
--
AlphaNumeric = lpeg.R("09", "AZ", "az")
AnchorChar   = lpeg.S("!#$%()*+,-./:;=?@^_{|}~") + AlphaNumeric
Anchor0      = lpeg.P("«") * lpeg.C(AnchorChar^1) * lpeg.P("»")

-- In a string capture "%1" stands for the first capture of the pattern,
-- which here is the anchor name.
Anchor = Anchor0 / '<a name="%1"><font color="green">&laquo;%1&raquo;</font></a>'

-- Url
--
-- Recognizes URLs of the form  protocol://domain/path  and turns them into
-- links.
--   UrlProtocol:   "https", "http" or "ftp". "https" has to be tried before
--                  "http": an ordered choice does not go back to a later
--                  alternative once an earlier one has matched, so with
--                  "http" first the "://" that follows would fail on the "s".
--   UrlDomainChar: a lowercase letter, a digit, or "-".
--   UrlDomain:     one or more dot-separated runs of UrlDomainChars.
--   UrlPathChar:   one character allowed after the "/" that ends the domain.
--   UrlPath:       a possibly empty run of UrlPathChars.
--   Url0:          matches a whole URL, produces no capture. The "/" after
--                  the domain is mandatory.
--   Url:           matches a whole URL and yields <a href="URL">URL</a>.
--
UrlProtocol   = lpeg.P("https") + lpeg.P("http") + lpeg.P("ftp")
UrlDomainChar = lpeg.R("az", "09") + lpeg.S("-")
UrlDomain     = UrlDomainChar^1 * ("." * UrlDomainChar^1)^0
UrlPathChar   = AlphaNumeric + lpeg.S("!#$%&()*+,-./:;=?@[]^_{|}~")
UrlPath       = UrlPathChar^0
Url0          = UrlProtocol * "://" * UrlDomain * "/" * UrlPath

-- UrlPathChar accepts "&" (query strings), so the URL text must be escaped
-- before it is placed in the HTML: "&" is the only character accepted by
-- Url0 that is special in HTML ("<", ">" and '"' are not UrlPathChars).
-- The escaped form is used both in the href attribute and in the link text.
Url = lpeg.C(Url0) / function (url)
    -- gsub returns two values; the assignment keeps only the new string.
    local escaped = url:gsub("&", "&amp;")
    return '<a href="' .. escaped .. '">' .. escaped .. '</a>'
  end


-- HtmlizeLeft
--
-- Htmlizer that recognizes anchors, special sequences, special characters
-- and URLs.
--   SpecialLeft:         matches one special item and yields its HTML. The
--                        choice is ordered, so Anchor is tried before
--                        SpecialChar (both can start at a "«") and SpecialSeq
--                        is tried before SpecialChar.
--   SpecialLeft0:        capture-free detector for the start of a special
--                        item. Anchor0 is not listed: an anchor starts with
--                        "«", which SpecialChar0 already matches.
--   NonSpecialLeftChar:  one character at a position where no special item
--                        starts.
--   NonSpecialLeftChars: a non-empty run of such characters, captured as-is.
--   HtmlizeLeft0:        collects the pieces of the output in a table.
--   HtmlizeLeft:         same, but yields the pieces joined into one string.
--
SpecialLeft         = Anchor + SpecialSeq + SpecialChar + Url
SpecialLeft0        = SpecialSeq0 + SpecialChar0 + Url0

-- "lpeg.P(1) - patt" is LPeg's spelling of "-patt * lpeg.P(1)".
NonSpecialLeftChar  = lpeg.P(1) - SpecialLeft0
NonSpecialLeftChars = lpeg.C(NonSpecialLeftChar^1)

local left_piece    = NonSpecialLeftChars + SpecialLeft
HtmlizeLeft0        = lpeg.Ct(left_piece^0)
HtmlizeLeft         = HtmlizeLeft0 / table.concat


-- HtmlizeMiddle
--
-- Htmlizer that only replaces the characters handled by SpecialChar;
-- everything else is copied unchanged.
--   SpecialMiddle:         matches one special item and yields its HTML.
--   NonSpecialMiddleChar:  one character at a position where SpecialMiddle
--                          does not match.
--   NonSpecialMiddleChars: a non-empty run of such characters, captured
--                          as-is.
--   HtmlizeMiddle0:        collects the pieces of the output in a table.
--   HtmlizeMiddle:         same, but yields the pieces joined into one string.
--
SpecialMiddle         = SpecialChar

-- "lpeg.P(1) - patt" is LPeg's spelling of "-patt * lpeg.P(1)".
NonSpecialMiddleChar  = lpeg.P(1) - SpecialMiddle
NonSpecialMiddleChars = lpeg.C(NonSpecialMiddleChar^1)

local middle_piece    = NonSpecialMiddleChars + SpecialMiddle
HtmlizeMiddle0        = lpeg.Ct(middle_piece^0)
HtmlizeMiddle         = HtmlizeMiddle0 / table.concat


-- Sample inputs used by the interactive tests in the comment block below:
-- an anchor, a single special character, and a URL.
teststr1, teststr2, teststr3 = "«foo»", "*", "http://foo.bar/"

--[[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "htmlize-utf8.lua"

= SpecialLeft:match(teststr1)
= SpecialLeft:match(teststr2)
= SpecialLeft:match(teststr3)
= SpecialLeft:match "<<'%%%'"
= SpecialLeft:match "|&"

= NonSpecialLeftChars:match "bla_http://foo.bar/"
= NonSpecialLeftChars:match "bla_&"
= NonSpecialLeftChars:match "http://foo.bar/"
= NonSpecialLeftChars:match "&"
= NonSpecialLeftChars:match "abc"

= HtmlizeLeft:match "ab&cd<> http://foo.bar/ !http://"

--]]






-- (find-blogme3 "anggdefs.lua" "headers")
-- (find-THfile "test-utf8.blogme")
-- (find-blogme3 "options.lua" "htmlizefile")
-- (find-blogme3 "options.lua" "basic-options-sandwich")
-- (find-blogme3 "sandwiches.lua")
-- (find-blogme3 "sandwiches-defs.lua" "use_sand_htmlizeline")
-- (find-blogme3grep "grep --color=auto -nH --null -e htmlizer *.lua")
-- (find-blogme3grep "grep --color=auto -nH --null -e htmlizelines *.lua")
-- (find-blogme3 "escripts.lua" "htmlizeline")

--[[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
loadblogme3()

-- (find-fline "~/LATEX/dednat6/eoo-unicode.lua")
fname     =    "~/LATEX/dednat6/eoo-unicode.lua"
outfname  =        "/tmp/out.html"

* (find-sh0 "rm -fv /tmp/out.html")

htmlizefile_utf8(fname, outfname)
-- (find-fline "/tmp/out.html")
-- file:///tmp/out.html


require "sandwiches-defs"
require "htmlize-utf8"


html_dtd  = dtd_transitional .. dtd_encoding_utf8

fname     = "~/blogme3/htmlize-utf8.lua"

-- (find-fline "~/LATEX/dednat6/eoo-unicode.lua")
fname     = "~/LATEX/dednat6/eoo-unicode.lua"
outfname  = "/tmp/out.html"
fcontents = ee_readfile(fname)
fnamestem = fnamenondirectory(fname)
warning   = htmlization_warning(fname)
head      = htmlization_head(fname)

= head
= warning

hl_utf8 = HtmlizeLine {
  left  = function (hl, str) return HtmlizeLeft  :match(str) end,
  plain = function (hl, str) return HtmlizeMiddle:match(str) end,
}
htmlizeline = function (linestr) return (hl_utf8:line(linestr)) end
htmlizer    = htmlizelines
body        = BODY(warning .. PRE(htmlizer(fcontents)))

writefile(outfname, HTML(head .. body))
writefile(outfname, html_dtd .. HTML(head .. body))
-- (find-fline "/tmp/out.html")
-- file:///tmp/out.html


--]]





-- Local Variables:
-- coding:  utf-8-unix
-- End: