-- Warning: this is an htmlized version!
-- The original is here, and the conversion rules are here.
-- -*- coding: raw-text-unix -*-
-- charset.lua - handle non-ascii characters and sgmlification.
-- This is part of blogme3.
-- Author: Eduardo Ochs <eduardoochs@gmail.com>
-- Version: 2008jun30
-- <http://anggtwu.net/blogme3/charset.lua>
-- <http://anggtwu.net/blogme3/charset.lua.html>
-- License: GPL.
--
-- This is the part of blogme3 that handles weird (non-ascii)
-- characters. It works only for latin1 in unibyte encodings.
-- It supports just these glyphs by default: "*", "«", and "»".
-- (find-eevarticlesection "glyphs")
-- (find-blogme3 "anggdefs.lua" "eev_math_glyphs_edrx")
-- «.sgmlify» (to "sgmlify")
-- «.unaccent» (to "unaccent")
--------[ sgmlify ]--------
-- «sgmlify» (to ".sgmlify")
-- A test: lpeg.match(Sgmlifying, "Fun\231\227o")
-- (find-eoutput '(ascstr 128 255))
-- Latin-1 character / SGML entity name pairs, stored as one
-- whitespace-separated string: "<char> <name> <char> <name> ...".
--
-- Every character is written as a decimal escape (\ddd), so each key is
-- exactly one Latin-1 byte regardless of how this source file is encoded
-- or re-encoded. With literal glyphs the key for Acirc had been lost and
-- Atilde was paired with the lowercase letter; that left an odd number
-- of tokens and shifted every later pair by one.
--
-- Icirc (\206) and Igrave (\204) are included so that the uppercase
-- letters mirror the lowercase icirc/igrave entries.
sgmlify_latin1_pairs =
     "\198 AElig \193 Aacute \194 Acirc \192 Agrave \197 Aring \195 Atilde \196 Auml\n"
  .. "\199 Ccedil \201 Eacute \202 Ecirc \200 Egrave \203 Euml \205 Iacute \206 Icirc \204 Igrave \207 Iuml\n"
  .. "\211 Oacute \212 Ocirc \210 Ograve \213 Otilde \214 Ouml \218 Uacute \219 Ucirc\n"
  .. "\217 Ugrave \220 Uuml \225 aacute \226 acirc \230 aelig \224 agrave \229 aring\n"
  .. "\227 atilde \228 auml \231 ccedil \233 eacute \234 ecirc \232 egrave \235 euml\n"
  .. "\237 iacute \238 icirc \236 igrave \239 iuml \243 oacute \244 ocirc \242 ograve\n"
  .. "\245 otilde \246 ouml \223 szlig \250 uacute \251 ucirc \249 ugrave \252 uuml\n"
  .. "\170 ordf \171 laquo \176 deg \186 ordm \187 raquo\n"
  .. "\209 Ntilde \241 ntilde\n"
  .. "\215 times\n"
-- sgmlify_table: maps a one-character string to the HTML text that
-- should replace it. It is seeded from sgmlify_latin1_pairs, each
-- character becoming the entity "&name;".
-- NOTE(review): each2 and split are helpers defined outside this file;
-- this loop assumes split yields the whitespace-separated tokens and
-- each2 walks them two at a time -- confirm against lua50init.lua.
sgmlify_table = {}
for c,name in each2(split(sgmlify_latin1_pairs)) do
  sgmlify_table[c] = "&"..name..";"
end

-- Glyphs with special rendering: these assignments override whatever the
-- loop above stored for the same characters.
-- The guillemets are emitted as entities rather than as raw non-ASCII
-- characters, so the generated HTML is plain ASCII and renders the same
-- whatever the encoding of this file or of the output page.
sgmlify_table["\15"]  = "<font color=\"red\"><strong>*</strong></font>"
sgmlify_table["\171"] = "<font color=\"green\"><i>&laquo;</i></font>"
sgmlify_table["\187"] = "<font color=\"green\"><i>&raquo;</i></font>"

-- Pattern capturing one character that may need replacing: char 15 or
-- any character in the range 161..254.
sgmlify_re = "([\15\161-\254])"

-- sgmlify(s): hands s, sgmlify_re and sgmlify_table to translatechars
-- and returns its result.
-- NOTE(review): translatechars is defined elsewhere; presumably it
-- replaces each match of the pattern by its table entry -- confirm.
sgmlify = function (s)
  return translatechars(s, sgmlify_re, sgmlify_table)
end
-- (find-angg "LUA/lua50init.lua" "translatechars")
--------[ unaccent ]--------
-- «unaccent» (to ".unaccent")
-- (find-blogmefile "blogme2-outer.lua" "unaccent_from, unaccent_to =")
-- unaccent_from and unaccent_to are parallel strings: the i-th byte of
-- unaccent_from is an accented Latin-1 letter and the i-th byte of
-- unaccent_to is its plain ASCII replacement.
-- unaccent_from is written with decimal escapes so that it stays one
-- byte per letter however this source file is encoded; with literal
-- glyphs in a multi-byte encoding the byte-wise loop below would fall
-- out of step with unaccent_to.
-- Order: A-grave..A-ring, C-cedilla, E*, I*, N-tilde, O* (incl. O-slash),
-- U*, Y-acute; first the uppercase letters, then the lowercase ones.
unaccent_from, unaccent_to =
  "\192\193\194\195\196\197\199\200\201\202\203\204\205\206\207"
    .. "\209\210\211\212\213\214\216\217\218\219\220\221"
    .. "\224\225\226\227\228\229\231\232\233\234\235\236\237\238\239"
    .. "\241\242\243\244\245\246\248\249\250\251\252\253",
  "AAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuy"

-- Guard against the two strings drifting apart in a later edit.
assert(string.len(unaccent_from) == string.len(unaccent_to),
       "unaccent_from and unaccent_to must have the same length")

-- unaccent_table: accented byte -> ASCII letter, built position by
-- position from the two strings above. string.len/string.sub are used
-- directly so the block does not depend on the strlen/strsub globals.
unaccent_table = {}
for i = 1, string.len(unaccent_from) do
  unaccent_table[string.sub(unaccent_from, i, i)] = string.sub(unaccent_to, i, i)
end

-- Pattern capturing one byte in the range 192..254.
unaccent_re = "([\192-\254])"

-- unaccent(str): hands str, unaccent_re and unaccent_table to
-- translatechars and returns its result.
-- NOTE(review): translatechars is defined elsewhere; presumably it
-- replaces each match of the pattern by its table entry -- confirm.
unaccent = function (str)
  return translatechars(str, unaccent_re, unaccent_table)
end