-- Warning: this is an htmlized version!
-- The original is here, and the conversion rules are here.
-- This file:
--   http://anggtwu.net/blogme3/detect-encoding.lua.html
--   http://anggtwu.net/blogme3/detect-encoding.lua
--          (find-angg "blogme3/detect-encoding.lua")
-- Author: Eduardo Ochs <eduardoochs@gmail.com>
--
-- (defun l () (interactive) (find-angg "blogme3/detect-encoding.lua"))

-- «.bad-conversions»  (to "bad-conversions")
-- «.re»               (to "re")

-- DetectEncoding: loads a file and works out an encoding name for it,
-- first by looking for an explicit encoding name in its first and last
-- lines, then by falling back on the two numbers returned by
-- measure_utf8_ness.
--
-- Class, ee_readfile, splitlines, max and measure_utf8_ness are not
-- defined in this file and must be loaded before it.
DetectEncoding = Class {
  type = "DetectEncoding",

  -- DetectEncoding.new(fname, root): read the file (root..fname, with
  -- root defaulting to "~/") and return an object with these fields:
  --   bigstr: the whole file contents, as returned by ee_readfile
  --   lines:  the result of splitlines(bigstr)
  --   nlines: #lines
  new = function (fname, root)
      local dir      = root or "~/"
      local contents = ee_readfile(dir..fname)
      local split    = splitlines(contents)
      return DetectEncoding {lines=split, nlines=#split, bigstr=contents}
    end,

  __index = {
    -- de:lastlines0(n): join the lines from index max(nlines - n, 1)
    -- up to nlines with "\n".
    -- NOTE(review): that span holds up to n+1 lines, not n - confirm
    -- that this is intended.
    lastlines0 = function (de, n)
        local last  = de.nlines
        local first = max(last - n, 1)
        return table.concat(de.lines, "\n", first, last)
      end,

    -- de:lastlines(n): same as lastlines0, with n defaulting to 7, but
    -- keeping only what comes after the last form feed ("\f").
    -- NOTE(review): the trimming is only attempted when the text
    -- contains a newline; a one-line text is returned untouched even
    -- if it has a form feed in it. Possibly the guard was meant to
    -- test for "\f" - confirm.
    lastlines = function (de, n)
        local text = de:lastlines0(n or 7)
        if not text:find("\n", 1, true) then return text end
        -- Bind to a local so that only the first result of gsub (the
        -- string, not the substitution count) is returned.
        local afterff = text:gsub("^(.*\f)([^\f]*)$", "%2")
        return afterff
      end,

    -- de:firstandlastlines(n): the first line, a newline, and then
    -- de:lastlines(n). When there are fewer than two lines, just all
    -- the lines joined with "\n".
    firstandlastlines = function (de, n)
        local lines = de.lines
        if de.nlines < 2 then return table.concat(lines, "\n") end
        local head = lines[1]
        local tail = de:lastlines(n)
        return head.."\n"..tail
      end,

    -- de:explicitencoding(n): search de:firstandlastlines(n) for an
    -- encoding name. The patterns are tried in the order below and the
    -- first one that matches decides the result; "no-conversion" is
    -- reported as "raw-text-unix". Returns nothing when none matches.
    explicitencoding = function (de, n)
        local str = de:firstandlastlines(n)
        local markers = {
          {"utf%-8%-unix",    "utf-8-unix"},
          {"raw%-text%-unix", "raw-text-unix"},
          {"no%-conversion",  "raw-text-unix"},
        }
        for _,marker in ipairs(markers) do
          if str:match(marker[1]) then return marker[2] end
        end
      end,

    -- de:guessencoding0(): call measure_utf8_ness on the file contents,
    -- store its two results in de.nu8 and de.nother, and return:
    --   "any"            when both are zero,
    --   "utf-8-unix"     when only nu8 is positive,
    --   "raw-text-unix"  when only nother is positive,
    --   nothing          otherwise (e.g. when both are positive).
    -- NOTE(review): presumably nu8 counts UTF-8 sequences and nother
    -- counts other non-ASCII bytes - confirm in the definition of
    -- measure_utf8_ness.
    guessencoding0 = function (de)
        local nu8, nother = measure_utf8_ness(de.bigstr)
        de.nu8    = nu8
        de.nother = nother
        local no_u8    = (nu8 == 0)
        local no_other = (nother == 0)
        if no_u8 and no_other   then return "any" end
        if nu8 > 0 and no_other then return "utf-8-unix" end
        if no_u8 and nother > 0 then return "raw-text-unix" end
      end,

    -- de:guessencoding(n): the explicit encoding if there is one,
    -- otherwise the guess from guessencoding0. The result (possibly
    -- nil) is stored in de.enc and returned.
    guessencoding = function (de, n)
        local enc = de:explicitencoding(n)
        if not enc then enc = de:guessencoding0() end
        de.enc = enc
        return enc
      end,
  },
}

--[[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "detect-encoding.lua"
de = DetectEncoding.new("TODO")
= de:lastlines0(7)
= de:lastlines(7)
= de:lastlines()
= de:firstandlastlines()
= de:explicitencoding()
str = de:firstandlastlines()
= str
= str:match("utf")
= str:match("utf-8")
= DetectEncoding.new("TODO"):guessencoding()
= "\f"

--]]

--[[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "detect-encoding.lua"
QUIET = 1
dofile "anggmake.lua"
PPV(anggtranslate)   -- (find-blogme3 "anggmake.lua" "anggtranslate")
PPV(escriptstems)    -- (find-blogme3 "anggmake.lua" "escriptstems")
filelist = map(function (s) return "e/"..s..".e" end, escriptstems)
filelist = anggtranslate
PPV(filelist)
for _,fname in ipairs(filelist) do
  local de = DetectEncoding.new(fname)
  local enc = de:explicitencoding()
  -- print(enc, fname)
  -- if not enc then print(fname) end
  if not enc then
    local nu8, nother = measure_utf8_ness(de.bigstr)
    if nu8 > 0 or nother > 0 then
      -- print(nu8, nother, fname)
      -- if nu8 > 0 and nother > 0 then print(nu8, nother, fname) end
      if nu8 > 0 and nother == 0 then print(nu8, nother, fname) end
    end
  end
end

fa = function (fname) return format('(find-angg "%s")', fname) end
filelist = map(function (s) return "e/"..s..".e" end, escriptstems)
for _,fname in ipairs(filelist) do
  local de = DetectEncoding.new(fname)
  local enc = de:explicitencoding()
  if enc == "raw-text-unix" then print(fa(fname)) end
end

-- (find-angg "LUA/lua50init.lua" "u8c_to_l1")
-- (find-angg "LUA/lua50init.lua" "u8c_to_l1" "measure_utf8_ness")

-- «bad-conversions»  (to ".bad-conversions")
-- Look for bad conversions (like the char c2s here???):
-- (hexl-find-file "~/e/page.e~")
-- (hexl-find-file "~/e/page.e")

* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
h = function (s) return tonumber(s, 16) end
h = function (s) return format("%c", tonumber(s, 16)) end
= h"40"
fa = function (fname) return format('(find-fline "%s")', fname) end

QUIET = 1
dofile "anggmake.lua"
-- filelist = map(function (s) return "e/"..s..".e" end, escriptstems)
for _,stem in ipairs(escriptstems) do
  fname = "~/e/"..stem..".e"
  bigstr = ee_readfile(fname)
  if bigstr:match(h"c2") then
    fname2 = "/tmp/"..stem.."-new.e"
    print(fname2)
    bigstr2 = bigstr:gsub(h"c2", "")
    ee_writefile(fname2, bigstr2)
    print(fa(fname))
    print(fa(fname2))
  end
end

end

--]]

-- «re»  (to ".re")
-- (find-es "lua5" "lpeg-quickref")
require "re"

--[[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "detect-encoding.lua"

--]]