|
Warning: this is an htmlized version!
The original is here, and the conversion rules are here. |
#######
#
# E-scripts on using codepage850, latin1+ and the math-enhanced
# versions of these charsets
#
# Note 1: use the eev command (defined in eev.el) and the
# ee alias (in my .zshrc) to execute parts of this file.
# Executing this file as a whole makes no sense.
# An introduction to eev can be found here:
#
# (find-eev-quick-intro)
# http://angg.twu.net/eev-intros/find-eev-quick-intro.html
#
# Note 2: be VERY careful and make sure you understand what
# you're doing.
#
# Note 3: If you use a shell other than zsh things like |&
# and the for loops may not work.
#
# Note 4: I always run as root.
#
# Note 5: some parts are too old and don't work anymore. Some
# never worked.
#
# Note 6: the definitions for the find-xxxfile commands are in my
# .emacs.
#
# Note 7: if you see a strange command check my .zshrc -- it may
# be defined there as a function or an alias.
#
# Note 8: the sections without dates are always older than the
# sections with dates.
#
# This file is at <http://angg.twu.net/e/charsets.e>
# or at <http://angg.twu.net/e/charsets.e.html>.
# See also <http://angg.twu.net/emacs.html>,
# <http://angg.twu.net/.emacs[.html]>,
# <http://angg.twu.net/.zshrc[.html]>,
# <http://angg.twu.net/escripts.html>,
# and <http://angg.twu.net/>.
#
#######
# «.tcs» (to "tcs")
# «.tcs_latin1-850» (to "tcs_latin1-850")
# «.tcs_patch» (to "tcs_patch")
# «.latin1-850_font» (to "latin1-850_font")
# «.page_to_latin1-850» (to "page_to_latin1-850")
# «.charset_indicator» (to "charset_indicator")
# «.mapscrn» (to "mapscrn")
# «.recode» (to "recode")
# «.pdftotext» (to "pdftotext")
# «.utf-8» (to "utf-8")
# «.u8_to_l1» (to "u8_to_l1")
# «.l1_to_u8» (to "l1_to_u8")
# «.unicode-data» (to "unicode-data")
# «.unzip-unicode-problem» (to "unzip-unicode-problem")
# «.iconv» (to "iconv")
# (find-es "print" "a2ps-cp850")
#####
#
# tcs (for conversion between charsets)
# 2001jan03
#
#####
# «tcs» (to ".tcs")
# (to "tcs_patch")
# (find-status "tcs")
# (find-vldifile "tcs.list")
# (find-fline "/usr/doc/tcs/")
# The .ps is just a ps version of the manpage:
#gv /usr/doc/tcs/tcs.ps.gz
# (eeman "1 tcs")
#*
pdsc $SDEBIAN/dists/potato/main/source/text/tcs_1-6.dsc
#*
tcs -lv |& tee ~/o
#*
# (find-fline "~/o")
# (code-c-d "tcs" "/usr/src/tcs-1/")
# (find-tcsfile "tcs.c" "850")
# (find-tcsfile "tcs.c" "tabps2[256] =")
# (find-tcsfile "tcs.c" "tab8859_1[256] =")
# (find-tcsfile "")
# (find-tcsfile "regress")
# (find-tcsfile "Makefile")
# (find-man "7 utf-8")
#####
#
# tcs: comparing the 850 and latin1 charsets and building latin1-850
# 2001jan03
#
#####
# «tcs_latin1-850» (to ".tcs_latin1-850")
# (to "tcs")
# (to "tcs_patch")
# (to "latin1-850_font")
# Look for repetitions in the tables that interest me (850 and latin1)...
#*
# Test "lsort":
# (eeman "3tcl lsort")
expect -c '
puts [lsort -integer {1 2 3 10 20 10 11}]
puts [lsort -integer {1 2 3 10 20 10 11 0x0a}]
'
#*
# (find-tcsfile "tcs.c" "tabps2[256] =")
cat > /tmp/ps2runes0 <<'---'
00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f
40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f
50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f
60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f
70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f
00c7 00fc 00e9 00e2 00e4 00e0 00e5 00e7
00ea 00eb 00e8 00ef 00ee 00ec 00c4 00c5
00c9 00e6 00c6 00f4 00f6 00f2 00fb 00f9
00ff 00d6 00dc 00f8 00a3 00d8 00d7 0192
00e1 00ed 00f3 00fa 00f1 00d1 00aa 00ba
00bf 00ae 00ac 00bd 00bc 00a1 00ab 00bb
2591 2592 2593 2502 2524 00c1 00c2 00c0
00a9 2563 2551 2557 255d 00a2 00a5 2510
2514 2534 252c 251c 2500 253c 00e3 00c3
255a 2554 2569 2566 2560 2550 256c 00a4
00f0 00d0 00ca 00cb 00c8 0131 00cd 00ce
00cf 2518 250c 2588 2584 00a6 00cc 2580
00d3 00df 00d4 00d2 00f5 00d5 00b5 00fe
00de 00da 00db 00d9 00fd 00dd 00af 00b4
00ad 00b1 2017 00be 00b6 00a7 00f7 00b8
00b0 00a8 00b7 00b9 00b3 00b2 220e 00a0
---
# (find-tcsfile "tcs.c" "tab8859_1[256] =")
cat > /tmp/latin1runes0 <<'---'
00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f
40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f
50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f
60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f
70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f
80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f
90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f
a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aa ab ac ad ae af
b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf
c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 ca cb cc cd ce cf
d0 d1 d2 d3 d4 d5 d6 d7 d8 d9 da db dc dd de df
e0 e1 e2 e3 e4 e5 e6 e7 e8 e9 ea eb ec ed ee ef
f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff
---
# Turn each hex table into decimal runes, one per line in numeric order,
# and let "uniq -d" report any rune that occurs twice in a table.
expect -c '
foreach hex [exec cat /tmp/ps2runes0] {lappend ps2runes [expr 0x$hex]}
puts [join [lsort -integer $ps2runes] "\n"]
' > /tmp/ps2runes.sort
uniq -d /tmp/ps2runes.sort
expect -c '
foreach hex [exec cat /tmp/latin1runes0] {lappend latin1runes [expr 0x$hex]}
puts [join [lsort -integer $latin1runes] "\n"]
' > /tmp/latin1runes.sort
uniq -d /tmp/latin1runes.sort
# No repetitions in either of the two tables; this is very good!
wc /tmp/ps2runes.sort
wc /tmp/latin1runes.sort
# comm needs its inputs in lexical (not numeric) order, so re-sort them
# on the fly; /tmp/ocomm therefore lists the runes in lexical order.
comm <(sort /tmp/latin1runes.sort) <(sort /tmp/ps2runes.sort) > /tmp/ocomm
# (find-fline "/tmp/ocomm")
#*
# In fact things are even better. Inspecting the output of "comm"
# we see that the runes that are in only one of the tables are
# 128..159 for latin1 (that are blank) and some runes >=0x100 on
# cp850:
# Keep every cp850 rune >= 0x100, i.e. everything outside the range
# 0x00..0xff that latin1 can represent, and print them sorted.
expect -c '
foreach hex [exec cat /tmp/ps2runes0] {
  if "0x$hex>=0x100" { lappend ps2runes 0x$hex }
}
puts [join [lsort -integer $ps2runes] ", "]
' | tee ~/o
# (find-fline "~/o")
# 0x0131, 0x0192, 0x2017, 0x220e, 0x2500, 0x2502, 0x250c, 0x2510,
# 0x2514, 0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550,
# 0x2551, 0x2554, 0x2557, 0x255a, 0x255d, 0x2560, 0x2563, 0x2566,
# 0x2569, 0x256c, 0x2580, 0x2584, 0x2588, 0x2591, 0x2592, 0x2593,
#*
#####
#
# tcs patch - adding the "latin1-850" charset
# 2001jan06
#
#####
# «tcs_patch» (to ".tcs_patch")
# (to "tcs_latin1-850")
# Patch tcs to add a new charset to it, "latin1-850", such that the
# conversion latin1-850<->ps2/cp850 is reversible and equivalent to
# latin1<->ps2/cp850 on all the usual chars.
# (find-angg ".zshrc" "charsets")
#*
# (wrap nil)
pdsc $SDEBIAN/dists/potato/main/source/text/tcs_1-6.dsc
cd /usr/src/tcs-1/
patch -p0 <<'%%%'
--- tcs.c.orig Wed Oct 23 19:13:20 1996
+++ tcs.c Sat Jan 6 19:18:08 2001
@@ -330,6 +330,31 @@
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff,
};
+/* A special table with the cp850/ps2 runes in a latin1ish order
+ * See: (find-es "print" "tcs_patch")
+ */
+long tab8859_1_850[256] =
+{
+0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
+0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
+0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
+0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
+0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
+0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
+0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
+0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
+0x0131, 0x0192, 0x2017, 0x220e, 0x2500, 0x2502, 0x250c, 0x2510,
+0x2514, 0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550,
+0x2551, 0x2554, 0x2557, 0x255a, 0x255d, 0x2560, 0x2563, 0x2566,
+0x2569, 0x256c, 0x2580, 0x2584, 0x2588, 0x2591, 0x2592, 0x2593,
+0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
+0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
+0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
+0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
+0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
+0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff,
+};
+
long tab8859_2[256] =
{
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
@@ -934,6 +959,7 @@
{ "ascii", "7-bit ASCII", Table, (void *)tabascii },
{ "8859-1", "Latin-1 (Western and Northern Europe including Italian)", Table, (void *)tab8859_1 },
{ "latin1", "ISO 8859-1", Table, (void *)tab8859_1 },
+ { "latin1-850", "Latin-1 with the ps2 runes (edrx)", Table, (void *)tab8859_1_850 },
{ "8859-2", "Latin-2 (Eastern Europe except Turkey and the Baltic countries)", Table, (void *)tab8859_2 },
{ "8859-3", "Latin-3 (Mediterranean, South Africa, Esperanto)", Table, (void *)tab8859_3 },
{ "8859-4", "Latin-4 (Scandinavia and the Baltic countries; obsolete)", Table, (void *)tab8859_4 },
%%%
patch -b -p0 debian/changelog <<'%%%'
0a1,6
> tcs (1-6edrx) custom; urgency=low
>
> * Added the latin1-850 charset.
>
> -- Eduardo Ochs <edrx@inx.com.br> Tue, 19 Jun 2001 22:00:00 +0200
>
%%%
cd /usr/src/tcs-1/
debian/rules binary |& tee odrb
#*
dpkg -i /usr/src/tcs_1-6edrx_i386.deb
#*
#####
#
# Make a latin1-850 font from a cp850 font
# 2000jan07
#
#####
# «latin1-850_font» (to ".latin1-850_font")
# (to "tcs_latin1-850")
# (find-angg ".zshrc" "charsets")
#*
# Get the scrambling table
expect -c '
# Emit the byte values 0..255 as raw bytes: binary translation keeps Tcl
# from re-encoding chars >= 128 or translating the newline byte.
fconfigure stdout -translation binary
for {set i 0; set s {}} {$i<256} {incr i} {append s [format %c $i]}
puts -nonewline $s
' > /tmp/256
wc /tmp/256
isoto850 < /tmp/256 > /tmp/256b
# 850toiso < /tmp/256 > /tmp/256b
#*
# Apply the scramble to a font
cd ~/MTA/
expect -c '
# Build latin1-850.8 by permuting the glyphs of ega1.8 according to the
# 256-byte map in /tmp/256b.  The fonts and the map are raw bytes, so
# every file channel is switched to binary translation.

# readfile: return the whole contents of fname as a byte string.
proc readfile {fname} {
  set ch [open $fname r]
  fconfigure $ch -translation binary
  set bigstr [read $ch]
  close $ch
  return $bigstr
}
# writefile: write bigstr to fname verbatim, with no trailing newline.
proc writefile {fname bigstr} {
  set ch [open $fname w]
  fconfigure $ch -translation binary
  puts -nonewline $ch $bigstr
  close $ch
}
# ord: numeric code of the first char of c.
proc ord {c} { scan $c %c n; return $n }
# fchar: the $ht bytes that make up glyph number n of the original font.
proc fchar {n} { global origfont ht
  string range $origfont [expr {$n*$ht}] [expr {$n*$ht+$ht-1}]
}
# fcharnew: glyph for slot n of the new font, looked up through the map.
proc fcharnew {n} { global map; fchar [ord [string index $map $n]] }

set origfont [readfile ega1.8]
set ht 8
set map [readfile /tmp/256b]
for {set i 0; set s {}} {$i<256} {incr i} {append s [fcharnew $i]}
writefile latin1-850.8 $s
'
#*
# (find-fline "~/MTA/vtutil" "examples of usage:")
cd ~/MTA/
./vtutilsh vtutil rowsofbigchars latin1-850.8 8 | l
#*
850toiso < ~/MTA/vtutil > /tmp/vtutil-latin
cd ~/MTA/
./vtutilsh /tmp/vtutil-latin modifyfont 256 8 latin1-850.8 latinmath.8
./vtutilsh /tmp/vtutil-latin rowsofbigchars latinmath.8 8 | l
./vtutilsh /tmp/vtutil-latin setfont 256 8 latinmath.8 file0
#*
~/MTA/vtutilsh /tmp/vtutil-latin
#####
#
# converting my public home stuff to latin1-850
# 2000jan07
#
#####
# «page_to_latin1-850» (to ".page_to_latin1-850")
# This is just a first test. Don't take it seriously.
#*
# (find-es "escripts" "makepagedeb_angg")
makeLedrxtgz
rm -R /tmp/edrxdeb
mkdir /tmp/edrxdeb
cd /tmp/edrxdeb
cp -v ~root/bin/edrxpage .
cp -v ~root/TH/L/a/s/edrx.tgz .
chmod 755 edrxpage
./edrxpage make_package
#*
rm -Rv /tmp/e/
mkdir /tmp/e/
cd /tmp/e/
for i in $(cd $ES; print -l *.e); do echo $i; 850toiso < $ES/$i > $i; done
# (find-fline "$ES/escripts.e")
# (find-fline "$ES/tex.e" "ee-charset-indicator")
# (find-fline "/tmp/e/tex.e")
#*
rm -Rv /tmp/edrx1
mkdir /tmp/edrx1
cd /tmp/edrx1
edrxpage THR
# find * -type f | egrep '(8|16|gz|png)$'
# find * -type f | egrep -v '(8|16|gz|png)$'
for i in $(find * -type f | egrep -v '(8|16|gz|png)$'); do
850toiso < $i > o
chmod --reference=$i o
mv -v o $i
done
#*
cd /tmp/edrx1
HOME=`pwd` zsh
math
#*
#####
#
# Which characters to use as charset indicators?
# 2000jan09
#
#####
# «charset_indicator» (to ".charset_indicator")
# (to "latin1-850_font")
#*
perl -e 'for ($i=0; $i<256; ++$i) { printf "%c", $i }' > /tmp/256
# od -t x1 /tmp/256
# od -t u1 /tmp/256
isoto850 < /tmp/256 > /tmp/256b
expect -c '
# Load the 256-byte map written by isoto850 as raw bytes, so that index n
# of the string is exactly byte n of the file.
set ch [open /tmp/256b r]
fconfigure $ch -translation binary
set scramble [read $ch]
close $ch
proc ord {str} { scan $str "%c" ord; return $ord }
proc char {n} { format %c $n }
# scramble n: the byte value that n is mapped to.
proc scramble {n} { global scramble
  ord [string index $scramble $n]
}
# For each byte 128..255 follow the map until it comes back to the start,
# then print the cycle found, prefixed by its length.
for {set i 128} {$i<256} {incr i} {
  set cycle $i
  for {set j [scramble $i]} {$j!=$i} {set j [scramble $j]} {
    lappend cycle $j
  }
  puts [format "length %2d: %s" [llength $cycle] $cycle]
}
' | sort | tee ~/o
#*
# We have three cycles of length 3, one of length 8, one of 15, one of
# 23 and one of 73...
# (+ 3 3 3 8 15 23 73)
# Taking one representative of each cycle length among the chars in
# the range 160..254 we get: 3->180 8->163 15->195 23->161 73->160
# (format "%c%c%c%c%c" 160 161 163 180 195)
expect -c 'puts [format "%c%c%c%c%c" 160 161 163 180 195]'
expect -c 'puts [format "%c%c%c%c%c" 160 161 163 180 195]' \
| 850toiso
expect -c 'puts [format "%c%c%c%c%c" 160 161 163 180 195]' \
| isoto850
#*
# (find-fline "~/o")
# Note: the 23-cycle has only one char in the range 128..159, 148:
# (148 188 172 170 166 221 237 161 173 240 208 209 165 190 243 162 189
# 171 174 169 184 247 246)
#####
#
# mapscrn
# 2001jan23
#
#####
# «mapscrn» (to ".mapscrn")
# (find-status "kbd")
# (find-vldifile "kbd.list")
# (find-fline "/usr/doc/kbd/")
# (eeman "8 mapscrn")
# (eeman "4 console_codes" "Select user mapping")
#*
perl -e 'for ($i=0; $i<256; ++$i) { printf "%c", $i }' > /tmp/256
wc /tmp/256
isoto850 < /tmp/256 > /tmp/256a
#*
echo -ne '\e(U'
mapscrn /tmp/256
# Straight to ROM mapping:
#
# ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿
# ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
# àáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ
850toiso < /tmp/256 > /tmp/256b
mapscrn /tmp/256b
echo -ne '\e(K'
#
# If the screen font is latinmath then this
# will look like the math850 table:
#
# ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿
# ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
# àáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ
isoto850 < /tmp/256 > /tmp/256a
mapscrn /tmp/256a
echo -ne '\e(K'
#
# If the screen font is math850 then this
# will look like the latinmath table:
#
# ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿
# ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
# àáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ
#*
#####
#
# recode
# 2004sep26 / 2024sep01
#
#####
# «recode» (to ".recode")
# (find-status "recode")
# (find-vldifile "recode.list")
# (find-udfile "recode/")
# (find-status "recode-doc")
# (find-vldifile "recode-doc.list")
# (find-udfile "recode-doc/")
# (find-node "(recode)Top")
# (find-node "(recode)flat")
# (find-man "recode")
# (find-sh "recode --help")
# (find-sh "recode -l")
# (find-sh "recode --list")
# (find-sh "recode --verbose --list")
# (find-sh "recode --verbose --list=flat")
# https://unix.stackexchange.com/questions/631652/remove-accents-from-characters
# recode -f utf8..flat < textin.txt > flattext.out
#*
# (find-node "(recode)ASCII")
recode -lf us ;# for commented ASCII
recode -ld us ;# for concise decimal table
recode -lo us ;# for concise octal table
recode -lh us ;# for concise hexadecimal table
#*
# Descriptions for all chars that recode knows about
# (find-node "(recode)UCS-2")
# (find-node "(recode)Test")
# (find-node "(recode)Dump")
# (find-fline "~/o2")
# Dump recode's u2/test16 table: the dump minus its first two lines goes
# to ~/o, and a condensed copy, in which each run of single-field lines is
# folded into one "first..last" line, goes to ~/o2.
# "tee ~/o" (rather than "tee > ~/o") keeps the pipe fed in any shell,
# not only under zsh's MULTIOS.
echo -n \
  | recode u2/test16..dump \
  | tail -n +3 \
  | tee ~/o \
  | awk '
    NF==1 {
      if (!first) { first = $1 }
      last = $1
    }
    NF>1 {
      if (first) { printf "%s..%s\n", first, last; first = "" }
      print
    }
    END { if (first) { printf "%s..%s\n", first, last; first = "" } }
  ' \
  | tee ~/o2
#*
# (find-node "(recode)Requests")
# (find-node "(recode)Requests" "If the double\ndot separator is omitted")
echo -n | recode -v ../x1
echo -n | recode -v /x1..u8
echo -n | recode -v l1
#*
# (find-man "7 utf8")
# (find-node "(recode)UTF-8")
# NOTE(review): the accented literals below are Latin-1 bytes only if this
# file is saved as Latin-1; its trailer declares utf-8-unix -- confirm.
echo -n àáâãä | recode l1..dump | tail -n +3
echo -n àáâãä | recode l1..u2 | recode ../x1
echo -n Aaàáâãä | recode l1..u2 | recode ../x2
echo -n Aaàáâãä | recode l1..u8 | recode ../x1
#*
for i in $(recode -l | tr ' ' \\n); do
recode -lf $i
done \
|& tee /tmp/o
sort /tmp/o | uniq > /tmp/o2
#*
# (find-fline "/tmp/o2")
#*
cd /tmp/
cat > accents.latin1 <<'%%%'
`'^~" `'^~"
a àáâãä ÀÁÂÃÄ åÅ
e èéê ë ÈÉÊ Ë æÆ
i ìíî ï ÌÍÎ Ï çÇ
o òóôõö ÒÓÔÕÖ ñÑ
u ùúû ü ÙÚÛ Ü ýÿ
%%%
recode l1..u8 < accents.latin1 > accents.utf8
#*
# (find-man "1 less")
LESSCHARSET=latin1 less -f /tmp/accents.{latin1,utf8}
LESSCHARSET=utf-8 less -f /tmp/accents.{latin1,utf8}
#*
# (find-htetfile "Unicode-HOWTO.gz")
#*
cd /tmp/
debtarxvzf /hdd6/debian/dists/potato/main/binary-i386/utils/kbd_0.99-9.2.deb \
./usr/bin/setfont
cp -iv usr/bin/setfont ~/bin/
#*
setfont ~/MTA/math1.8
loadkeys ~/MTA/defkeymap850b.map
#*
perl -e 'for ($i=0; $i<256; ++$i) { printf "%c", $i }' > /tmp/256
isoto850 < /tmp/256 > /tmp/256b
# od -t x1 /tmp/256
# od -t u1 /tmp/256
#*
perl -e 'for ($i=128; $i<256; ++$i) { printf "%c", $i }' > /tmp/128
isoto850 < /tmp/128 > /tmp/128b
tr $(</tmp/128) $(</tmp/128b) < /tmp/128 | od -t x1
cat /tmp/128b | od -t x1
#*
#####
#
# Latin-1-ifying the output of pdftotext
# 2012jan18
#
#####
# «pdftotext» (to ".pdftotext")
# (find-es "ps" "pdftotext")
# (find-efunction 'brpdftextl)
# (find-efunction 'find-pdf-text "-enc Latin1")
# (find-man "1 pdftotext" "-enc encoding-name")
#####
#
# UTF-8
# 2013may30
#
#####
# «utf-8» (to ".utf-8")
# (find-es "lua5" "utf8")
# (find-esgrep "grep -niH -e utf-8 *.e")
# (find-esgrep "grep -niH -e utf8 *.e")
# (find-man "7 utf-8")
# (find-fline "/usr/share/i18n/charmaps/UTF-8.gz")
# (find-fline "/usr/share/i18n/charmaps/UTF-8.gz" "ARABIC LETTER ALEF")
#####
#
# u8_to_l1
# 2014jul21
#
#####
# «u8_to_l1» (to ".u8_to_l1")
# (find-angg "LUA/lua50init.lua" "u8_to_l1")
* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
lua51 -e 'for i=192,252 do printf(" \\%d %c", i, i) end' |
recode l1..u8 |
lua51 -e '
f = function (c) return format("\\%d", string.byte(c)) end
print((io.read():gsub("[\128-\255]", f)))
'
#####
#
# l1_to_u8
# 2019feb11
#
#####
# «l1_to_u8» (to ".l1_to_u8")
# \128 -> \194\128
# (...)
# \191 -> \194\191
# \192 -> \195\128
# (...)
# \255 -> \195\191
# (find-einsert '(194 10 (128 191) 10 (192 255)))
# (find-angg "LUA/lua50init.lua" "toslashhex")
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
bigstr = ""
for i=128,255 do bigstr = bigstr..format("%d -> %c\n", i, i) end
= bigstr
writefile("/tmp/l1", bigstr)
= getoutput("recode l1..u8 < /tmp/l1 > /tmp/u8")
bigstr = readfile("/tmp/u8")
= toslashhex(bigstr)
#####
#
# unicode-data
# 2014sep20
#
#####
# «unicode-data» (to ".unicode-data")
# (find-status "unicode-data")
# (find-vldifile "unicode-data.list")
# (find-udfile "unicode-data/")
http://www.fileformat.info/info/unicode/char/1d312/index.htm
http://www.fileformat.info/info/unicode/char/1d312/browsertest.htm
http://www.fileformat.info/info/unicode/char/1d40d/index.htm
#####
#
# unzip-unicode-problem
# 2021jun16
#
#####
# «unzip-unicode-problem» (to ".unzip-unicode-problem")
# (find-man "unzip")
# (find-sh "unzip")
# (find-sh "unzip -hh")
# (find-sh "unzip -hh" "-U [UNICODE enabled]")
# (find-sh "unzip -hh" "-^")
# (find-sh "unzip -hh" "Unicode:")
* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
# (find-fline "/tmp/tark/")
rm -Rv /tmp/tark/
mkdir /tmp/tark/
cd /tmp/tark/
unzip -U ~/tmp/tark-pequenos.zip
# (find-fline "~/tmp/tark-pequenos.zip")
<edrx> anyone knows 1) the name of this encoding 2) if recode can
deal with it? when I unzip .zips that contains files with
accents in their filenames I oftern get filenames like this:
"Hoje n#U00e3o Haver#U00e1 Sa#U00edda Livre"
<grym> maybe i can go swimming in it legally in 2022
https://thecharles.org/city-splash/
<homerj> edrx: I think if you light a candle and say that, you can
talk to dead people
<bpalmer> edrx: unicode e3 is latin small letter a with tilde;
would that make sense?
*** clone QUIT Quit: WeeChat 3.1
<homerj> apalmer, known for half lemonade/half iced tea
<bpalmer> and similarly e1 is small letter a with bacute accent,
and ed is small letter i with acute accent.
<edrx> bpalmer: yes, that's it. after conversion that would be
"Hoje não haverá saída livre".
<homerj> bpalmer, known for memorizing unicode
*** muto JOIN
<bpalmer> edrx: so it looks like the dumbest possible way of
generating a filename that every filesystem should
accept.
<bpalmer> (ignoring length)
<edrx> bpalmer: agreed
<bpalmer> I had to look up a unicode table, sadly.
*** hmmmas JOIN
*** holomorph JOIN
<edrx> I can adapt this code -
https://lists.gnu.org/archive/html/eev/2021-06/msg00010.html
- to recognize this encoding, but it would be easier to just
run "recode thisencoding..l1" on each filename.
*** irek JOIN
*** abhixec_ QUIT Ping timeout: 268 seconds
<grym> rudybot: unicode sandwiches, that's what's for dinner
<rudybot> grym: i buy my sandwiches in rods
<grym> kinky
#####
#
# iconv
# 2021sep06
#
#####
# «iconv» (to ".iconv")
# (find-man "1 iconv")
# (find-fline "~/LOGS/2021sep06.emacs")
# (find-fline "~/LOGS/2021sep06.emacs" "legendsofkyrandia12walk.htm")
* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
cd /tmp/
curl -s --insecure \
https://www.thecomputershow.com/computershow/walkthroughs/legendsofkyrandia12walk.htm \
| iconv --from CPIBM861 --to UTF-8//IGNORE \
> /tmp/o.html
# (find-fline "/tmp/o.html")
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
-- http://www.thecomputershow.com/computershow/walkthroughs/legendsofkyrandia12walk.htm
fname = "$S/http/www.thecomputershow.com/computershow/walkthroughs/legendsofkyrandia12walk.htm"
bigstr0 = ee_readfile(fname)
lines0 = splitlines(bigstr0)
lines = {}
for i=546,746 do table.insert(lines, lines0[i]) end
bigstr = table.concat(lines, "\n")
seqs = SetL.new()
for s in bigstr:gmatch("[\128-\255]+") do
seqs:add(s)
end
= seqs:ksc()
numberedseqs = SetL.new()
for i,s in ipairs(seqs:ks()) do
numberedseqs:add(s, i)
PP(i, s)
end
# (find-fline "~/LOGS/2023feb03.emacs")
# (find-fline "~/LOGS/2023feb03.emacs" "emoji-zwj-sequences")
# https://unicode.org/emoji/charts/emoji-zwj-sequences.html
# https://blog.emojipedia.org/fun-emoji-hacks/
2012nov05, lua-l (from William Ahearn):
http://docs.parrot.org/parrot/devel/html/docs/pdds/pdd28_strings.pod.html
https://www.quora.com/Why-is-there-no-character-for-superscript-q-in-Unicode
https://www.compart.com/en/unicode/U+02D9
# Local Variables:
# coding: utf-8-unix
# End: