eev-current/eev-codings.el (htmlized)

Warning: this is an htmlized version!
The original is here, and
the conversion rules are here.
;;; eev-codings.el -- tricks to support both the UTF8 coding system and unibyte  -*- lexical-binding: nil; -*-

;; Copyright (C) 2018-2019,2023 Free Software Foundation, Inc.
;;
;; This file is part of GNU eev.
;;
;; GNU eev is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;;
;; GNU eev is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
;;
;; Author:     Eduardo Ochs <eduardoochs@gmail.com>
;; Maintainer: Eduardo Ochs <eduardoochs@gmail.com>
;; Version:    20230127
;; Keywords:   e-scripts
;;
;; Latest version: <http://anggtwu.net/eev-current/eev-codings.el>
;;       htmlized: <http://anggtwu.net/eev-current/eev-codings.el.html>
;;       See also: <http://anggtwu.net/eev-current/eev-beginner.el.html>
;;                 <http://anggtwu.net/eev-intros/find-eev-intro.html>

;;; Commentary:

;; Until mid-2017 a user who wanted to use anchors (like "«tag»") in
;; both unibyte and multibyte buffers had to put something like this
;;
;;   ee-anchor-format:  "«%s»"
;;
;; in the local variables section at the end of (at least some of) his
;; files; the functions defined here make the local variables section
;; trick unnecessary - `ee-format-as-anchor' now uses `ee-tolatin1' to
;; produce a search string that works on unibyte, UTF-8 and latin-1
;; files, and on some (most?) other encodings.
;;
;; NOTE: `ee-tolatin1' is a hack! Conversion to latin-1 seems to work in
;; most cases, but I don't understand very well the reasons why... I
;; have some notes about all this in these e-script blocks in my notes
;; about Emacs:
;;
;;   (find-es "emacs" "unibyte-2019")
;;   (find-es "emacs" "unibyte-2019-search")
;;   (find-es "emacs" "creating-utf8-files")
;;   (find-es "emacs" "ee-re-to")
;;   http://anggtwu.net/e/emacs.e.html#unibyte-2019
;;   http://anggtwu.net/e/emacs.e.html#unibyte-2019-search
;;   http://anggtwu.net/e/emacs.e.html#creating-utf8-files
;;   http://anggtwu.net/e/emacs.e.html#ee-re-to
;;
;;
;; NOTE 2: Sorry for taking so long!! Here's what happened. This page
;;
;;   http://anggtwu.net/glyphs.html
;;
;; tells a bit about the hacked 256-char fonts that I created many
;; years before UTF-8 became standard, and that I used for ages in
;; some of my notes and .tex files... I wanted to maintain
;; compatibility with the files that used those fonts, and this turned
;; out to be very hard - these hacked fonts only worked in files and
;; buffers in which the encoding was "raw-text",
;;
;;   (find-elnode "Non-ASCII Characters")
;;   (find-elnode "Disabling Multibyte" "unibyte")
;;   (find-elnode "Disabling Multibyte" "raw-text")
;;
;; and before 2019 I had a *very* poor understanding of how Emacs
;; converts between unibyte and multibyte and between raw-text,
;; latin-1 and utf-8...



;; «.ee-tolatin1»	(to "ee-tolatin1")
;; «.ee-tolatin1-re»	(to "ee-tolatin1-re")




;;;                  _        _       _   _       _ 
;;;   ___  ___      | |_ ___ | | __ _| |_(_)_ __ / |
;;;  / _ \/ _ \_____| __/ _ \| |/ _` | __| | '_ \| |
;;; |  __/  __/_____| || (_) | | (_| | |_| | | | | |
;;;  \___|\___|      \__\___/|_|\__,_|\__|_|_| |_|_|
;;;                                                 
;; «ee-tolatin1» (to ".ee-tolatin1")
;; Original comment:
;;
;; 2017jul29: this is a low-level hack to allow anchors like "«tag»"
;; to work on both unibyte and multibyte buffers and files without
;; requiring the user to set the variable `ee-anchor-format' in the
;; local variables section.

(defun ee-to-coding (coding str)
  "Decode STR with the coding system CODING.
STR is decoded with `decode-coding-string' and the decoded string
is then passed through `ee-no-properties', whose result is
returned."
  (let ((decoded (decode-coding-string str coding)))
    (ee-no-properties decoded)))

(defun ee-tolatin1 (str)
  "Return a version of STR usable in unibyte and in multibyte buffers.
STR is run through `ee-to-coding' with the coding system
`latin-1'. The result is meant to work both in unibyte (raw-text)
buffers and in multibyte ones (e.g., utf-8). Chars of STR outside
the latin-1 range may make this fail.
`ee-format-as-anchor' relies on this function."
  (let ((coding 'latin-1))
    (ee-to-coding coding str)))




;;;                  _        _       _   _       _                
;;;   ___  ___      | |_ ___ | | __ _| |_(_)_ __ / |      _ __ ___ 
;;;  / _ \/ _ \_____| __/ _ \| |/ _` | __| | '_ \| |_____| '__/ _ \
;;; |  __/  __/_____| || (_) | | (_| | |_| | | | | |_____| | |  __/
;;;  \___|\___|      \__\___/|_|\__,_|\__|_|_| |_|_|     |_|  \___|
;;;                                                                
;; «ee-tolatin1-re» (to ".ee-tolatin1-re")
;; 2019feb24: this is a hack!
;; Test code: (find-es "emacs" "unibyte-2019-search")

(defun ee-tolatin1-re (re)
  "Make the regexp RE compatible with the current buffer.
This is similar to `ee-tolatin1', but for regexps that contain
the \"«»\"s used to delimit anchors. For example,

  (ee-tolatin1-re \"\\253\\([!-~]\\)\\273\")

should return a regexp for anchors that works in the current
buffer.

The decision is based on the base of `buffer-file-coding-system',
i.e., on that coding system with its end-of-line variant and its
aliases resolved by `coding-system-base'. If the base is
`iso-latin-1' or `utf-8' then RE is converted with `ee-tolatin1';
for `raw-text' and for every other coding system RE is returned
unchanged. This is a hack and a work in progress!!!"
  (let* ((bfcs buffer-file-coding-system)
         ;; Compare base coding systems instead of exact symbols, so
         ;; that `utf-8-unix', `utf-8-dos', `utf-8-mac' and plain
         ;; `utf-8' are all treated alike (and similarly for latin-1
         ;; and its aliases). The end-of-line convention is irrelevant
         ;; to how the anchor delimiters are matched.
         (base (if (coding-system-p bfcs)
                   (coding-system-base bfcs)
                 bfcs)))
    (if (memq base '(iso-latin-1 utf-8))
        (ee-tolatin1 re)
      ;; `raw-text' and all the coding systems that we don't know how
      ;; to handle: use RE as it is.
      re)))



;; Announce the feature `eev-codings', so that
;; (require 'eev-codings) knows that this file has been loaded.
(provide 'eev-codings)



;; Local Variables:
;; coding: utf-8-unix
;; no-byte-compile: t
;; End: