|
Warning: this is an htmlized version!
The original is here, and the conversion rules are here. |
#######
#
# E-scripts on processing and editing subtitles.
#
# Note 1: use the eev command (defined in eev.el) and the
# ee alias (in my .zshrc) to execute parts of this file.
# Executing this file as a whole makes no sense.
# An introduction to eev can be found here:
#
# (find-eev-quick-intro)
# http://angg.twu.net/eev-intros/find-eev-quick-intro.html
#
# Note 2: be VERY careful and make sure you understand what
# you're doing.
#
# Note 3: If you use a shell other than zsh things like |&
# and the for loops may not work.
#
# Note 4: I always run as root.
#
# Note 5: some parts are too old and don't work anymore. Some
# never worked.
#
# Note 6: the definitions for the find-xxxfile commands are in my
# .emacs.
#
# Note 7: if you see a strange command check my .zshrc -- it may
# be defined there as a function or an alias.
#
# Note 8: the sections without dates are always older than the
# sections with dates.
#
# This file is at <http://angg.twu.net/e/subtitles.e>
# or at <http://angg.twu.net/e/subtitles.e.html>.
# See also <http://angg.twu.net/emacs.html>,
# <http://angg.twu.net/.emacs[.html]>,
# <http://angg.twu.net/.zshrc[.html]>,
# <http://angg.twu.net/escripts.html>,
# and <http://angg.twu.net/>.
#
#######
# «.subed» (to "subed")
# «.subed-mpv-low-level» (to "subed-mpv-low-level")
# «.subed-keys» (to "subed-keys")
# «.subed-git» (to "subed-git")
# «.waveform-test» (to "waveform-test")
# «.waveform-test-2» (to "waveform-test-2")
# «.mpv-geometry» (to "mpv-geometry")
# «.pysubs2» (to "pysubs2")
# «.gryms-code» (to "gryms-code")
# «.Subtitles.lua» (to "Subtitles.lua")
# «.aeneas» (to "aeneas")
# «.angle-brackets» (to "angle-brackets")
# «.waveforms» (to "waveforms")
<edrx> sachac: I used this to download the automatic subtitles of
the video that I have just recorded -
https://0x0.st/oTrh.txt - but the .vtt file looks like this:
https://0x0.st/oTrC.txt
<edrx> sachac: this is tricky to edit because of the timestamps on
words - like <00:00:04.400><c> and</c><00:00:04.880><c>
this</c><00:00:05.120>...
<edrx> sachac: do you have tricks to convert that to another
format? I tried pysubs2 - more precisely: pip3 install
pysubs2; pysubs2 --help; pysubs2 --to srt *.vtt
#####
#
# subed
# 2022apr19
#
#####
# «subed» (to ".subed")
# (find-es "emacs" "subed")
# (find-epackage-links 'subed "subed" t)
# (find-epackage 'subed)
# (code-c-d "subed" "~/.emacs.d/elpa/subed-1.0.10/")
# (code-c-d "subed" "~/.emacs.d/elpa/subed/")
# (find-subedfile "")
# (find-subedfile "subed/subed-mpv.el")
# https://github.com/sachac/subed
# (find-subedgrep "grep --color=auto -niH --null -e cps *")
# (find-es "mplayer" "geometry")
-geometry 400x300+520+100
# (find-efunctiondescr 'customize-apropos)
# (find-efunction 'customize-apropos)
# (find-efunction 'customize-apropos "apropos-read-pattern")
# (customize-apropos "")
# (customize-apropos "subed")
# (find-customizegroup 'subed)
# (customize-group 'subed)
# (find-efunction 'find-customizegroup)
# (customize-variable 'subed-default-subtitle-length)
# (customize-option 'subed-default-subtitle-length)
#####
#
# Low-level tests for subed-mpv.el
# 2022oct09
#
#####
# «subed-mpv-low-level» (to ".subed-mpv-low-level")
# (find-fline "~/LOGS/2022oct09.emacs" "<quiliro> edrx: subed-mpv is not found")
# (to "subed")
# (require 'subed-mpv)
# (find-eapropos "subed-mpv")
# (find-efunction 'subed-mpv--play)
# (find-efunction 'subed-mpv-jump)
# (find-efunction 'subed-mpv-pause)
# (find-efunction 'subed-mpv-unpause)
;; Check that you have the video:
(find-1stclassvideo-links "2022pict2elua")
(find-2022pict2eluavideo "0:00")
;; Copy it and its subtitles to /tmp/:
(find-eevvideossh0 "cp -v 2022-pict2e-lua.{mp4,vtt} /tmp/")
(setq my-video "/tmp/2022-pict2e-lua.mp4")
;; Then...
(require 'subed-mpv)
(subed-mpv--play my-video)
(subed-mpv-jump 20000)
(subed-mpv-jump 200000)
(subed-mpv-pause)
(subed-mpv-unpause)
#####
#
# subed-keys
# 2022jul15
#
#####
# «subed-keys» (to ".subed-keys")
# (find-eevvideosfile "2022jul10-apresentacao-C2.vtt")
# (find-es "emacs" "transparence-in-keymaps")
# (find-subedfile "subed.el" "(defconst subed-mode-map")
# (require 'subed)
# (find-ekeymapdescr subed-mode-map)
;; Two of the bindings in `subed-mode-map' that are removed below:
;;   M-j   subed-mpv-jump-to-current-subtitle
;;   M-k   subed-kill-subtitle
;; Unbind M-j, M-k and M-p in `subed-mode-map' (the command that was
;; on M-p is not recorded here):
(define-key subed-mode-map (kbd "M-j") nil)
(define-key subed-mode-map (kbd "M-k") nil)
(define-key subed-mode-map (kbd "M-p") nil)
;; Short interactive commands (`M-x j', `M-x k') that call the two
;; subed commands listed above:
(defun j () (interactive) (subed-mpv-jump-to-current-subtitle))
(defun k () (interactive) (subed-kill-subtitle))
;; Three alternative definitions of `sk'. Each `defun' replaces the
;; previous one, so the one evaluated last is the one in effect.
(defun sk () (interactive) (find-es "subtitles" "subed-keys"))
(defun sk () (interactive) (find-2b '(find-es "subtitles" "subed-keys")))
(defun sk () (interactive) (find-2a '(find-subedfile "subed.el" "(defconst subed-mode-map")))
#####
#
# subed-git
# 2023apr09
#
#####
# «subed-git» (to ".subed-git")
# (find-epackage-links 'subed "subed" t)
# (find-es "emacs" "package-vc-install")
# (package-vc-install "https://github.com/sachac/subed")
# https://github.com/sachac/subed/issues/64
# (find-git-links "https://github.com/sachac/subed" "subed")
* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
rm -Rfv ~/usrc/subed/
cd ~/usrc/
git clone https://github.com/sachac/subed
cd ~/usrc/subed/
export PAGER=cat
git branch --list -a
git for-each-ref
git log --oneline --graph --all -20
git checkout main
git checkout waveform
# (find-fline "~/usrc/")
# (find-fline "~/usrc/subed/")
# (find-gitk "~/usrc/subed/")
# (code-c-d "subed" "~/usrc/subed/")
# (code-c-d "subeds" "~/usrc/subed/subed/")
# (find-subedfile "")
# (find-subedsfile "")
# (find-subedsh "find * | sort")
# (find-subedfile "subed/subed-waveform.el")
# (find-subedsgrep "grep --color=auto -nH --null -e auto-mode *.el")
# (find-1stclassvideo-links "eev2021")
# (find-psne-1stclassvideo-links "eev2021")
# (add-to-list 'load-path "~/usrc/subed/subed/")
# (find-es "emacs" "locate-library")
# (locate-library "subed-waveform.el")
# (find-subedsfile "subed-waveform.el")
# (find-subedsfile "subed-vtt.el")
(require 'subed)
(require 'subed-waveform)
(require 'subed-vtt)
(find-subedsfile "subed.el")
* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
# (find-fline "/tmp/subtest/")
# (find-fline "/tmp/subtest/emacsconf2021.vtt")
rm -Rv /tmp/subtest/
mkdir /tmp/subtest/
cd /tmp/subtest/
# wget http://anggtwu.net/eev-videos/emacsconf2021.mp4
# wget http://anggtwu.net/eev-videos/emacsconf2021.vtt
cp -v $S/http/anggtwu.net/eev-videos/emacsconf2021.mp4 .
cp -v $S/http/anggtwu.net/eev-videos/emacsconf2021.vtt .
• (eepitch-shell)
• (eepitch-kill)
• (eepitch-shell)
rm -Rv /tmp/sub/
mkdir /tmp/sub/
cd /tmp/sub/
wget -O a.mp4 http://anggtwu.net/eev-videos/emacsconf2021.mp4
wget -O a.vtt http://anggtwu.net/eev-videos/emacsconf2021.vtt
•• (find-evariable 'subed-mpv-media-file)
• (setq subed-mpv-media-file "/tmp/sub/a.mp4")
* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
# We will use the local copy of this video:
# (find-1stclassvideo-links "eev2021")
rm -Rv /tmp/sub/
mkdir /tmp/sub/
cd /tmp/sub/
cp -v $S/http/anggtwu.net/eev-videos/emacsconf2021.mp4 a.mp4
cp -v $S/http/anggtwu.net/eev-videos/emacsconf2021.vtt a.vtt
# Now visit its .vtt,
# type M-x eeit between two subtitles,
# and try the test block...
* (find-fline "/tmp/sub/a.vtt")
subed-mpv--socket, subed-mpv--server-start, subed-mpv--server-stop
#####
#
# Test subed-waveform
# 2023aug03
#
#####
# «waveform-test» (to ".waveform-test")
** Part 0: make sure that you don't have subed
** installed as an Emacs package. If this
** (locate-library "subed.el")
** points to a subed.el outside ~/usrc/subed/,
** then the simplest way to get rid of that
** subed-the-package is to run the right sexps
** from the temporary buffer generated by this:
** (find-epackage-links 'subed)
** Part 1: install the waveform branch of subed
** in ~/usrc/subed/ and run some requires.
**
* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
mkdir -p ~/usrc/subed/
rm -Rfv ~/usrc/subed/
cd ~/usrc/
git clone https://github.com/sachac/subed
cd ~/usrc/subed/
export PAGER=cat
git branch --list -a
git for-each-ref
git log --oneline --graph --all -20
# git checkout main
git checkout waveform
# (find-fline "~/usrc/")
# (find-fline "~/usrc/subed/")
# (find-gitk "~/usrc/subed/")
* (code-c-d "subed" "~/usrc/subed/")
* (code-c-d "subeds" "~/usrc/subed/subed/")
** (find-subedfile "")
** (find-subedsfile "")
** (find-subedsh "find * | sort")
** (find-subedssh "find * | sort")
** (find-subedsfile "subed-waveform.el")
** Part 2: prepare a subdirectory in /tmp/
** with a short video in .mp4 and its subtitles
** in .vtt:
** (find-eev2021video "0:00")
** (find-1stclassvideo-links "eev2021")
** (find-psne-1stclassvideo-links "eev2021")
* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
rm -Rv /tmp/subtest/
mkdir /tmp/subtest/
cd /tmp/subtest/
# wget http://anggtwu.net/eev-videos/emacsconf2021.mp4
# wget http://anggtwu.net/eev-videos/emacsconf2021.vtt
cp -v $S/http/anggtwu.net/eev-videos/emacsconf2021.mp4 .
cp -v $S/http/anggtwu.net/eev-videos/emacsconf2021.vtt .
# (find-fline "/tmp/subtest/")
# (find-video "/tmp/subtest/emacsconf2021.mp4")
# (find-fline "/tmp/subtest/emacsconf2021.vtt")
** Part 3: require subed-waveform.
*
* (add-to-list 'load-path "~/usrc/subed/subed/")
* (require 'subed)
* (require 'subed-waveform)
* (require 'subed-vtt)
* (require 'svg)
*
** Test: check that some `locate-library's
** return files in ~/usrc/subed/subed/:
** (locate-library "subed.el")
** (locate-library "subed-vtt.el")
** (locate-library "subed-waveform.el")
#####
#
# waveform-test-2
# 2023aug13
#
#####
# «waveform-test-2» (to ".waveform-test-2")
# (find-es "ffmpeg" "waveform")
* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
# Recreate /tmp/subtest/ from scratch and enter it:
rm -Rv /tmp/subtest/
mkdir /tmp/subtest/
cd /tmp/subtest/
# Get the video and its subtitles. The wgets (commented out) download
# them; the cps take them from a local copy under $S
# (NOTE: $S is presumably set in .zshrc -- see Note 7 at the top).
# wget http://anggtwu.net/eev-videos/emacsconf2021.mp4
# wget http://anggtwu.net/eev-videos/emacsconf2021.vtt
cp -v $S/http/anggtwu.net/eev-videos/emacsconf2021.mp4 .
cp -v $S/http/anggtwu.net/eev-videos/emacsconf2021.vtt .
cd /tmp/subtest/
# Draw the audio between seconds 5 and 10 of the video as a 400x100
# gray waveform picture (volume doubled, log output silenced) and
# write it as a single PNG frame to o.png:
ffmpeg \
-accurate_seek -ss 5 -to 10 -i emacsconf2021.mp4 \
-loglevel 0 \
-filter_complex volume=2.0,showwavespic=s=400x100:colors=gray \
-frames:v 1 -c:v png -f image2 \
o.png
# (find-fline "/tmp/subtest/o.png")
#####
#
# mpv-geometry
# 2022may02
#
#####
# «mpv-geometry» (to ".mpv-geometry")
# (find-fline "/sda5/videos/" "A_Tour_of_the_Acme_Editor-dP1xVpMPn8M.webm")
* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
mpv -geometry 400x300+520+100 /sda5/videos/A_Tour_of_the_Acme_Editor-dP1xVpMPn8M.webm
mpv --geometry=400x300+520+100 /sda5/videos/A_Tour_of_the_Acme_Editor-dP1xVpMPn8M.webm
# (find-customizevariable 'subed-mpv-arguments)
# (find-evariable-links 'subed-mpv-arguments)
# (find-fline "~/.emacs.custom" "subed-mpv-arguments")
#####
#
# pysubs2
# 2022apr19
#
#####
# «pysubs2» (to ".pysubs2")
rm -Rv /tmp/edrx-pict2e/
mkdir /tmp/edrx-pict2e/
cd /tmp/edrx-pict2e/
yt-dlp --write-sub --write-auto-sub \
--skip-download -o 2022-pict2e-lua \
"http://www.youtube.com/watch?v=hiHsUhGVLGM"
wget http://angg.twu.net/eev-videos/2022-pict2e-lua.mp4
ls -lAF
mpv /tmp/edrx-pict2e/2022-pict2e-lua.mp4
#####
#
# gryms-code
# 2022apr19
#
#####
# «gryms-code» (to ".gryms-code")
# (find-yttranscript-links "2022pict2elua" "hiHsUhGVLGM")
# (find-efunction 'find-yttranscript-links)
# (find-fline "~/LOGS/2022apr19.emacs")
# https://bpa.st/OKIQ
* (python-mode)
* (eepitch-python)
* (eepitch-kill)
* (eepitch-python)
import youtube_transcript_downloader
url = "http://www.youtube.com/watch?v=hiHsUhGVLGM"
f = "find-2022pict2eluavideo"
# The result of get_transcript is used as a mapping from a time
# string to the text shown at that time (see the .items() below).
tr = youtube_transcript_downloader.get_transcript(url)
trits0 = tr.items()
# First output: one eev hyperlink line per transcript entry.
trits1 = '\n'.join(('% (' + f + ' "' + key + '" "' + text + '")' for key, text in trits0))
print(trits1)
# Second output: a .vtt-like body. Each entry is paired with the next
# one, whose start time is used as the stop time of the current cue;
# the last entry has no successor and so it produces no cue.
trits2 = tuple(trits0)
trits3 = zip(trits2, trits2[1:])
# A one-line comprehension instead of a "for" block: it does not depend
# on indentation and can be sent to the REPL as a single line.
vtts = [f"{start} --> {stop}\n{thistext}\n" for (start, thistext), (stop, nexttext) in trits3]
header = "WEBVTT\nKind: captions:\nLanguage: en-GB\n"
body = "\n".join((header, *vtts))
print(body)
#####
#
# Subtitles.lua
# 2022apr20
#
#####
# «Subtitles.lua» (to ".Subtitles.lua")
# (find-anggfile "LUA/Subtitles.lua")
#####
#
# aeneas
# 2022nov11
#
#####
# «aeneas» (to ".aeneas")
# (find-fline "~/LOGS/2022nov11.emacs" "<sachac> edrx: oh, you can use aeneas")
# https://www.readbeyond.it/aeneas/
# https://github.com/readbeyond/aeneas
# https://pypi.org/project/aeneas/1.4.0.0/
# https://www.youtube.com/watch?v=xP870sdyCXE
# https://media.emacsconf.org/2022/backstage/editing-captions.html
#####
#
# Fixing the "<>"s in my subtitles class
# 2023apr16
#
#####
# «angle-brackets» (to ".angle-brackets")
# (find-1stclassvideo-links "eev2021")
# (find-eev2021video "0:14")
# https://www.youtube.com/watch?v=qM0Luz78qGw
# (find-yttranscript-links "{c}" "qM0Luz78qGw")
# (find-importlib-links "youtube-transcript-downloader")
# https://github.com/t4skmanag3r/youtube_transcript_downloader/issues/1
#####
#
# waveforms
# 2023jun23
#
#####
# «waveforms» (to ".waveforms")
# https://mbork.pl/2023-06-19_Emacs_Subed_mode_can_now_display_waveforms
# https://github.com/sachac/waveform-el
# (code-c-d "waveform" "~/usrc/waveform-el/")
# (find-waveformfile "")
# (find-waveformfile "waveform.el")
# (load "~/usrc/waveform-el/waveform.el")
# (find-eevvideosfile "" "emacsconf2021")
emacsconf2021.mp4
* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
https://sachachua.com/blog/2022/10/subed-el-word-level-timing-improvements/
https://sachachua.com/dotemacs/index.html
(defun my-caption-download-srv2 (id)
  "Download the English srv2 captions of YouTube video ID and load them.
ID is either a video id or a string containing \"v=<id>\", in which
case the id is extracted from it.  yt-dlp is run synchronously in
/tmp with subtitles only (no video download); afterwards the file
returned by `my-latest-file' for \"/tmp\" and the .srv2 extension is
passed to `subed-word-data-load-from-file'."
  (interactive "MID: ")
  (require 'subed-word-data)
  (let ((video-id (if (string-match "v=\\([^&]+\\)" id)
                      (match-string 1 id)
                    id))
        ;; yt-dlp writes its output files into the current directory.
        (default-directory "/tmp"))
    (call-process "yt-dlp" nil nil nil
                  "--write-auto-sub"
                  "--write-sub"
                  "--no-warnings"
                  "--sub-lang" "en"
                  "--skip-download"
                  "--sub-format" "srv2"
                  (concat "https://youtu.be/" video-id))
    (subed-word-data-load-from-file
     (my-latest-file "/tmp" "\\.srv2\\'"))))
(defun my-caption-fix-common-errors (data)
  "Apply the edits in `my-subed-common-edits' to the captions in DATA.
Each element of DATA is a map with a `text' entry.  Each edit is
either a string or a list of strings.  For every caption and every
edit, the caption text is searched for any of the edit's non-empty
strings as a whole word; when one is found, the first match is
replaced, literally and with its case kept as given, by the first
string of the edit.  The captions are modified in place.
Return DATA."
  (mapc
   (lambda (caption)
     (mapc
      (lambda (edit)
        (let* ((forms (if (listp edit) edit (list edit)))
               ;; Empty strings are dropped only when EDIT is a list.
               (candidates (if (listp edit)
                               (seq-remove (lambda (s) (string= "" s)) edit)
                             (list edit)))
               (pattern (concat "\\<" (regexp-opt candidates) "\\>")))
          (when (string-match pattern (alist-get 'text caption))
            (map-put! caption 'text
                      (replace-match (car forms) t t
                                     (alist-get 'text caption))))))
      my-subed-common-edits))
   data))
https://news.ycombinator.com/item?id=34105063 BBC Subtitle Guidelines (bbc.co.uk)
https://sachachua.com/dotemacs/
https://sachachua.com/dotemacs/#subed
https://melpa.org/#/opensub
# (find-eevvideosfile "2022jul10-apresentacao-C2.vtt")
http://mbork.pl/2022-09-05_Comments_in_srt_files
<sachac> edrx: set subed-auto-find-video to nil
# (find-fline "~/LOGS/2023jan03.emacs" "emacsconf-subed")
https://sachachua.com/blog/2023/12/using-subed-record-in-emacs-to-edit-audio-and-clean-up-oopses/
https://thesquareplanet.com/blog/ai-captioning/
# Local Variables:
# coding: utf-8-unix
# End: