-- Warning: this is an htmlized version!
-- The original is here, and the conversion rules are here.
-- This file:
-- http://angg.twu.net/youtube-db/toolbox.lua
-- http://angg.twu.net/youtube-db/toolbox.lua.html
-- (find-angg "youtube-db/toolbox.lua")
--
-- Note that it uses functions from here:
-- http://angg.twu.net/youtube-db/edrxlib.lua
-- http://angg.twu.net/youtube-db/edrxlib.lua.html
-- (find-angg "youtube-db/edrxlib.lua")
-- (find-angg "LUA/lua50init.lua")
-- and I use eepitch to use this script interactively, as a toolbox...
-- (find-eepitch-intro)
-- I use this to keep a big archive of videos:
-- http://angg.twu.net/linkdasruas.html
-- http://angg.twu.net/linkdasruas2.html
-- The docs are here, but they are in Portuguese:
-- http://angg.twu.net/ferramentas-para-ativistas.html
-- If this looks interesting to you, PLEASE GET IN TOUCH!!!!
-- eduardoochs@gmail.com
-- «.basic-tools» (to "basic-tools")
-- «.bigstrs» (to "bigstrs")
-- «.video-fnames» (to "video-fnames")
-- «.dates» (to "dates")
-- «.shell-functions» (to "shell-functions")
-- «.YScripts» (to "YScripts")
-- «.YoutubeDB» (to "YoutubeDB")
-- «.YoutubeDB-add» (to "YoutubeDB-add")
-- «.YoutubeDB-register» (to "YoutubeDB-register")
-- «.YoutubeDB-register-fnames» (to "YoutubeDB-register-fnames")
-- «.YoutubeDB-read-run-ls» (to "YoutubeDB-read-run-ls")
-- «.YoutubeDB-traverse» (to "YoutubeDB-traverse")
-- «.YoutubeDB-gets» (to "YoutubeDB-gets")
-- «.YoutubeDB-dump» (to "YoutubeDB-dump")
-- «.YoutubeDB-missing-titles» (to "YoutubeDB-missing-titles")
-- «.YoutubeDB-missing-dates» (to "YoutubeDB-missing-dates")
-- «.YoutubeDB-dates_cat» (to "YoutubeDB-dates_cat")
-- «.YoutubeDB-titles_cat» (to "YoutubeDB-titles_cat")
-- «.YoutubeDB-script_cp_angg» (to "YoutubeDB-script_cp_angg")
-- «.YoutubeDB-script_dl_angg» (to "YoutubeDB-script_dl_angg")
-- «.YoutubeDB-dates-titles-cat» (to "YoutubeDB-dates-titles-cat")
-- «.YoutubeDB-filter» (to "YoutubeDB-filter")
-- «.YoutubeDB-copy-dates-into» (to "YoutubeDB-copy-dates-into")
-- «.YoutubeDB-txt2_line» (to "YoutubeDB-txt2_line")
-- «.YoutubeDB-line-to-fname» (to "YoutubeDB-line-to-fname")
-- «.template-dynamic.html» (to "template-dynamic.html")
-- «.write_html_dynamic» (to "write_html_dynamic")
-- «.simple_toplevel» (to "simple_toplevel")
-- «.simple_toplevel2» (to "simple_toplevel2")
-- «basic-tools» (to ".basic-tools")
-- eval(str): compile the string `str` as a Lua chunk, run it, and
-- return everything the chunk returns.
-- A compilation error is raised through `assert`; a runtime error in
-- the chunk propagates to the caller.
-- `loadstring` only exists in Lua 5.1/LuaJIT; from Lua 5.2 on strings
-- are compiled with `load`, so we fall back to it when needed.
-- NOTE: this executes arbitrary code - never feed it untrusted input.
eval = function (str)
  local compile = loadstring or load
  return assert(compile(str))()
end
-- if arg and arg[1] then
-- local cmd = arg[1]
-- if cmd == "-sort_td" then sort_by_tag_and_date(arg[2])
-- else eval(arg[1])
-- end
-- end
-- identity(...): return all the arguments, unchanged.
function identity(...)
  return ...
end
-- (find-es "lua5" "cow-and-coy")
-- Short aliases used by the generators in this file: `cow` turns a
-- function into a coroutine-based iterator and `coy` yields values
-- from inside it.
cow, coy = coroutine.wrap, coroutine.yield
-- (find-blogme3file "youtube.lua" "meta_first_time")
-- meta_first_time(): return a fresh predicate `first_time(hash)` that
-- answers true the first time it is called with a given `hash` and
-- false on every later call with that same value.
-- Each call to meta_first_time() creates an independent memory.
meta_first_time = function ()
  local seen = {}
  local first_time = function (hash)
    if seen[hash] then return false end
    seen[hash] = "present"
    return true
  end
  return first_time
end
-- like "cat L | sort | uniq"
-- sort_uniq(L): like "cat L | sort | uniq".
-- Builds a Set from the list `L` and returns the result of its
-- `:ks()` method (presumably the sorted list of distinct elements -
-- confirm in edrxlib.lua, where Set is defined).
sort_uniq = function (L)
  local set = Set.from(L)
  return set:ks()
end
-- ppfo(): create an in-memory output buffer and return three closures
-- that share it:
--   print(str)        appends str followed by a newline,
--   printf(fmt, ...)  appends format(fmt, ...) (no newline is added),
--   output()          returns everything appended so far, concatenated.
ppfo = function ()
  local pieces = {}
  local function buffer_print (str)
    table.insert(pieces, str.."\n")
  end
  local function buffer_printf (fmt, ...)
    table.insert(pieces, format(fmt, ...))
  end
  local function buffer_output ()
    return table.concat(pieces)
  end
  return buffer_print, buffer_printf, buffer_output
end
-- no_coding(str): if the first line of `str` contains "coding:" (and
-- is terminated by a newline), return `str` without that first line;
-- otherwise return `str` unchanged.
no_coding = function (str)
  local coding_line = str:match("^[^\n]*coding:[^\n]*\n")
  if not coding_line then return str end
  local rest = str:match("^[^\n]*\n(.*)")
  return rest
end
-- ____ _ _
-- | __ )(_) __ _ ___| |_ _ __ ___
-- | _ \| |/ _` / __| __| '__/ __|
-- | |_) | | (_| \__ \ |_| | \__ \
-- |____/|_|\__, |___/\__|_| |___/
-- |___/
--
-- String traversers, usually for the contents of files.
-- gen_nonempty_lines: for each non-empty line in bigstr, yield it.
-- gen_tag_hash_title: for each youtube-ish line in bigstr, yield
-- "tag,hash,title,dates". See:
-- (find-blogme3file "youtube.lua" "gen_video_files")
-- gen_hash_fname_dir_stem: for each line like
-- "dir/title-youtubeid.mp4" in bigstr, yield "hash,fname,dir,stem".
--
-- The code calls this: (find-angg "LUA/lua50init.lua" "youtube_split")
--
-- «bigstrs» (to ".bigstrs")
--
-- gen_nonempty_lines(bigstr): iterator over the non-empty lines of
-- `bigstr`, i.e., over its maximal runs of non-newline characters.
gen_nonempty_lines = function (bigstr)
  local nonempty_line = "[^\n]+"
  return bigstr:gmatch(nonempty_line)
end
-- gen_tag_hash_title = function (bigstr) -- bigstr has video urls with titles
-- return cow(function ()
-- for line in gen_nonempty_lines(bigstr) do
-- local a, hash, b, title, c = youtube_split_url0(line)
-- -- For each youtube-ish line yield tag,hash,title
-- if a then
-- local tag = a:match("%[(.*)%]")
-- coy(tag or "", hash, title)
-- end
-- end
-- end)
-- end
-- gen_tag_hash_title(bigstr): iterator over the youtube-ish lines of
-- `bigstr` (a text with video urls and titles).
-- Each non-empty line is split with youtube_split_url0; lines for
-- which it returns nothing are skipped. For the others the text
-- before the url is parsed by pre_to_tag_and_dates, and the iterator
-- yields: tag (or ""), hash, title, dates.
gen_tag_hash_title = function (bigstr)
  local traverse = function ()
    for line in gen_nonempty_lines(bigstr) do
      local pre, hash, _, title = youtube_split_url0(line)
      if pre then
        local tag, dates = pre_to_tag_and_dates(pre)
        coy(tag or "", hash, title, dates)
      end
    end
  end
  return cow(traverse)
end
-- gen_hash_fname_dir_stem(bigstr): iterator over a list of video file
-- names (one per line in `bigstr`).
-- A line is accepted when it looks like "<prefix>-<11 chars>.<ext>",
-- where <ext> matches [mf][pl][4v] (this covers "mp4" and "flv").
-- For each accepted line it yields: hash, fname, dir, stem - where
-- dir is everything up to the last "/" of the prefix ("" if none) and
-- stem is the rest of the prefix.
gen_hash_fname_dir_stem = function (bigstr)
  local video_pat = "^(.-)-(...........)%.([mf][pl][4v])$"
  local dir_stem_pat = "^(.*/)([^/]*)$"
  local traverse = function ()
    for fname in gen_nonempty_lines(bigstr) do
      local prefix, hash = fname:match(video_pat)
      if prefix then
        local dir, stem = prefix:match(dir_stem_pat)
        coy(hash, fname, dir or "", stem or prefix)
      end
    end
  end
  return cow(traverse)
end
-- _
-- | |___
-- | / __|
-- | \__ \
-- |_|___/
--
-- «video-fnames» (to ".video-fnames")
-- video_extensions = {mp4=true}
-- video_extension = function (ext, exts) end
-- video_fname_split = function (fname) end
-- gen_video_fnames = function (bigstr, exts) end
-- gen_video_fnames_ls = function (dir, exts) end
-- register_video_fnames = function (bigstr, exts) end
-- register_video_fnames_ls = function (dir, exts) end
-- Default set of accepted video extensions (keys are extensions
-- without the dot).
video_extensions = { mp4 = true }

-- video_extension(ext, exts): look `ext` up in the set `exts`
-- (default: video_extensions) and return the entry found - a true
-- value for an accepted extension, nil otherwise.
-- A nil/false `ext` is returned as is.
video_extension = function (ext, exts)
  if not ext then return ext end
  local accepted = exts or video_extensions
  return accepted[ext]
end
-- video_fname_split(fname): split a file name of the form
--   "<dir/><stem>-<11-char hash>.<ext>"
-- into its parts and return: dir, stem, hash, ext.
-- <ext> must consist of lowercase letters and digits; dir is "" when
-- the name has no "/". Returns nothing when `fname` is nil/false or
-- does not have that form.
video_fname_split = function (fname)
  if fname then
    local prefix, hash, ext = fname:match("^(.-)-(...........)%.([a-z0-9]+)$")
    if prefix then
      local dir, stem = prefix:match("^(.*/)([^/]*)$")
      if dir then return dir, stem, hash, ext end
      return "", prefix, hash, ext
    end
  end
end
-- gen_video_fnames(bigstr, exts): iterator over the video file names
-- listed in `bigstr` (one per line).
-- Each non-empty line is split with video_fname_split; the lines
-- whose extension is accepted by video_extension(ext, exts) yield:
-- fname, dir, stem, hash, ext.
gen_video_fnames = function (bigstr, exts)
  local traverse = function ()
    for fname in gen_nonempty_lines(bigstr) do
      local dir, stem, hash, ext = video_fname_split(fname)
      local accepted = video_extension(ext, exts)
      if accepted then coy(fname, dir, stem, hash, ext) end
    end
  end
  return cow(traverse)
end
-- gen_video_fnames_ls(dir, exts): like gen_video_fnames, but the file
-- names come from ee_ls(dir) (an empty list is used when it returns
-- nil), passed through sorted().
-- `dir` is prepended verbatim to each name, so it must end with "/".
-- Yields: fname, dir, stem, hash, ext - where the yielded dir is the
-- one obtained by splitting the full file name.
gen_video_fnames_ls = function (dir, exts)
  local traverse = function ()
    local entries = sorted(ee_ls(dir) or {})
    for _, basename in ipairs(entries) do
      local fname = dir..basename
      local fdir, stem, hash, ext = video_fname_split(fname)
      if video_extension(ext, exts) then
        coy(fname, fdir, stem, hash, ext)
      end
    end
  end
  return cow(traverse)
end
-- ____ _
-- | _ \ __ _| |_ ___ ___
-- | | | |/ _` | __/ _ \/ __|
-- | |_| | (_| | || __/\__ \
-- |____/ \__,_|\__\___||___/
--
-- Some support for dates. The format is usually like this:
-- (find-anggfile "linkdasruas.txt" "26/out/2013")
-- Something like "26/out/2013" is a "bdate" (a Brazilian date).
-- Something like "20131016" is an "ndate" (a numeric date).
-- «dates» (to ".dates")
-- month_numbers: maps three-letter lowercase month abbreviations, in
-- Portuguese and in English, to month numbers (1..12).
-- "dec" was added so that every month has its English spelling too.
month_numbers = {
  jan=1,  feb=2,  fev=2,  mar=3,  abr=4,  apr=4,
  mai=5,  may=5,  jun=6,  jul=7,  ago=8,  aug=8,
  set=9,  sep=9,  out=10, oct=10, nov=11, dez=12, dec=12,
}
--
-- bdate_to_ndate(bdate): convert a "bdate" like "26/out/2013" into an
-- "ndate" like "20131026".
-- Returns nil (or nothing) when `bdate` does not have the form
-- "<digits>/<letters>/<digits>" or when the month abbreviation is not
-- a key of month_numbers.
-- The month is lowercased before the lookup: the pattern accepts
-- upper-case letters, so "26/Out/2013" must not be silently rejected.
bdate_to_ndate = function (bdate)
  local dd, mmm, yyyy = bdate:match "^([0-9]+)/([A-Za-z]+)/([0-9]+)$"
  if not dd then return end
  local mm = month_numbers[mmm:lower()]
  if mm then return format("%04d%02d%02d", yyyy, mm, dd) end
end
-- date_to_ndate(date): normalize a date string to an ndate.
-- A string made of exactly 8 digits is already an ndate and is
-- returned unchanged; anything else is handed to bdate_to_ndate.
date_to_ndate = function (date)
  local all_digits = date:match("^[0-9]+$")
  if all_digits and #date == 8 then
    return date
  end
  return bdate_to_ndate(date)
end
-- pre_to_tag_and_dates(pre): parse the text that comes before a video
-- url. `pre` is broken into words with split(); a word of the form
-- "[...]" sets the tag (the last such word wins, brackets removed)
-- and every other word that date_to_ndate can convert is appended to
-- the list of dates. Words that are neither are ignored.
-- Returns: tag (default ""), dates (a list of ndates, maybe empty).
pre_to_tag_and_dates = function (pre)
  local tag, dates = "", {}
  for _, word in ipairs(split(pre)) do
    local bracketed = word:match "^%[(.-)%]$"
    if bracketed then
      tag = bracketed
    else
      local ndate = date_to_ndate(word)
      if ndate then table.insert(dates, ndate) end
    end
  end
  return tag, dates
end
-- line_to_tag_and_dates = function (line)
-- local a,u = line:match "^(.-)(http.*)$"
-- if not a then return end
-- end
--
-- line_to_tag_and_date(line): extract the sort keys of a line like
--   "[tag] 26/out/2013 http://... title".
-- Returns nothing when the line has no "http"; otherwise returns
--   tag:  the leading "[...]" of the line, brackets included ("" when
--         the line does not start with one),
--   date: the ndate for the text between the tag and the url ("" when
--         there is no tag or that text is not a bdate).
-- The text between the tag and the url is trimmed before being
-- converted: it normally ends with the blank that separates it from
-- the url, and bdate_to_ndate only accepts an exact "dd/mmm/yyyy".
line_to_tag_and_date = function (line)
  local prefix = line:match "^(.-)http.*$"
  if not prefix then return end
  local bracketed, rest = prefix:match "^(%[.-%])%s*(.-)%s*$"
  local tag  = bracketed or ""
  local date = (rest and bdate_to_ndate(rest)) or ""
  return tag, date
end
--
-- See: (find-angg ".emacs" "youtube-db.lua")
-- This is for editing (find-angg "linkdasruas.txt")
-- sort_by_tag_and_date0(bigstr): sort the lines of `bigstr` by tag,
-- then by date, then by original position.
-- Each line gets the key "<tag> <date> <5-digit line number>" (tag
-- and date come from line_to_tag_and_date and default to ""); the
-- line number makes all keys distinct and keeps the relative order of
-- lines with the same tag and date.
-- Returns the sorted lines, each one followed by a newline.
sort_by_tag_and_date0 = function (bigstr)
  local keyed = {}
  for i, line in ipairs(splitlines(bigstr)) do
    local tag, date = line_to_tag_and_date(line)
    local key = format("%s %s %05d", tag or "", date or "", i)
    keyed[#keyed + 1] = {key, line}
  end
  table.sort(keyed, function (a, b) return a[1] < b[1] end)
  local with_newline = function (pair) return pair[2].."\n" end
  return mapconcat(with_newline, keyed)
end
-- sort_by_tag_and_date(fname): filter version of
-- sort_by_tag_and_date0. The input is the contents of the file
-- `fname` (read with ee_readfile) or, when `fname` is not given or
-- that read yields nothing, the whole standard input; the sorted
-- text is written to standard output.
sort_by_tag_and_date = function (fname)
  local bigstr
  if fname then bigstr = ee_readfile(fname) end
  bigstr = bigstr or io.read "*a"
  io.write(sort_by_tag_and_date0(bigstr))
end
-- _ _ _
-- ___| |__ ___| | |
-- / __| '_ \ / _ \ | |
-- \__ \ | | | __/ | |
-- |___/_| |_|\___|_|_|
--
-- «shell-functions» (to ".shell-functions")
--[==[
download_script0 = function (options)
return (([[
ydl () {(
cd $mp4dir/ &&
youtube-dl -t -f 18 --restrict-filenames "http://www.youtube.com/watch?v=$1"
)}
ydl () {
echo -n > "$mp4dir/Fake_video-$1.mp4"
}
]]):gsub("$(%w+)", options))
end
title_script0 = function (options)
return (([[
mkdir $tmpdir/
ytitle () {
youtube-dl -e "http://www.youtube.com/watch?v=$2" > $tmpdir/$2.title0
recode h..l1 < $tmpdir/$2.title0 > $tmpdir/$2.title
echo $1 $2 $(cat $tmpdir/$2.title)
}
ytitle () {
echo "Fake title" > $tmpdir/$2.title
echo $1 $2 $(cat $tmpdir/$2.title)
}
]]):gsub("$(%w+)", options))
end
date_script0 = function (options)
return (([[
mkdir $tmpdir/
ydate () {
youtube-dl --get-filename -o "%(upload_date)s" \
"http://www.youtube.com/watch?v=$2" > $tmpdir/$2.date
}
ydate () {
echo 20001234 > $tmpdir/$2.date
echo $1 $2 $(cat $tmpdir/$2.date)
}
]]):gsub("$(%w+)", options))
end
title_cat = function (hash, options)
local dir = (options and options.tmpdir) or "/tmp/ydbtmp"
local fname = dir.."/"..hash..".title"
local ok,contents = pcall(function () return readfile(fname) end)
if ok then return (contents:gsub("\n$", "")) end
end
date_cat = function (hash, options)
local dir = (options and options.tmpdir) or "/tmp/ydbtmp"
local fname = dir.."/"..hash..".date"
local ok,contents = pcall(function () return readfile(fname) end)
if ok then return (contents:gsub("\n$", "")) end
end
--]==]
-- filecontents(fname): read the file `fname` with readfile and return
-- its contents without the final newline (if any).
-- Returns nothing when readfile raises an error or when what is left
-- after removing the final newline is the empty string.
filecontents = function (fname)
  local ok, contents = pcall(readfile, fname)
  if not ok then return end
  local stripped = contents:gsub("\n$", "")
  if stripped == "" then return end
  return stripped
end
-- __ ______ _ _
-- \ \ / / ___| ___ _ __(_)_ __ | |_ ___
-- \ V /\___ \ / __| '__| | '_ \| __/ __|
-- | | ___) | (__| | | | |_) | |_\__ \
-- |_| |____/ \___|_| |_| .__/ \__|___/
-- |_|
--
-- «YScripts» (to ".YScripts")
-- YScripts: a generator of shell scripts that download videos, upload
-- dates and titles with youtube-dl, or that fake those downloads.
--
-- A YScripts object is a table of options; the defaults live in
-- __index below. Scripts are built from templates in which every
-- "$word" is replaced by the field `word` of the object (see
-- `expand`); "$word"s that are not fields - like the shell's "$1" -
-- are left untouched.
YScripts = Class {
  type = "YScripts",
  -- Constructors. `A` is an optional table of fields; it is used as
  -- the object itself (not copied). `fake` also sets A.fake = 1, which
  -- makes the dl_* methods use the "_fake" templates.
  new = function (A)
      local fields = A or {}
      return YScripts(fields)
    end,
  fake = function (A)
      local fields = A or {}
      fields.fake = 1
      return YScripts(fields)
    end,
  real = function (A)
      local fields = A or {}
      return YScripts(fields)
    end,
  __index = {
    -- expand(fmt): replace each "$word" in fmt by ys[word].
    expand = function (ys, fmt)
        local expanded = fmt:gsub("$(%w+)", ys)
        return expanded
      end,
    --
    -- Default options.
    ywatch = "http://www.youtube.com/watch",
    mp4dir = "videos",            -- without trailing "/"
    tmpdir = "/tmp/ydbtmp",       -- without trailing "/"
    sleep_real = "sleep 150",
    -- sleep_real = "wait",
    sleep_fake = "# sleep 150",
    nprocesses = 50,              -- calls between two sleep lines
    cpdir = nil,
    --
    -- write_script(fname, bigstr): save a script with ee_writefile.
    write_script = function (ys, fname, bigstr)
        ee_writefile(fname, bigstr)
      end,
    -- line(fmt): fmt plus a newline, expanded.
    line = function (ys, fmt)
        return ys:expand(fmt.."\n")
      end,
    -- lines(fmts): the expanded lines for a list of templates,
    -- concatenated.
    lines = function (ys, fmts)
        local expand_line = function (fmt) return ys:line(fmt) end
        return mapconcat(expand_line, fmts)
      end,
    -- hash_line(fmt, h): like `line`, but first stores h in ys.h, so
    -- that "$h" in fmt expands to the hash.
    hash_line = function (ys, fmt, h)
        ys.h = h
        return ys:expand(fmt.."\n")
      end,
    -- hash_lines0(fmt, hashes): the list (not the concatenation) of
    -- the hash_lines for each hash.
    hash_lines0 = function (ys, fmt, hashes)
        local expand_hash = function (h) return ys:hash_line(fmt, h) end
        return map(expand_hash, hashes)
      end,
    -- fname_lines(fmt, fnames): one expanded line per file name, with
    -- "$fname" expanding to it (via ys.fname); concatenated.
    fname_lines = function (ys, fmt, fnames)
        local expand_fname = function (fname)
            ys.fname = fname
            return ys:expand(fmt.."\n")
          end
        return mapconcat(expand_fname, fnames)
      end,
    -- with_sleep(lines, sleep): concatenate the list `lines`; when
    -- `sleep` is given, a line with it is first inserted (in place,
    -- modifying `lines`) after each group of ys.nprocesses lines that
    -- is followed by more lines.
    with_sleep = function (ys, lines, sleep)
        if sleep then
          local step = ys.nprocesses + 1
          local pos = step
          while pos <= #lines do
            table.insert(lines, pos, sleep.."\n")
            pos = pos + step
          end
        end
        return table.concat(lines)
      end,
    -- script(defs, fmt, sleep, hashes): the definitions in `defs`
    -- followed by one call (template `fmt`) per hash, with sleep
    -- lines interspersed as in with_sleep.
    script = function (ys, defs, fmt, sleep, hashes)
        local header = ys:lines(defs)
        local calls = ys:hash_lines0(fmt, hashes)
        local body = ys:with_sleep(calls, sleep)
        return header..body
      end,
    -- cp_mp4(fnames): a "cp -sv" line for each file name.
    cp_mp4 = function (ys, fnames)
        return ys:fname_lines("cp -sv $fname $cpdir", fnames)
      end,
    -- dl_mp4 / dl_date / dl_title (hashes): scripts that download (or
    -- fake the download of) the video, the upload date or the title
    -- of each hash. The real date and title scripts run their calls
    -- in background ("&") with sleeps in between; dl_mp4 never sleeps.
    dl_mp4 = function (ys, hashes)
        local defs = ys.fake and ys.dl_mp4_fake or ys.dl_mp4_real
        return ys:script(defs, "ydl_mp4 $h", nil, hashes)
      end,
    dl_date = function (ys, hashes)
        local defs  = ys.fake and ys.dl_date_fake or ys.dl_date_real
        local call  = ys.fake and "ydl_date $h" or "ydl_date $h &"
        local sleep = ys.fake and ys.sleep_fake or ys.sleep_real
        return ys:script(defs, call, sleep, hashes)
      end,
    dl_title = function (ys, hashes)
        local defs  = ys.fake and ys.dl_title_fake or ys.dl_title_real
        local call  = ys.fake and "ydl_title $h" or "ydl_title $h &"
        local sleep = ys.fake and ys.sleep_fake or ys.sleep_real
        return ys:script(defs, call, sleep, hashes)
      end,
    --
    -- Templates for the shell functions used by the scripts above.
    -- Each one is a list of lines; "$1" is the shell's first argument
    -- (the hash), the other "$word"s are fields of the object.
    dl_mp4_fake = {
      'mkdir $mp4dir/',
      'ydl_mp4 () {',
      ' echo -n > "$mp4dir/Fake_video-$1.mp4"',
      '}',
    },
    dl_mp4_real = {
      'mkdir $mp4dir/',
      'ydl_mp4 () {(',
      ' cd $mp4dir/ &&',
      ' youtube-dl -t -f 18 --restrict-filenames "$ywatch?v=$1"',
      ')}',
    },
    dl_date_fake = {
      'mkdir $tmpdir/',
      'ydl_date () {',
      ' echo 20001234 > $tmpdir/$1.date',
      ' echo $1 $(cat $tmpdir/$1.date)',
      '}',
    },
    dl_date_real = {
      'mkdir $tmpdir/',
      'ydl_date () {',
      ' youtube-dl --get-filename -o "%(upload_date)s" \\',
      ' "$ywatch?v=$1" > $tmpdir/$1.date',
      '}',
    },
    dl_title_fake = {
      'mkdir $tmpdir/',
      'ydl_title () {',
      ' echo "Fake title" > $tmpdir/$1.title',
      ' echo $1 $(cat $tmpdir/$1.title)',
      '}',
    },
    dl_title_real = {
      'mkdir $tmpdir/',
      'ydl_title () {',
      ' youtube-dl -e "$ywatch?v=$1" > $tmpdir/$1.title',
      ' echo $1 $(cat $tmpdir/$1.title)',
      '}',
    },
    -- date_cat / title_cat (hash): the contents of
    -- "<tmpdir>/<hash>.date" / "<tmpdir>/<hash>.title" as returned by
    -- filecontents, or nil. A nil/false hash is returned as is.
    date_cat = function (ys, hash)
        if not hash then return hash end
        return (filecontents(ys.tmpdir.."/"..hash..".date"))
      end,
    title_cat = function (ys, hash)
        if not hash then return hash end
        return (filecontents(ys.tmpdir.."/"..hash..".title"))
      end,
  },
}
-- __ __ _ _ ____ ____
-- \ \ / /__ _ _| |_ _ _| |__ ___| _ \| __ )
-- \ V / _ \| | | | __| | | | '_ \ / _ \ | | | _ \
-- | | (_) | |_| | |_| |_| | |_) | __/ |_| | |_) |
-- |_|\___/ \__,_|\__|\__,_|_.__/ \___|____/|____/
--
-- «YoutubeDB» (to ".YoutubeDB")
YoutubeDB = Class {
type = "YoutubeDB",
-- new(): an empty database. `hashes` maps each hash to its entry and
-- `hashes_list` holds the hashes in insertion order.
new = function ()
  local fresh = {hashes = {}, hashes_list = {}}
  return YoutubeDB(fresh)
end,
-- tostring(ydb): a short description with the size of hashes_list.
__tostring = function (ydb)
  local count = #ydb.hashes_list
  return format("(YoutubeDB with %d hashes in hashes_list)", count)
end,
-- ydb * ydb2: the entries of ydb whose hashes are also in ydb2.
__mul = function (ydb, ydb2)
  local in_both = function (hash) return ydb2:has(hash) end
  return ydb:filter(in_both)
end,
-- ydb - ydb2: the entries of ydb whose hashes are not in ydb2.
__sub = function (ydb, ydb2)
  local only_in_first = function (hash) return not ydb2:has(hash) end
  return ydb:filter(only_in_first)
end,
__index = {
-- has / has_hash (hash): the entry stored for `hash`, or nil when the
-- hash is not in the database. The two names are synonyms.
has = function (ydb, hash)
  local entry = ydb.hashes[hash]
  return entry
end,
has_hash = function (ydb, hash)
  local entry = ydb.hashes[hash]
  return entry
end,
-- first_date / first_title / first_fname (hash): the first element of
-- the corresponding list of the entry for `hash`, or nil when that
-- list is empty. They raise an error if `hash` is not in the database.
first_date = function (ydb, hash)
  local entry = ydb.hashes[hash]
  return entry.dates[1]
end,
first_title = function (ydb, hash)
  local entry = ydb.hashes[hash]
  return entry.titles[1]
end,
first_fname = function (ydb, hash)
  local entry = ydb.hashes[hash]
  return entry.fnames[1]
end,
-- first_hash(): the first hash in hashes_list, or nil.
first_hash = function (ydb)
  local list = ydb.hashes_list
  return list[1]
end,
-- n(): number of keys of `hashes`; n0(): length of `hashes_list`;
-- nrepetitions(): their difference.
n = function (ydb)
  local ks = keys(ydb.hashes)
  return #ks
end,
n0 = function (ydb)
  local list = ydb.hashes_list
  return #list
end,
nrepetitions = function (ydb)
  local total = ydb:n0()
  local distinct = ydb:n()
  return total - distinct
end,
-- hsorted(): the keys of `hashes`, passed through sorted().
hsorted = function (ydb)
  local ks = keys(ydb.hashes)
  return sorted(ks)
end,
--
-- very-low-level stuff, experimental
-- «YoutubeDB-add» (to ".YoutubeDB-add")
-- add_hash(hash): create an empty entry for `hash` and append the
-- hash to hashes_list. Does nothing if the hash already has an entry.
add_hash = function (ydb, hash)
  if ydb.hashes[hash] then return end
  ydb.hashes[hash] = {tags={}, titles={}, fnames={}, dates={}}
  table.insert(ydb.hashes_list, hash)
end,
-- add_tag(hash, tag): add `tag` to the set of tags of the entry for
-- `hash`. A nil/false tag is ignored. The entry must exist.
add_tag = function (ydb, hash, tag)
  if not tag then return end
  local tags = ydb.hashes[hash].tags
  tags[tag] = tag
end,
-- add_tags(hash, tags): add each of the comma-separated tags in the
-- string `tags` (default: "", i.e., no tags).
add_tags = function (ydb, hash, tags)
  local tagstr = tags or ""
  for tag in tagstr:gmatch("[^,]+") do
    ydb:add_tag(hash, tag)
  end
end,
-- add_title(hash, title): append `title` to the titles of the entry,
-- unless it is nil, false, "", or equal to the entry's first title.
add_title = function (ydb, hash, title)
  if not title or title == "" then return end
  local titles = ydb.hashes[hash].titles
  if title ~= titles[1] then
    table.insert(titles, title)
  end
end,
-- add_date(hash, date): append `date` to the dates of the entry
-- (repetitions are kept). A nil/false date is ignored.
add_date = function (ydb, hash, date)
  if not date then return end
  table.insert(ydb.hashes[hash].dates, date)
end,
-- add_dates(hash, dates): add_date for each element of the list
-- `dates` (default: the empty list).
add_dates = function (ydb, hash, dates)
  for _, date in ipairs(dates or {}) do
    ydb:add_date(hash, date)
  end
end,
--
-- Functions to store more entries into the database
-- «YoutubeDB-register» (to ".YoutubeDB-register")
-- register_video_hash(hash): older name of "add_hash", with the same
-- body: create an empty entry for `hash` and append the hash to
-- hashes_list, unless the hash already has an entry.
register_video_hash = function (ydb, hash)
  if ydb.hashes[hash] then return end
  ydb.hashes[hash] = {tags={}, titles={}, fnames={}, dates={}}
  table.insert(ydb.hashes_list, hash)
end,
-- register_video(tags, hash, title, dates): make sure that `hash` has
-- an entry and store the given data in it. `tags` is a string of
-- comma-separated tags, `dates` is a list; see add_tags, add_title
-- and add_dates for how nil and repeated values are treated.
register_video = function (ydb, tags, hash, title, dates)
  ydb:register_video_hash(hash)
  ydb:add_tags(hash, tags)
  ydb:add_title(hash, title)
  ydb:add_dates(hash, dates)
end,
-- register_videos(bigstr, forced_tag): register all the videos
-- mentioned in `bigstr`, a text with one video url per line (see
-- gen_tag_hash_title). When `forced_tag` is given it replaces the
-- tag found in each line. Returns ydb, to allow chaining.
--
-- Short urls are first rewritten to the long "watch?v=" form:
--   .../youtu.be/<hash>  ->  .../www.youtube.com/watch?v=<hash>
--   .../shorts/<hash>    ->  .../watch?v=<hash>
-- The replacement for "/shorts/" keeps the leading "/": without it
-- "youtube.com/shorts/<hash>" became "youtube.comwatch?v=<hash>".
register_videos = function (ydb, bigstr, forced_tag)
  bigstr = bigstr:gsub("/youtu%.be/", "/www.youtube.com/watch?v=")
  bigstr = bigstr:gsub("/shorts/", "/watch?v=")
  for tag, hash, title, dates in gen_tag_hash_title(bigstr) do
    ydb:register_video(forced_tag or tag, hash, title, dates)
  end
  return ydb
end,
-- register_videos_read(fname, forced_tag): register_videos on the
-- contents of the file `fname` (expanded with ee_expand).
register_videos_read = function (ydb, fname, forced_tag)
  local bigstr = readfile(ee_expand(fname))
  return ydb:register_videos(bigstr, forced_tag)
end,
-- register_videos_run(script, forced_tag): register_videos on the
-- output of running `script` with getoutput.
register_videos_run = function (ydb, script, forced_tag)
  local bigstr = getoutput(script)
  return ydb:register_videos(bigstr, forced_tag)
end,
--
-- «YoutubeDB-register-fnames» (to ".YoutubeDB-register-fnames")
-- register_video_fname(hash, fname): make sure that `hash` has an
-- entry and append `fname` to its list of file names.
register_video_fname = function (ydb, hash, fname)
  ydb:register_video_hash(hash)
  table.insert(ydb.hashes[hash].fnames, fname)
end,
-- register_video_fnames(bigstr, exts): register each video file name
-- listed in `bigstr` (see gen_video_fnames). Returns ydb.
register_video_fnames = function (ydb, bigstr, exts)
  for fname, _, _, hash in gen_video_fnames(bigstr, exts) do
    ydb:register_video_fname(hash, fname)
  end
  return ydb
end,
-- register_video_fnames_ls(dir, exts): register each video file found
-- in the directory `dir` (see gen_video_fnames_ls). Returns ydb.
register_video_fnames_ls = function (ydb, dir, exts)
  for fname, _, _, hash in gen_video_fnames_ls(dir, exts) do
    ydb:register_video_fname(hash, fname)
  end
  return ydb
end,
--
-- Shorthands for register_xxx methods:
-- «YoutubeDB-read-run-ls» (to ".YoutubeDB-read-run-ls")
-- Short names for the register_xxx methods; each one just forwards
-- its arguments and returns what the long version returns.
--   read0 = register_videos          (urls from a string)
--   read  = register_videos_read     (urls from a file)
--   run   = register_videos_run      (urls from a script's output)
--   ls0   = register_video_fnames    (file names from a string)
--   ls    = register_video_fnames_ls (file names from a directory)
read0 = function (ydb, bigstr, forced_tag)
  return ydb:register_videos(bigstr, forced_tag)
end,
read = function (ydb, fname, forced_tag)
  return ydb:register_videos_read(fname, forced_tag)
end,
run = function (ydb, script, forced_tag)
  return ydb:register_videos_run(script, forced_tag)
end,
ls0 = function (ydb, bigstr, exts)
  return ydb:register_video_fnames(bigstr, exts)
end,
ls = function (ydb, dir, exts)
  return ydb:register_video_fnames_ls(dir, exts)
end,
--
-- Low-level functions to traverse the database
-- «YoutubeDB-traverse» (to ".YoutubeDB-traverse")
-- gen_hashes(): iterator over hashes_list, in order, that skips the
-- hashes that it has already produced. Yields: hash, entry.
gen_hashes = function (ydb)
  local traverse = function ()
    local seen = {}
    for _, hash in ipairs(ydb.hashes_list) do
      if not seen[hash] then
        seen[hash] = true
        coy(hash, ydb.hashes[hash])
      end
    end
  end
  return cow(traverse)
end,
-- gen_hash_tags_titles(): like gen_hashes, but yields:
-- hash, entry.tags, entry.titles.
gen_hash_tags_titles = function (ydb)
  local traverse = function ()
    for hash, entry in ydb:gen_hashes() do
      coy(hash, entry.tags, entry.titles)
    end
  end
  return cow(traverse)
end,
-- gen_hashes_without_titles = function (ydb)
-- return cow(function ()
-- for hash,tags,titles in ydb:gen_hash_tags_titles() do
-- if #titles == 0 then coy(hash) end
-- end
-- end)
-- end,
-- hashes_without_titles = function (ydb)
-- local Hs = {}
-- for hash,tags,titles in ydb:gen_hash_tags_titles() do
-- if #titles == 0 and #tags == 0 then table.insert(Hs, hash) end
-- end
-- return Hs
-- end,
-- hashes_without_titles(): the list of the hashes whose entries have
-- no titles (the hashes_list of the title_no() subset).
hashes_without_titles = function (ydb)
  local titleless = ydb:title_no()
  return titleless.hashes_list
end,
--
-- Functions that return things from the database (as strings)
-- «YoutubeDB-gets» (to ".YoutubeDB-gets")
-- atags(hash): the tags of the entry for `hash` as a string: the keys
-- of its `tags` table, sorted and joined with commas ("" if none).
atags = function (ydb, hash)
  local tags = sorted(keys(ydb.hashes[hash].tags))
  return table.concat(tags, ",")
end,
-- adates(hash): the dates of the entry as a string, each one followed
-- by a space. The list is passed through Set.from(...):ks() first
-- (presumably to sort it and drop repetitions - see edrxlib.lua).
adates = function (ydb, hash)
  local dates = Set.from(ydb.hashes[hash].dates):ks()
  local with_space = function (date) return date.." " end
  return mapconcat(with_space, dates)
end,
-- hash_to_line(hash, nl): the entry for `hash` as a line of text:
--   "[tags] date1 date2 ... url title"
-- where "[tags]" is omitted when there are no tags, the url comes
-- from youtube_make_url and the title is the first one (or "").
-- `nl`, if given, is appended at the end.
hash_to_line = function (ydb, hash, nl)
  local tags = ydb:atags(hash)
  local bracketed = ""
  if tags ~= "" then bracketed = "["..tags.."]" end
  local dates = ydb:adates(hash)
  local url = youtube_make_url(hash)
  local title = ydb.hashes[hash].titles[1] or ""
  return bracketed.." "..dates..url.." "..title..(nl or "")
end,
-- hash_to_line_nl(hash): hash_to_line with a newline at the end.
hash_to_line_nl = function (ydb, hash)
  return ydb:hash_to_line(hash, "\n")
end,
--
-- all_hashes(f): apply `f` (default: identity) to each hash of the
-- database, in the order of gen_hash_tags_titles, and return the list
-- of the results. Results that are nil are left out of the list.
--
-- This is a method, so its first parameter must be the database: the
-- previous definition was "function (f)", and with it a call like
-- ydb:all_hashes(f) bound the database to `f` and used whatever the
-- global variable "ydb" held at that moment.
all_hashes = function (ydb, f)
  local results = {}
  f = f or identity
  for hash in ydb:gen_hash_tags_titles() do
    -- Keep only the first value returned by f: extra values would be
    -- taken by table.insert as a position.
    local result = f(hash)
    if result ~= nil then table.insert(results, result) end
  end
  return results
end,
-- all_by_tags(): group the hashes by their tag strings. Returns a
-- table that maps each string returned by atags - like "", "a",
-- "i,p" - to the list of the hashes with exactly those tags.
all_by_tags = function (ydb)
  local groups = {}
  for hash in ydb:gen_hash_tags_titles() do
    local key = ydb:atags(hash)
    local group = groups[key]
    if not group then
      group = {}
      groups[key] = group
    end
    table.insert(group, hash)
  end
  return groups
end,
-- gen_all_by_tags(): iterator over the groups of all_by_tags, with
-- the tag strings taken in the order given by sorted().
-- Yields: tag string, list of hashes.
gen_all_by_tags = function (ydb)
  local traverse = function ()
    local groups = ydb:all_by_tags()
    for _, key in ipairs(sorted(keys(groups))) do
      coy(key, groups[key])
    end
  end
  return cow(traverse)
end,
--
-- «YoutubeDB-dump» (to ".YoutubeDB-dump")
-- dump_tags(): for each tag string (see gen_all_by_tags) print it and
-- the number of hashes that have exactly those tags.
dump_tags = function (ydb)
  for tag, hashes in ydb:gen_all_by_tags() do
    local count = #hashes
    print(tag, count)
  end
end,
-- dump_by_tags(L): print the lines (see hash_to_line) of all items
-- whose tag strings are in the list L, group by group, in the order
-- of L. The elements of L are tag strings as returned by atags, like
-- "", "a" or "i,p".
-- Default for L: all the tag strings of the database, sorted - as in
-- gen_all_by_tags - so that the output order is deterministic.
-- Tag strings in L that no item has are skipped; they used to make
-- ipairs fail on a nil.
dump_by_tags = function (ydb, L)
  local allbytags = ydb:all_by_tags()
  L = L or sorted(keys(allbytags))
  for _, t in ipairs(L) do
    for _, h in ipairs(allbytags[t] or {}) do
      print(ydb:hash_to_line(h))
    end
  end
end,
-- dump_all(): print the line (see hash_to_line) of every hash, in the
-- order of gen_hash_tags_titles.
dump_all = function (ydb)
  for hash in ydb:gen_hash_tags_titles() do
    local line = ydb:hash_to_line(hash)
    print(line)
  end
end,
-- dump_untagged(): return (not print) the lines of all items that
-- have no tags, each one followed by a newline, as a single string.
-- The hashes are traversed here with gen_hashes instead of through
-- all_hashes with a callback: all_hashes was declared without the
-- database parameter, so the callback never reached it.
dump_untagged = function (ydb)
  local lines = {}
  for hash in ydb:gen_hashes() do
    if ydb:atags(hash) == "" then
      table.insert(lines, ydb:hash_to_line(hash, "\n"))
    end
  end
  return table.concat(lines)
end,
--
-- Scripts to get the missing titles
-- «YoutubeDB-missing-titles» (to ".YoutubeDB-missing-titles")
-- was: "script_yt ="
-- new_titles_script(options): a shell script that fetches the titles
-- of the entries that have no titles and whose hashes have 11
-- characters. The script is title_script0(options) followed by one
-- "ytitle <n> <hash> &" line per hash, in groups of 50 separated by a
-- commented-out "sleep 150".
-- NOTE(review): title_script0 is not defined in the visible part of
-- this file (its only definition is inside a commented-out section);
-- confirm that it is provided elsewhere before using this.
new_titles_script = function (ydb, options)
  local pr, _, out = ppfo()
  local Hs = sorted(ydb:title_no():hash_valid().hashes_list)
  for j = 1, #Hs, 50 do
    pr("# "..(j-1))
    for i = j, min(j+49, #Hs) do
      local c = (#Hs[i] == 11) and "" or "# "
      pr(c.."ytitle "..i.." "..Hs[i].." &")
    end
    pr("# sleep 150")
    pr("")
  end
  return title_script0(options)..out()
end,
-- new_titles_dump(): for each entry without titles (and with an
-- 11-character hash) whose title can be obtained with title_cat,
-- produce a line " <url> <title>". Returns all lines as one string.
-- NOTE(review): the global title_cat is also defined only in the
-- commented-out section - see the note above.
new_titles_dump = function (ydb)
  local pr, _, out = ppfo()
  for _, hash in ipairs(ydb:title_no():hash_valid().hashes_list) do
    local title = title_cat(hash)
    if title and title ~= "" then
      pr(" "..youtube_make_url(hash).." "..title)
    end
  end
  -- The output function takes no arguments; it used to be called
  -- with the undefined global "lines".
  return out()
end,
--
-- Scripts to get the missing dates
-- «YoutubeDB-missing-dates» (to ".YoutubeDB-missing-dates")
-- Work in progress... see: (find-es "youtube" "upload-date")
-- new_dates_script(options): a shell script that fetches the upload
-- dates of the entries that have no dates and whose hashes have 11
-- characters. The script is date_script0(options) followed by one
-- "ydate <n> <hash> &" line per hash, in groups of 50 separated by a
-- commented-out "sleep 150".
-- NOTE(review): date_script0 is not defined in the visible part of
-- this file (its only definition is inside a commented-out section);
-- confirm that it is provided elsewhere before using this.
new_dates_script = function (ydb, options)
  local pr, _, out = ppfo()
  local Hs = sorted(ydb:date_no():hash_valid().hashes_list)
  for j = 1, #Hs, 50 do
    pr("# "..(j-1))
    for i = j, min(j+49, #Hs) do
      local c = (#Hs[i] == 11) and "" or "# "
      pr(c.."ydate "..i.." "..Hs[i].." &")
    end
    pr("# sleep 150")
    pr("")
  end
  return date_script0(options)..out()
end,
-- new_dates_dump(): for each entry without dates (and with an
-- 11-character hash) whose date can be obtained with date_cat,
-- produce a line " <date> <url>". Returns all lines as one string.
-- NOTE(review): the global date_cat is also defined only in the
-- commented-out section - see the note above.
new_dates_dump = function (ydb)
  local pr, _, out = ppfo()
  for _, hash in ipairs(ydb:date_no():hash_valid().hashes_list) do
    local date = date_cat(hash)
    if date and date ~= "" then
      pr(" "..date.." "..youtube_make_url(hash))
    end
  end
  -- The output function takes no arguments; it used to be called
  -- with the undefined global "lines".
  return out()
end,
--
-- «YoutubeDB-dates_cat» (to ".YoutubeDB-dates_cat")
-- «YoutubeDB-titles_cat» (to ".YoutubeDB-titles_cat")
-- These functions use the global variable "ys" (a YScripts object).
-- dates_cat(): for every entry, add the date that ys:date_cat finds
-- for its hash, if any. Returns ydb.
dates_cat = function (ydb)
  for hash in pairs(ydb.hashes) do
    local date = ys:date_cat(hash)
    ydb:add_date(hash, date)
  end
  return ydb
end,
-- titles_cat(): for every entry, add the title that ys:title_cat
-- finds for its hash, if any. Returns ydb.
titles_cat = function (ydb)
  for hash in pairs(ydb.hashes) do
    local title = ys:title_cat(hash)
    ydb:add_title(hash, title)
  end
  return ydb
end,
-- cp_script(): the ys:cp_mp4 script for the files in fnames_list().
cp_script = function (ydb)
  local fnames = ydb:fnames_list()
  return ys:cp_mp4(fnames)
end,
-- mp4s_script / dates_script (): the ys:dl_mp4 / ys:dl_date scripts
-- for all the hashes of the database, sorted.
mp4s_script = function (ydb)
  local hashes = ydb:hsorted()
  return ys:dl_mp4(hashes)
end,
dates_script = function (ydb)
  local hashes = ydb:hsorted()
  return ys:dl_date(hashes)
end,
-- titles_script(fname): the ys:dl_title script for all the hashes of
-- the database, sorted. Without `fname` the script is returned; with
-- it the script is written to that file and a message with the
-- number of bytes written is returned instead.
titles_script = function (ydb, fname)
  local script = ys:dl_title(ydb:hsorted())
  if not fname then return script end
  ee_writefile(fname, script)
  return format("(Wrote %d bytes into %s)", #script, fname)
end,
--
-- «YoutubeDB-script_cp_angg» (to ".YoutubeDB-script_cp_angg")
-- «YoutubeDB-script_dl_angg» (to ".YoutubeDB-script_dl_angg")
-- h = function (ydb) return keys(ydb.hashes) end
-- h(): the hashes of the database as a Set (built with
-- Set.fromarray from hashes_list).
h = function (ydb)
  local list = ydb.hashes_list
  return Set.fromarray(list)
end,
-- video_fnames_list0(hashes): the first file name of each hash in
-- the Set `hashes` (traversed with its :gen() method), passed through
-- sorted(). Hashes without file names contribute nothing; every hash
-- must have an entry in ydb.
video_fnames_list0 = function (ydb, hashes)
  local fnames = {}
  for hash in hashes:gen() do
    local entry = ydb.hashes[hash]
    table.insert(fnames, entry.fnames[1])
  end
  return sorted(fnames)
end,
-- video_fnames_list(hashes): the same file names as a string, one
-- per line.
video_fnames_list = function (ydb, hashes)
  local with_newline = function (fname) return fname.."\n" end
  return mapconcat(with_newline, ydb:video_fnames_list0(hashes))
end,
-- script_copy_video_files(hashes, targetdir, switches): a script with
-- one "cp <switches> <fname> <targetdir>" line for each of those
-- file names. Defaults: switches "-sv", targetdir ".".
script_copy_video_files = function(ydb, hashes, targetdir, switches)
  local sw = switches or "-sv"
  local target = targetdir or "."
  local cp_line = function (fname)
    return format("cp %s %s %s\n", sw, fname, target)
  end
  return mapconcat(cp_line, ydb:video_fnames_list0(hashes))
end,
-- script_cp_angg(hashes): print how many copies will be made and
-- return the copy script preceded by a "cd ~/TH/L/manifs/".
script_cp_angg = function(ydb, hashes)
  print("Copies to make: "..hashes:n())
  local copies = ydb:script_copy_video_files(hashes)
  return "cd ~/TH/L/manifs/\n"..copies
end,
-- script_dl_angg(hashes, dir, options): a download script: a "cd" to
-- `dir` (default "/sda5/videos/manifs/"), then
-- download_script0(options), then one "ydl <hash>" line for each
-- element of hashes:ks().
-- NOTE(review): download_script0 is not defined in the visible part
-- of this file (its only definition is inside a commented-out
-- section); confirm that it is provided elsewhere.
script_dl_angg = function(ydb, hashes, dir, options)
  local ydl_line = function (hash) return "ydl "..hash.."\n" end
  local cd = "cd "..(dir or "/sda5/videos/manifs/").."\n"
  local defs = download_script0(options)
  local calls = mapconcat(ydl_line, hashes:ks())
  return cd..defs..calls
end,
--
-- 2014jul23
-- «YoutubeDB-dates-titles-cat» (to ".YoutubeDB-dates-titles-cat")
-- dates_cat = function (ydb, options)
-- for hash,entry in pairs(ydb.hashes) do
-- ydb:add_date(hash, date_cat(hash, options))
-- end
-- return ydb
-- end,
-- titles_cat = function (ydb, options)
-- for hash,entry in pairs(ydb.hashes) do
-- ydb:add_title(hash, title_cat(hash, options))
-- end
-- return ydb
-- end,
-- fnames_list(): the first file name of every entry, passed through
-- sorted(). Raises the error "<hash> has no fname" if some entry has
-- no file names.
fnames_list = function (ydb)
  local fnames = {}
  for hash, entry in pairs(ydb.hashes) do
    local first = entry.fnames[1]
    if not first then error(hash.." has no fname") end
    table.insert(fnames, first)
  end
  return sorted(fnames)
end,
--
-- write_lst0(): the first file name of every entry that has one,
-- sorted, one per line, as a string. Unlike fnames_list, entries
-- without file names are skipped silently.
write_lst0 = function (ydb)
  local fnames = {}
  for _, entry in pairs(ydb.hashes) do
    local first = entry.fnames[1]
    if first then table.insert(fnames, first) end
  end
  table.sort(fnames)
  local with_newline = function (fname) return fname.."\n" end
  return mapconcat(with_newline, fnames)
end,
-- write_lst(fname): write the result of write_lst0 to a file.
write_lst = function (ydb, fname)
  local listing = ydb:write_lst0()
  ee_writefile(fname, listing)
end,
-- write_script_dl / write_script_gd / write_script_gt: write to the
-- file `fname` the scripts to download, respectively, all the videos
-- (script_dl_angg), the missing dates (new_dates_script) and the
-- missing titles (new_titles_script).
write_script_dl = function (ydb, fname, dir, options)
  local script = ydb:script_dl_angg(ydb:h(), dir, options)
  ee_writefile(fname, script)
end,
write_script_gd = function (ydb, fname, options)
  local script = ydb:new_dates_script(options)
  ee_writefile(fname, script)
end,
write_script_gt = function (ydb, fname, options)
  local script = ydb:new_titles_script(options)
  ee_writefile(fname, script)
end,
-- write_new_txt(fname1, fname2): read the file fname1, rewrite its
-- video lines with txt_to_txt2 (an older version used
-- copy_dates_into) and write the result to the file fname2.
write_new_txt = function (ydb, fname1, fname2)
  local original = ee_readfile(fname1)
  local rewritten = ydb:txt_to_txt2(original)
  ee_writefile(fname2, rewritten)
end,
--
-- write_script_cp0(targetdir): a script with one
-- "cp -sv <fname> <targetdir>" line for each element of
-- fnames_list(). Default targetdir: ".".
write_script_cp0 = function (ydb, targetdir)
  local cp_line = function (fname)
    return format("cp -sv %s %s\n", fname, targetdir or ".")
  end
  return mapconcat(cp_line, ydb:fnames_list())
end,
-- write_script_cp(fname, targetdir): write that script to a file.
write_script_cp = function (ydb, fname, targetdir)
  local script = ydb:write_script_cp0(targetdir)
  ee_writefile(fname, script)
end,
--
-- «YoutubeDB-filter» (to ".YoutubeDB-filter")
-- Note that all filters create "shallow subsets" of their input
-- ydbs! If we do
-- ydb2 = ydb:filter(function (h,e) ... end)
-- and both ydb and ydb2 have entries for "agiven_hash", then
-- ydb.hashes["agiven_hash"] and ydb2.hashes["agiven_hash"] are
-- the same table, and changing one changes the other one too!
-- filter(f): a new YoutubeDB with the entries of ydb for which
-- f(hash, entry) is true, in the order of gen_hashes.
-- The entries are shared with ydb, not copied: changing an entry of
-- the result changes it in ydb too.
filter = function (ydb, f)
  local subset = YoutubeDB.new()
  for hash, entry in ydb:gen_hashes() do
    if f(hash, entry) then
      subset.hashes[hash] = entry
      table.insert(subset.hashes_list, hash)
    end
  end
  return subset
end,
-- Standard filters. The "_yes" ones keep the entries that have the
-- thing, the "_no" ones keep the entries that don't:
--   err_yes,   err_no:     the tag "err",
--   title_yes, title_no:   at least one title,
--   date_yes,  date_no:    at least one date,
--   tag_yes,   tag_no:     the given tag.
err_yes = function (ydb)
  local has_err = L "h,e -> e.tags.err"
  return ydb:filter(has_err)
end,
err_no = function (ydb)
  local has_no_err = L "h,e -> not e.tags.err"
  return ydb:filter(has_no_err)
end,
title_yes = function (ydb)
  local has_title = L "h,e -> #e.titles > 0"
  return ydb:filter(has_title)
end,
title_no = function (ydb)
  local has_no_title = L "h,e -> #e.titles == 0"
  return ydb:filter(has_no_title)
end,
date_yes = function (ydb)
  local has_date = L "h,e -> #e.dates > 0"
  return ydb:filter(has_date)
end,
date_no = function (ydb)
  local has_no_date = L "h,e -> #e.dates == 0"
  return ydb:filter(has_no_date)
end,
tag_yes = function (ydb, tag)
  local has_tag = function (_, entry) return entry.tags[tag] end
  return ydb:filter(has_tag)
end,
tag_no = function (ydb, tag)
  local has_no_tag = function (_, entry) return not entry.tags[tag] end
  return ydb:filter(has_no_tag)
end,
-- tags_eq(tags): keep the entries whose tag string (see atags) is
-- exactly `tags`.
tags_eq = function (ydb, tags)
  local same_tags = function (hash) return ydb:atags(hash) == tags end
  return ydb:filter(same_tags)
end,
-- hash_valid(): keep the entries whose hashes have 11 characters.
hash_valid = function (ydb)
  local has_11_chars = L "h,e -> #h == 11"
  return ydb:filter(has_11_chars)
end,
--
-- del_tag(tag): remove `tag` from all entries, in place. Beware of
-- the shallow copy thing: the entries may be shared with other
-- databases. Returns ydb.
del_tag = function (ydb, tag)
  for _, entry in pairs(ydb.hashes) do
    entry.tags[tag] = nil
  end
  return ydb
end,
--
-- «YoutubeDB-copy-dates-into» (to ".YoutubeDB-copy-dates-into")
-- copy_date_into_line(li): if the line `li` mentions no video, return
-- it unchanged. Otherwise parse it into a temporary one-video
-- database, add to that video the dates that ydb has for the same
-- hash (if ydb has the hash), and return the line regenerated from
-- the temporary database with hash_to_line.
copy_date_into_line = function (ydb, li)
  local yli = YoutubeDB.new():register_videos(li)
  local hash = yli.hashes_list[1]
  if not hash then return li end
  if ydb:has(hash) then
    local dates = sort_uniq(ydb.hashes[hash].dates)
    for _, date in ipairs(dates) do
      yli:add_date(hash, date)
    end
  end
  return yli:hash_to_line(hash)
end,
-- copy_dates_into(bigstr): apply copy_date_into_line to each
-- non-empty line of `bigstr` and return the resulting text.
copy_dates_into = function (ydb, bigstr)
  local rewrite = function (li) return ydb:copy_date_into_line(li) end
  local result = bigstr:gsub("([^\n]+)", rewrite)
  return result
end,
--
-- «YoutubeDB-txt2_line» (to ".YoutubeDB-txt2_line")
-- This is a hack used to produce a myvideos.txt2 from a myvideos.txt
-- txt2_line(li): if the line `li` mentions a video whose hash is in
-- ydb, return the line that ydb generates for that hash (see
-- hash_to_line); otherwise return `li` unchanged.
txt2_line = function (ydb, li)
  local hash = YoutubeDB.new():read0(li):first_hash()
  if hash and ydb:has(hash) then
    return ydb:hash_to_line(hash)
  end
  return li
end,
-- txt_to_txt2(bigstr): apply txt2_line to each non-empty line of
-- `bigstr` and return the resulting text.
txt_to_txt2 = function (ydb, bigstr)
  local rewrite = function (li) return ydb:txt2_line(li) end
  local result = bigstr:gsub("([^\n]+)", rewrite)
  return result
end,
--
-- «YoutubeDB-line-to-fname» (to ".YoutubeDB-line-to-fname")
-- url_to_fname(url): the first file name that ydb has for the video
-- mentioned in `url` (a video url, or a line containing one).
-- Returns nothing when `url` mentions no video, when ydb has no
-- entry for its hash, or when that entry has no file names.
url_to_fname = function (ydb, url)
  local parsed = YoutubeDB.new():read0(url)
  local hash = parsed.hashes_list[1]
  if hash then
    local entry = ydb.hashes[hash]
    if entry then
      local fname = entry.fnames[1]
      if fname then return fname end
    end
  end
end,
},
}
-- «template-dynamic.html» (to ".template-dynamic.html")
--[==[
* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
# (find-TH "linkdasruas2")
cat > /tmp/template-dynamic.blogme <<'%%%'
[htmlize [J $TITLE]
[VIDEO_INDEX link_das_ruas
$TXT
$LST
]
]
%%%
# makeL
cd /tmp/
lua51 ~/blogme3/blogme3.lua \
-o /tmp/template-dynamic.html \
-i /tmp/template-dynamic.blogme
# (find-fline "/tmp/template-dynamic.html")
cp -v /tmp/template-dynamic.html ~/youtube-db/
# (find-ydb "template-dynamic.html")
--]==]
-- «bug»
-- «write_html_dynamic» (to ".write_html_dynamic")
-- write_html_dynamic(fnametxt, fnamelst, fnamehtml): generate the
-- "dynamic" html page for a list of videos.
-- Reads the template "template-dynamic.html" from the directory in
-- the global youtubedbdir and replaces in it:
--   $TITLE  by "Videos",
--   $TXT    by the contents of the file fnametxt, without its
--           "coding:" line (see no_coding),
--   $LST    by the contents of the file fnamelst,
-- then writes the result to the file fnamehtml.
write_html_dynamic = function (fnametxt, fnamelst, fnamehtml)
  local txt = no_coding(ee_readfile(fnametxt))
  local lst = ee_readfile(fnamelst)
  local template = ee_readfile(youtubedbdir.."template-dynamic.html")
  local substitutions = {TITLE="Videos", TXT=txt, LST=lst}
  local html = template:gsub("$(%w+)", substitutions)
  ee_writefile(fnamehtml, html)
end
-- «simple_toplevel» (to ".simple_toplevel")
-- simple_toplevel(A): obsolete entry point; calling it always raises
-- an error.
simple_toplevel = function (A)
  error("OBSOLETE - DELETED")
end
-- «simple_toplevel2» (to ".simple_toplevel2")
-- simple_toplevel2(ys, stem): the main operation of the toolbox.
--   ys:   a YScripts object; this function reads ys.mp4dir and
--         ys.nohtml and uses it to generate and write the scripts,
--   stem: the name of the list of videos, without the ".txt".
-- It reads <stem>.txt and the video files in <ys.mp4dir>/, prints a
-- report comparing them, and writes these files:
--   <stem>.lst    the local video files mentioned in the .txt,
--   <stem>.html   the dynamic html page (unless ys.nohtml is set),
--   <stem>.cp, <stem>.dlv, <stem>.dld, <stem>.dlt
--                 scripts to copy/download missing videos, dates and
--                 titles, and <stem>.sh, with all of them combined,
--   <stem>.txt2   the .txt with the dates and titles found on disk.
-- Everything printed goes through `p`, which replaces each "$word"
-- by o[word].
-- Note that the methods dates_cat and titles_cat, called below, use
-- the global variable "ys", not the parameter.
simple_toplevel2 = function (ys, stem)
  local o = {}
  local p = function (str) print((str:gsub("$(%w+)", o))) end
  local pc = p
  local pd = p
  local mp4_dir = ys.mp4dir
  o.stem = stem
  o.mp4dir = mp4_dir:gsub("/$", "")     -- wrong
  -- NOTE(review): o.tmpdir is not used by any message below, and the
  -- two "Reading: /tmp/ydb/..." messages name a directory that is
  -- different from the default ys.tmpdir ("/tmp/ydbtmp").
  o.tmpdir = "/tmp/ydbtmp"              -- wrong
  --
  local Bigdb = YoutubeDB.new()         -- stub
  local Bigmp4 = YoutubeDB.new()        -- stub
  --
  -- Read $stem.txt and videos/*.mp4,
  -- calculate their intersection and difference,
  -- generate $stem.lst and $stem.html.
  --
  -- Suffixes: a=all, u=unique, r=repetitions
  --           i=intersection, o=other
  --
  local Txt = YoutubeDB.new():read(stem..".txt")
  local Mp4 = YoutubeDB.new():ls(mp4_dir.."/")
  local Txti, Txto = Txt * Mp4, Txt - Mp4
  local Mp4i, Mp4o = Mp4 * Txt, Mp4 - Txt
  o.Ta = Txt:n0(); o.Tu = Txt:n(); o.Tr = Txt:nrepetitions()
  -- The M* counters describe the mp4s, so they must come from Mp4;
  -- they used to be copies of the T* counters, taken from Txt.
  o.Ma = Mp4:n0(); o.Mu = Mp4:n(); o.Mr = Mp4:nrepetitions()
  o.Ti = Txti:n(); o.To = Txto:n()
  o.Mi = Mp4i:n(); o.Mo = Mp4o:n()
  -- For each counter "Xy" define a lowercase "xy" made of as many
  -- spaces as "Xy" has characters (used by the table commented out
  -- below).
  for _,str in ipairs(split("Ta Tu Tr Ma Mu Mr Ti To Mi Mo" )) do
    o[str:lower()] = tostring(o[str]):gsub(".", " ")
  end
  --
  p "Reading: $stem.txt $Ta videos ($Tu ids + $Tr repetitions)"
  p "Reading: $mp4dir/*.mp4 $Ma mp4s ($Mu ids + $Mr repetitions)"
  -- p " mp4s: $Mr+($Mo+$Mi)"
  -- p " .txt: $mr $mo($Ti+$To)+$Tr"
  -- p " $mr $mo $Mi $to $tr mp4s mentioned in the .txt"
  -- p " $mr $Mo $mi $to $tr mp4s not mentioned in the .txt"
  -- p " $Mr $mo $mi $to $tr mp4s that are repetitions"
  -- p " $mr $mo $Ti $to $tr urls in the .txt have local mp4s"
  -- p " $mr $mo $ti $To $tr urls in the .txt don't have local mp4s"
  -- p " $mr $mo $ti $to $Tr urls in the .txt are repetitions"
  p " ____________"
  p " | Mp4s |"
  p " | _____|_______"
  p " | M-T | | .txt | Mp4s not in the .txt: $Mo"
  p " | | M*T | | Mp4s mentioned in the .txt: $Mi"
  p " |______|_____| T-M | Urls in the .txt without local mp4s: $To"
  p " | |"
  p " |_____________|"
  p ""
  p "Generating: $stem.lst ($Ti mp4s listed)"
  Mp4i:write_lst(stem..".lst")
  if ys.nohtml then
    p "Not generating $stem.html (due to \"-nohtml\")"
  else
    write_html_dynamic(stem..".txt", stem..".lst", stem..".html")
    p "Generating: $stem.html (dynamic version, with javascript)"
  end
  --
  --
  -- Calculate "Mp4less", "Dateless", "Titleless".
  -- Note that "Mp4less", "Dateless", etc are subsets of Txt,
  -- created by filters like ":date_no()" using shallow copies; this
  -- means that when we add dates and titles to their entries these
  -- changes will be propagated back to the full "Txt" structure.
  --
  -- Suffixes: l="-less"
  --
  local Mp4less = Txt - Mp4             -- a subset of Txt
  local Dateless = Txt:date_no()        -- a subset of Txt
  local Titleless = Txt:title_no()      -- a subset of Txt
  o.Ml = Mp4less:n()
  o.Datel = Dateless:n()
  o.Titlel = Titleless:n()
  p ""
  p "Missing mp4s (not in $mp4dir/*.mp4): $Ml"
  p "Missing dates (not in $stem.txt): $Datel"
  p "Missing titles (not in $stem.txt): $Titlel"
  --
  -- Find as many missing mp4s, dates, and titles as possible that
  -- can be copied or read from disk (i.e., without downloads from
  -- youtube).
  --
  -- Suffixes: f = found, d = needs download.
  --
  Dateless:dates_cat()                  -- this adds fields to Txt
  Titleless:titles_cat()                -- this adds fields to Txt
  --
  local Mp4f = Bigmp4 * Mp4less         -- not a subset of Txt
  local Mp4d = Mp4less - Mp4f           -- a subset of Txt
  local Datef = Dateless:date_yes()     -- a subset of Txt
  local Dated = Dateless:date_no()      -- a subset of Txt
  local Titlef = Titleless:title_yes()  -- a subset of Txt
  local Titled = Titleless:title_no()   -- a subset of Txt
  o.Mf = Mp4f:n()
  o.Md = Mp4d:n()
  o.Datef = Datef:n()
  o.Dated = Dated:n()
  o.Titlef = Titlef:n()
  o.Titled = Titled:n()
  pc"Mp4s found in Big_mp4_db: $Mf"
  pd"Dates found in Big_video_db: $Datef"
  pd"Titles found in Big_video_db: $Titlef"
  p "Reading: /tmp/ydb/*.date ($Datef missing dates found there)"
  p "Reading: /tmp/ydb/*.title ($Titlef missing titles found there)"
  pc"Mp4s to copy: $Mf"
  p "Mp4s to download: $Md"
  p "Dates to download: $Dated"
  p "Titles to download: $Titled"
  --
  -- Generate scripts to copy/download the missing mp4s, dates and titles
  p ""
  pc"Generating: $stem.cp ($Mf mp4s to copy) (NOT YET)"
  p "Generating: $stem.dlv ($Md mp4s to download)"
  p "Generating: $stem.dld ($Dated dates to download)"
  p "Generating: $stem.dlt ($Titled titles to download)"
  p "Generating: $stem.sh with all the scripts above combined"
  local script_cp = ys:cp_mp4(Mp4f:fnames_list()) .. "\n"
  local script_dlv = ys:dl_mp4(Mp4d:hsorted()) .. "\n"
  local script_dld = ys:dl_date(Dated:hsorted()) .. "\n"
  local script_dlt = ys:dl_title(Titled:hsorted()) .. "\n"
  local script_sh = script_cp..script_dld..script_dlt..script_dlv
  ys:write_script(stem..".cp", script_cp)
  ys:write_script(stem..".dld", script_dld)
  ys:write_script(stem..".dlt", script_dlt)
  ys:write_script(stem..".dlv", script_dlv)
  ys:write_script(stem..".sh", script_sh)
  --
  -- Generate stem.txt2.
  -- Note that some operations above may have added dates and titles
  -- to "Txt", so it may have more information than initially.
  p ""
  p "Generating: $stem.txt2"
  Txt:write_new_txt(stem..".txt", stem..".txt2")
  --
  -- Tell the user how to use the scripts
  p("")
  p("# Now run these, in any order (the #-ed lines are not needed):")
  -- p("source $stem.dld")
  -- p("source $stem.dlt")
  -- p("source $stem.dlv")
  p("source $stem.sh")
  p("tkdiff $stem.txt $stem.txt2")
  p("cp -v $stem.txt2 $stem.txt")
end
-- Local Variables:
-- coding: raw-text-unix
-- modes: (lua-mode fundamental-mode)
-- End: