-- Warning: this is an htmlized version!
-- The original is here, and
-- the conversion rules are here.
-- This file:
-- http://anggtwu.net/blogme3/youtube.lua
-- http://anggtwu.net/blogme3/youtube.lua.html
--  (find-angg        "blogme3/youtube.lua")
--  (find-lua-links   "blogme3/youtube.lua")

-- Version: 2013sep24
--
-- (find-blogme3 "anggdefs.lua" "youtube")
-- (find-angg "LUA/youtube.lua")

-- Workflow:
--   first we add all videos in "/bigarchive/videos/",
--   then all videos in "thispage/mentionedvideos/",
--   then all the URLs in the input (each with its title),
--   then we iterate through the array youtube_database[] to issue
--     commands to download the videos that are mentioned in the input
--     but for which we don't have local copies yet, and to issue
--     commands to copy to "thispage/mentionedvideos/" all the
--     mentioned videos for which we only have local copies at
--     "/bigarchive/videos/".
--
-- Typically this will mean something like:
--
--   youtube_copy_to = "thispage/mentionedvideos/"
--   youtube_ls           "ls /bigarchive/videos/*"
--   youtube_copy_to = nil
--   youtube_ls     "ls thispage/mentionedvideos/*"
--   youtube_add_urls(input)
--   print("cd /bigarchive/videos/")
--   youtube_print_downloads()
--   youtube_print_copies()
--
-- Then rinse, wash, repeat.
-- When there are no remaining downloads or copies to do then the
-- local repo is good, and we can copy it to a pen drive.

-- Global state of the (old) workflow described above.
-- youtube_database maps a video hash to its record table; records are
-- stored by youtube_ls00 (local files) and youtube_add_url0 (URLs).
youtube_database = {}  -- indexed by hash
-- Value copied into the .copy_to field of each record created by
-- youtube_ls00; nil means "no copy needed for files listed now".
youtube_copy_to = nil

-- Shell scripts.
-- Print a shell snippet that downloads one video with youtube-dl.
--   A.title  goes into the leading "# ..." comment line,
--   A.url    is the (single-quoted) address handed to youtube-dl.
-- Nothing is executed here; the text is only printed.
youtube_download = function (A)
    local template =
        "# %s:\n"..
        "~/usrc/youtube-dl/youtube-dl -t -f 18 \\\n"..
        "  --write-thumbnail --restrict-filenames \\\n"..
        "  '%s'\n"
    print(string.format(template, A.title, A.url))
  end
-- Print the two "cp -vl" commands that copy the video file (A.fname)
-- and its thumbnail (A.image) to the directory A.copy_to.
-- Both commands are built before anything is printed.
youtube_copy = function (A)
    local template  = "cp -vl %s %s"
    local video_cmd = format(template, A.fname, A.copy_to)
    local image_cmd = format(template, A.image, A.copy_to)
    print(video_cmd)
    print(image_cmd)
  end
-- Both functions below are intentionally empty: youtube_download and
-- youtube_copy print their commands immediately, so there is nothing
-- left to print when the workflow reaches these steps.
youtube_print_downloads = function () end   -- already printed
youtube_print_copies    = function () end   -- already printed

-- (find-angg "LUA/lua50init.lua" "youtube_make_url")
--youtube_make_url = function (hash, time)
--    return "http://www.youtube.com/watch?v=" .. hash
--  end

-- Simple string operations.
-- Turn the title part of a file name into a readable title:
-- drop trailing "-"s, turn "_"s into spaces, drop trailing whitespace.
youtube_simplify_fname_title = function (str)
    local without_dashes = str:match("^(.-)-*$")
    local with_spaces    = without_dashes:gsub("_", " ")  -- keeps only the string
    return (with_spaces:match("^(.-)%s*$"))
  end
-- Strip leading and trailing spaces, tabs and carriage returns from
-- the text that follows a URL in an input line.
-- The original class was "[ \t\15]"; in Lua "\15" is a *decimal*
-- escape (byte 15), not the octal 015 used for CR in C/Emacs, so
-- carriage returns were never removed. "\r" is what was meant.
youtube_simplify_url_title = function (str)
    return (str:match("^[ \t\r]*(.-)[ \t\r]*$"))
  end
-- Can the browser play a file with this extension?
-- Returns true for ".mp4" and ".webm", and nothing otherwise.
youtube_video_ext = function (ext)
    local playable = { [".mp4"] = true, [".webm"] = true }
    if playable[ext] then return true end
  end

-- Splitters.
-- Split a line of the form  <before><url><spaces><title><spaces>.
-- Returns: before, hash, spaces-after-url, title, trailing-spaces,
-- where hash is the value of the "v" parameter of the URL.
-- Returns nothing when the line has no http(s) URL or the URL has no
-- "v" parameter. When a parameter is repeated the last one wins.
youtube_split_url0 = function (li)
    local before, url, gap, title, after =
      li:match("^(.-)(https?://%S*)(%s*)(.-)(%s*)$")
    if not url then return end
    local params = {}
    for key, value in url:gmatch("[?&](%w*)=([^?&]*)") do
      params[key] = value   -- "t" (time) is also collected but not used
    end
    local hash = params.v
    if not hash then return end
    return before, hash, gap, title, after
  end
-- Peel a file name apart, from right to left:
--   1. an optional ".part" suffix,
--   2. an optional extension ("." plus 2 to 5 alphanumerics),
--   3. the directory (everything up to the last "/"),
--   4. an 11-character hash made only of [-_A-Za-z0-9].
-- Returns: dir, title-part, hash, ext, part. Each piece that was not
-- found is nil; the title-part is whatever remains of the name.
youtube_split_fname0 = function (str)
    local rest = str
    local head, part = rest:match("^(.-)(%.part)$")
    if head then rest = head end
    local stem, ext = rest:match("^(.-)(%.%w%w%w?%w?%w?)$")
    if stem then rest = stem end
    local dir, base = rest:match("^(.*/)([^/]*)$")
    if base then rest = base end
    local ftitle, hash = rest:match("^(.-)(...........)$")
    if hash and hash:match("[^-_A-Za-z0-9]") then
      ftitle, hash = nil, nil   -- last 11 chars are not a valid hash
    end
    if ftitle then rest = ftitle end
    return dir, rest, hash, ext, part
  end
-- Build (or complete) a record for a local video file.
--   fname: path of the file
--   A:     optional table to fill in; fields it already has are kept,
--          except .url, which is always recomputed from the hash.
-- Returns the record and its hash, or nothing when the name carries no
-- hash or its extension is not accepted by youtube_video_ext.
youtube_split_fname = function (fname, A)
    local dir, ftitle, hash, ext, part = youtube_split_fname0(fname)
    if not (hash and youtube_video_ext(ext)) then return end
    local rec = A or {}
    -- Pieces of the file name (only fill in what is missing):
    rec.fname  = rec.fname  or fname
    rec.dir    = rec.dir    or dir
    rec.ftitle = rec.ftitle or ftitle
    rec.hash   = rec.hash   or hash
    rec.ext    = rec.ext    or ext
    rec.part   = rec.part   or part
    -- Derived fields:
    rec.title  = rec.title  or youtube_simplify_fname_title(rec.ftitle)
    rec.url    = youtube_make_url(rec.hash)
    return rec, rec.hash
  end

-- Process filenames.
-- Register one local video file in youtube_database.
-- Returns the record, or nothing when fname is not a video file whose
-- name carries a hash (see youtube_split_fname).
-- The record gets:
--   .image    the name of the thumbnail: same name with ".jpg"
--             (a guess made from the file name; the file is not checked)
--   .copy_to  the current value of youtube_copy_to
-- A.dir is nil when fname has no directory part (e.g. the output of
-- "cd DIR && ls *"), so it is replaced by "" to avoid a
-- "concatenate a nil value" error.
youtube_ls00 = function (fname)
    local A = youtube_split_fname(fname)
    if not A then return end
    A.image   = (A.dir or "") .. A.ftitle .. A.hash .. ".jpg"  -- HACK, for tests
    A.copy_to = youtube_copy_to
    youtube_database[A.hash] = A
    return A
  end
-- Feed each non-empty line of the output of an "ls" to youtube_ls00.
youtube_ls0 = function (ls_output)
    for line in string.gmatch(ls_output, "[^\n]+") do youtube_ls00(line) end
  end
-- Run the shell command cmd (through getoutput) and register every
-- video file listed in its output. With verbose set, the raw output
-- is printed first.
youtube_ls = function (cmd, verbose)
    local listing = getoutput(cmd)
    if verbose then
      print(listing)
    end
    youtube_ls0(listing)
  end

-- Process a line that may be a youtube URL + title.
-- Record that the video `hash` was mentioned with the title `title`.
--   * No record yet: create a "remote" record (hash, url, title),
--     store it, and print the command that downloads the video.
--   * Record exists, has a .copy_to and is mentioned for the first
--     time: print the commands that copy it to a better place.
-- In both cases the record ends with .title = title and
-- .mentioned = true, and is returned.
youtube_add_url0 = function (hash, title)
    local rec = youtube_database[hash]
    if rec then
      if rec.copy_to and not rec.mentioned then
        youtube_copy(rec)
      end
    else
      rec = { hash = hash, url = youtube_make_url(hash), title = title }
      youtube_database[hash] = rec
      youtube_download(rec)
    end
    rec.title     = title
    rec.mentioned = true
    return rec
  end
-- Like youtube_add_url0, but the first argument may be either a bare
-- hash or a full URL (optionally followed by a title).
--   hash_or_url: an 11-character hash, or text containing a URL
--   title:       optional explicit title; when nil, the text that
--                follows the URL (if any) is used
-- The original version declared a local named `title`, shadowing the
-- parameter, so a title supplied by the caller was always ignored.
youtube_add_url = function (hash_or_url, title)
    local _, hash, _, url_title = youtube_split_url0(hash_or_url)
    if title == nil then title = url_title end
    return youtube_add_url0(hash or hash_or_url, title)
  end
-- Process one input line. If it contains a youtube URL, register the
-- video (see youtube_add_url0) using the text after the URL as its
-- title, and return the record; otherwise return nothing.
youtube_add_url_line = function (li)
    local _, hash, _, rest = youtube_split_url0(li)
    if hash then
      return youtube_add_url0(hash, youtube_simplify_url_title(rest))
    end
  end
-- Process every non-empty line of bigstr with youtube_add_url_line
-- and return the array of records, in order of appearance.
-- Lines without a youtube URL contribute nothing to the array.
youtube_add_urls = function (bigstr)
    local records = {}
    for line in bigstr:gmatch("[^\n]+") do
      local rec = youtube_add_url_line(line)
      if rec then records[#records + 1] = rec end
    end
    return records
  end

-- Rendering text.
-- This has been reimplemented in Javascript in the new version, see:
-- 
-- Render a URL as a short link: the text "YT" linked to url (through
-- HREF), wrapped in parentheses.
Yurl = function (url)
    local link = HREF(url, "YT")
    return "(" .. link .. ")"
  end
-- Render a title: Q(title), linked (through HREF) to the local file
-- A.fname when the record has one, and unlinked otherwise.
Ytitle = function (A, title)
    local target = A.fname
    if not target then return Q(title) end
    return HREF(target, Q(title))
  end
-- Render one line of text. A line without a youtube URL goes through
-- htmlizeline unchanged. Otherwise the video is registered and the
-- line is rebuilt as
--   <before> (YT-link) <spaces> <title, linked if we have the file> <after>
-- where the pieces come from youtube_split_url0.
Yline = function (li)   -- li may or may not have a youtube URL
    local rec = youtube_add_url_line(li)
    if not rec then return htmlizeline(li) end
    local before, _, gap, title, after = youtube_split_url0(li)
    local pieces = { before, Yurl(rec.url), gap, Ytitle(rec, title), after }
    return table.concat(pieces)
  end
-- Apply Yline to every non-empty line of bigstr, keeping the newlines
-- where they were. Returns only the resulting string.
Ylines = function (bigstr)
    local rendered = bigstr:gsub("[^\n]+", Yline)
    return rendered
  end

-- Rendering thumbnails.
-- I stopped using this because it was making the result heavy to render.
-- Extra attribute text placed inside each thumbnail's <img> tag.
thumbwidth = ' width=10%'
-- Render an <img> tag for the thumbnail file `image`, or the
-- placeholder "[]" when there is no image.
Yimage = function (image)
    if not image then return "[]" end
    return '<img src="'..image..'" '..thumbwidth..'>'
  end
-- Render the thumbnail of one record: the image (or "[]"), linked to
-- the local file when there is one and to the URL otherwise, inside a
-- <span> whose tooltip is the title (with '"' escaped as &quot;).
-- The result ends with a newline.
Ythumb = function (A)
    local tooltip = A.title:gsub('"', '&quot;')   -- keeps only the string
    local link    = HREF(A.fname or A.url, Yimage(A.image))
    return '<span title="' .. tooltip .. '">' .. link .. '</span>' .. "\n"
  end
-- Render the thumbnails of all records in the array As, concatenated.
Ythumbs0 = function (As)
    local spans = map(Ythumb, As)
    return table.concat(spans)
  end
-- Register the videos mentioned in bigstr (one candidate per
-- non-empty line) and render their thumbnails with Ythumbs0.
-- Lines without a youtube URL are skipped.
Ythumbs = function (bigstr)
    local records = {}
    for line in bigstr:gmatch("[^\n]+") do
      local rec = youtube_add_url_line(line)
      if rec then records[#records + 1] = rec end
    end
    return Ythumbs0(records)
  end

-- Debugging stuff (for the old version)
-- Return the records of youtube_database as an array, in the
-- (unspecified) order in which pairs() visits them.
youtube_database_entries0 = function ()
    local records = {}
    for _, rec in pairs(youtube_database) do
      records[#records + 1] = rec
    end
    return records
  end
-- Return the records of youtube_database as an array sorted by file
-- name and then by hash (records without a file name come first,
-- because their sort key starts with the empty string).
-- The array is now held in a local: the original assigned it to an
-- undeclared `As`, which created (or overwrote) a global of that name.
youtube_database_entries = function ()
    local key = function (A) return (A.fname or "").."  "..(A.hash or "") end
    local lt = function (A, B) return key(A) < key(B) end
    local As = youtube_database_entries0()
    table.sort(As, lt)
    return As
  end
-- Iterator over the sorted records of the database, for use as
--   for i,A in youtube_database_ipairs() do ... end
youtube_database_ipairs = function ()
    local sorted = youtube_database_entries()
    return ipairs(sorted)
  end
-- Debugging aid: pretty-print (with PP) every record of the database,
-- in sorted order.
youtube_database_print = function ()
    for _, rec in youtube_database_ipairs() do
      PP(rec)
    end
  end

-- Set the global youtube_copy_to to copy_to, then run youtube_ls.
-- Every file registered by this call gets that value as its .copy_to
-- field; the global keeps the new value afterwards.
youtube_ls_copy = function (cmd, copy_to, verbose)
    youtube_copy_to = copy_to
    return youtube_ls(cmd, verbose) and nil
  end
-- Like youtube_ls_copy, but the listing command is built here: if
-- cddir exists, cd into it and run "ls lsarg".
-- cddir and lsarg are pasted into the shell command as they are.
youtube_cd_ls_copy = function (cddir, lsarg, copy_to, verbose)
    local template = "[ -e %s ] && cd %s && ls %s"
    local cmd = format(template, cddir, cddir, lsarg)
    youtube_ls_copy(cmd, copy_to, verbose)
  end

-- 2013sep24: coroutine iterator tricks (new, experimental)
-- Short global aliases used by the generator functions below:
--   cow(f) wraps f as a coroutine and returns a function that resumes it;
--   coy(...) yields values from inside that coroutine.
coy = coroutine.yield
cow = coroutine.wrap
-- coc = coroutine.create
-- cor = coroutine.resume
-- cos = coroutine.status

-- Iterator over the ".mp4" files named in bigstr.
--   bigstr: whitespace-separated file names (e.g. the output of "ls")
--   filter: optional function (hash, fname); only pairs for which it
--           returns a true value are produced. Default: accept all.
-- Each step yields hash, fname, where hash is the 11 characters that
-- come right before ".mp4". Names that are too short are skipped.
video_fnames = function (bigstr, filter)
    local accept = filter or function () return true end
    local function produce ()
        for fname in bigstr:gmatch("%S+") do
          local hash = fname:match("(...........)%.mp4$")
          if hash and accept(hash, fname) then
            coy(hash, fname)
          end
        end
      end
    return cow(produce)
  end
--
-- Obsolete: now the javascript creates the table itself... 
-- (find-anggfile "local-videos.js" "add_mp4s =")
-- Render the hash -> file name pairs found in bigstr (see
-- video_fnames) as the text of a Javascript object literal, one
-- "  'hash': 'fname'" entry per line.
-- The original iterated over the global `ols` (a leftover from the
-- test block below) instead of its own `bigstr` argument.
video_fnames_js = function (bigstr, filter)
    local A = {}
    for hash,fname in video_fnames(bigstr, filter) do
      table.insert(A, format("  '%s': '%s'", hash, fname))
    end
    return "{\n"..table.concat(A, ",\n").."\n}"
  end


-- TODO:
-- take the filenames from "-big" and "-files"; generate html
-- take the filenames from "-files" only; generate html
-- 
-- for hash,title in mentioned_hashes(body) do
--   if     in_omit[hash] then    -- do nothing
--   elseif in_files[hash] then   -- do nothing
--   elseif in_big[hash] and not in_files[hash] then
--     do_copy(hash)
--   else
--     do_download(hash)
--   end
-- end


--[==[
* (eepitch-blogme3)
* (eepitch-kill)
* (eepitch-blogme3)
ols = getoutput "ls -d /sda5/videos/*"
= ols
= video_fnames_js(ols)

for h,fn in video_fnames(ols) do print(h, fn) end
for h,fn in video_fnames(ols) do printf("  '%s': '%s',\n", h, fn) end

--]==]



-- Blogme words.
-- Blogme words.
-- `def` is defined outside this file (in blogme3). NOTE(review): each
-- string appears to be "name arity argnames expression", defining a
-- blogme word and a Lua function with that name -- confirm there.
-- All of these render `body` with Ylines; the PY* ones wrap the result
-- in PRE, and the PYT* ones also append the thumbnails from Ythumbs.
-- PYT' relies on PYT being callable as a Lua function.
def [[ Y'   1q body Ylines(body) ]]
def [[ PY   1  body PRE(Ylines(body)) ]]
def [[ PY'  1q body PRE(Ylines(body)) ]]
def [[ PYT  1  body PRE(Ylines(body)) .. Ythumbs(body) ]]
def [[ PYT' 1q body PYT(body)                          ]]

-- Extra CSS appended to the style attribute of every BLOGMEBOX <div>.
blogmeboxstyle = ' width: 40em; padding: 4px;'
-- BLOGMEBOX wraps `body` in a <div> with an orange-ish background
-- (#ffda99) plus blogmeboxstyle. The outer monospace <div> is
-- commented out inside the definition.
def [[ BLOGMEBOX 1 body
       -- '<div style="font-family: monospace;">\n'..
       '<div style="background: #ffda99;'..blogmeboxstyle..'">'..
       body..
       -- "</div>\n"..
       "</div>\n"
       ]]

-- Boxed variants: B' boxes PRE(body), BYT' boxes PYT(body).
def [[ B'   1q body BLOGMEBOX(PRE(body))               ]]
def [[ BYT' 1q body BLOGMEBOX(PYT(body))               ]]

--------[ 2013oct04 ]--------

-- Return a fresh "is this the first time?" predicate. The predicate
-- returns true the first time it is called with a given hash and
-- false on every later call with the same hash. Each predicate has
-- its own private memory.
meta_first_time = function ()
    local seen = {}
    return function (hash)
        if seen[hash] then return false end
        seen[hash] = "present"
        return true
      end
  end

-- Return the extension of fname without the dot (the alphanumerics
-- after the last "." at the end of the name), or nil if there is none.
lv_file_extension = function (fname)
    local ext = fname:match("%.(%w+)$")
    return ext
  end

-- True when the extension of fname is "mp4" or "webm", false otherwise.
lv_file_is_video = function (fname)
    local video_exts = { mp4 = true, webm = true }
    local ext = lv_file_extension(fname)
    return video_exts[ext] == true
  end

-- Iterator over the non-empty lines of bigstr (maximal runs of
-- characters other than "\n").
gen_nonempty_lines = function (bigstr)
    local not_newlines = "[^\n]+"
    return bigstr:gmatch(not_newlines)
  end

-- Iterator over the video files listed in bigstr (one file name per
-- line). Each step yields hash, fname for a line that
-- youtube_split_fname accepts and whose file name passes
-- lv_file_is_video.
gen_video_files = function (bigstr)
    local function scan ()
        for line in gen_nonempty_lines(bigstr) do
          local rec = youtube_split_fname(line)
          if rec and lv_file_is_video(rec.fname) then
            coy(rec.hash, rec.fname)
          end
        end
      end
    return cow(scan)
  end

-- Iterator over the videos mentioned in bigstr (a text with at most
-- one youtube URL per line). Each step yields hash, title, where title
-- is the text after the URL; a hash that appears again on a later
-- line is reported only the first time.
gen_mentioned_videos = function (bigstr)
    local is_new = meta_first_time()
    local function scan ()
        for line in gen_nonempty_lines(bigstr) do
          local _, hash, _, title = youtube_split_url0(line)
          if hash and is_new(hash) then
            coy(hash, title)
          end
        end
      end
    return cow(scan)
  end

-- In my setting the "big archive" is at /sda5/videos/manifs/
-- and the "pendrive archive" is at (~/TH/L/)manifs/ ...
-- Note that they are disjoint. We copy the _mentioned_ videos
-- from "big" to "pendrive", and the stuff from "pendrive" to
-- a real pendrive.

-- Lookups in the three internal tables. Each returns the stored value
-- (a title or a file name) or nil when the hash is not there.
lv_mentioned = function (hash)       -- title, if mentioned in the text
    local title = lv_mentioned_videos[hash]
    return title
  end
lv_pendrive_has = function (hash)    -- fname in the "pendrive" archive
    local fname = lv_pendrive_videos[hash]
    return fname
  end
lv_big_has = function (hash)         -- fname in the "big" archive
    local fname = lv_big_videos[hash]
    return fname
  end
-- Is the video somewhere on the hard disk, i.e. in the "pendrive"
-- archive or in the "big" archive? Returns the file name found (the
-- pendrive one is preferred), or a false value.
-- The original called lv_hd_has itself in the second branch, which
-- recursed without end whenever the pendrive archive lacked the video.
lv_hd_has       = function (hash)
    return lv_pendrive_has(hash) or lv_big_has(hash)
  end

-- A video needs to be copied when it is mentioned, is not yet in the
-- "pendrive" archive, and exists in the "big" archive. The result is
-- truthy exactly in that case (it is then the big archive's fname).
lv_needs_copy = function (hash)
    local title = lv_mentioned(hash)
    if not title then return title end          -- not mentioned
    if lv_pendrive_has(hash) then return false end  -- already copied
    return (lv_big_has(hash))
  end

-- A video needs to be downloaded when it is mentioned and is not in
-- the "big" archive. Returns true in that case, and a false value
-- (false or nil) otherwise.
lv_needs_download = function (hash)
    local title = lv_mentioned(hash)
    if not title then return title end   -- not mentioned
    return not lv_big_has(hash)
  end

-- Global state of the "lv_" (local videos) workflow.
-- The three inputs are strings that the command-line options defined
-- further down append to; lv_prepare_tables reads them and fills the
-- internal and output tables.
lv_big_ls           = ""  -- input, generated by an "ls"
lv_pendrive_ls      = ""  -- input, generated by an "ls"
lv_text             = ""  -- input, text with youtube links
lv_big_videos       = {}  -- internal, hash -> fname
lv_pendrive_videos  = {}  -- internal, hash -> fname
lv_mentioned_videos = {}  -- internal, hash -> title
lv_downloads        = {}  -- output, array of {hash, title} pairs
lv_cps              = {}  -- output, array of fnames
lv_pendrive_files   = {}  -- output, array of fnames (for foo-pendrive.html)
lv_hd_files         = {}  -- output, array of fnames (for foo-hd.html)

-- Input:
--   lv_big_ls
--   lv_pendrive_ls
--   lv_text
-- Output:
--   lv_downloads
--   lv_cps
--   lv_pendrive_files
--   lv_hd_files
--
-- Fill the internal and output tables from the three input strings.
--   1. Index the video files of both listings by hash.
--   2. For each video mentioned in lv_text (first mention only):
--        in the pendrive archive -> lv_pendrive_files and lv_hd_files
--        only in the big archive -> lv_cps and lv_hd_files
--        in neither              -> lv_downloads, as {hash, title}
-- The tables are appended to, not reset, so calling this twice with
-- the same inputs produces duplicate entries in the output arrays.
lv_prepare_tables = function ()
    for hash, fname in gen_video_files(lv_big_ls) do
      lv_big_videos[hash] = fname
    end
    for hash, fname in gen_video_files(lv_pendrive_ls) do
      lv_pendrive_videos[hash] = fname
    end
    for hash, title in gen_mentioned_videos(lv_text) do
      lv_mentioned_videos[hash] = title
      local in_big      = lv_big_videos[hash]
      local in_pendrive = lv_pendrive_videos[hash]
      if in_pendrive then
        lv_pendrive_files[#lv_pendrive_files + 1] = in_pendrive
        lv_hd_files[#lv_hd_files + 1]             = in_pendrive
      elseif in_big then
        lv_cps[#lv_cps + 1]           = in_big
        lv_hd_files[#lv_hd_files + 1] = in_big
      else
        lv_downloads[#lv_downloads + 1] = {hash, title}
      end
    end
  end    -- incomplete?

-- Return (not print) the shell snippet that downloads one video.
--   hash_title: a {hash, title} pair, as stored in lv_downloads
-- The title goes into a "# ..." comment line and the URL built from
-- the hash is passed, single-quoted, to youtube-dl. The snippet ends
-- with a blank line.
lv_download_cmd = function (hash_title)
    local template =
        "# %s:\n"..
        "youtube-dl -t -f 18 \\\n"..
        "  --write-thumbnail --restrict-filenames \\\n"..
        "  '%s'\n\n"
    local hash  = hash_title[1]
    local title = hash_title[2]
    return string.format(template, title, youtube_make_url(hash))
  end
-- Directory that mentioned videos are copied (hard-linked) to.
lv_cp_to  = "~/TH/L/manifs/"
-- Return the "cp -ivl" command line (ending in a newline) that copies
-- the file fname to lv_cp_to.
lv_cp_cmd = function (fname)
    local head = "cp -ivl " .. fname
    return head .. " " .. lv_cp_to .. "\n"
  end
-- Directory in which the download commands are run.
lv_dl_to  = "/sda5/videos/manifs/"
-- Return the whole download script: a "cd" to lv_dl_to followed by
-- one lv_download_cmd snippet for each entry of lv_downloads.
lv_download_cmds = function ()
    local header = "cd " .. lv_dl_to .. "\n"
    return header .. mapconcat(lv_download_cmd, lv_downloads)
  end
-- Return the whole copy script: one lv_cp_cmd line for each file
-- name in lv_cps.
lv_cp_cmds = function ()
    local script = mapconcat(lv_cp_cmd, lv_cps)
    return script
  end



-- gen_cps = function () return cow(function ()
--     for hash,title in gen_mentioned_videos(bigstr) do
--       local a, hash, b, title, c = youtube_split_url0(line)
--       if hash and first_time(hash) then
--             coy(hash, title)
--           end
--         end
--   end) end


-- Read a file whose name is first passed through ee_expand.
ee_readfile = function (fname)
    return readfile(ee_expand(fname))
  end
-- Write bigstr to the file fname (after ee_expand). The special name
-- "-" sends the text to io.write instead of to a file.
ee_writefile = function (fname, bigstr)
    local target = ee_expand(fname)
    if target ~= "-" then
      writefile(target, bigstr)
    else
      io.write(bigstr)
    end
  end
-- List the "*.mp4" files that sit directly in dir (sorted, one per
-- line), by running "find" through getoutput. When dir is not a
-- directory the shell command does nothing.
-- The expanded directory is substituted with a function replacement:
-- as a replacement *string*, any "%" in the path would be treated as
-- a gsub escape (silently dropped in Lua 5.1, an error in 5.2+).
-- The path is still pasted unquoted into the shell command.
ls_videos = function (dir)
    local template = "[ -d DIR ] && find DIR -maxdepth 1 -name '*.mp4' | sort"
    local path = ee_expand(dir)
    local cmd = template:gsub("DIR", function () return path end)
    return getoutput(cmd)
  end

-- Compile the string str as a Lua chunk and run it, returning whatever
-- the chunk returns. A compilation error is raised through assert.
-- `loadstring` only exists in Lua 5.1; in later versions `load`
-- accepts a string, so it is used as a fallback.
lua_eval = function (str)
    local compile = loadstring or load
    local chunk = assert(compile(str))
    return chunk()
  end

-- Split an option definition into its three parts: the option name,
-- the argument list (no spaces allowed inside it), and the body,
-- which is everything after the whitespace that follows the
-- argument list.
odef_split = function (odefstr)
    local three_fields = "^%s*(%S+)%s+(%S+)%s+(.*)"
    return string.match(odefstr, three_fields)
  end
-- Turn an option definition into Lua source code. For a definition
-- "NAME ARGS BODY" the code defines _O["-NAME"] as a function of ARGS
-- that runs BODY and then calls dooptions(...), and also makes
-- dooption_NAME an alias for that function.
odef_ = function (odefstr)
    local optname, argstr, body = odef_split(odefstr)
    local template =
        '_O["-%s"] = function (%s)\n%s\ndooptions(...)\nend\n'..
        'dooption_%s = _O["-%s"]\n'
    return format(template, optname, argstr, body, optname, optname)
  end
-- Define a command-line option: generate its code with odef_ and run
-- that code right away with lua_eval.
odef = function (odefstr)
    local code = odef_(odefstr)
    lua_eval(code)
  end

-- Input options:
-- Input options. Each odef below defines a handler _O["-NAME"] (see
-- odef_) that consumes one argument, appends to one of the input
-- strings, and then hands the remaining arguments to dooptions:
--   -big_videos FILE     append the contents of FILE to lv_big_ls
--   -big_videos_ls DIR   append the *.mp4 listing of DIR to lv_big_ls
--   -videos FILE         append the contents of FILE to lv_pendrive_ls
--   -videos_ls DIR       append the *.mp4 listing of DIR to lv_pendrive_ls
--   -text FILE           append the contents of FILE to lv_text
odef [[ big_videos     fname,...  lv_big_ls      = lv_big_ls ..
                                                   ee_readfile(fname)      ]]
odef [[ big_videos_ls  dir,...    lv_big_ls      = lv_big_ls ..
                                                   ls_videos(dir)          ]]
odef [[ videos         fname,...  lv_pendrive_ls = lv_pendrive_ls ..
                                                   ee_readfile(fname)      ]]
odef [[ videos_ls      dir,...    lv_pendrive_ls = lv_pendrive_ls ..
                                                   ls_videos(dir)          ]]
odef [[ text           fname,...  lv_text = lv_text .. ee_readfile(fname)  ]]

-- midstuff
-- midstuff
-- lv_blogme is the blogme source that -lv_html renders; it defaults to
-- "local-videos.blogme" in blogmedir and can be changed with
-- -lv_blogme FILE.
lv_blogme = blogmedir.."local-videos.blogme"
odef [[ lv_blogme      fname,...  lv_blogme = fname ]]

-- -lv_calc           runs lv_prepare_tables.
-- -lv_html OUTFILE   runs lv_prepare_tables, processes lv_blogme with
--                    doblogme, and writes blogme_output to OUTFILE.
-- NOTE(review): lv_prepare_tables appends to its output arrays, so
-- using both -lv_calc and -lv_html in one run (or either one twice)
-- looks like it would duplicate entries -- confirm.
odef [[ lv_calc  ...           lv_prepare_tables() ]]
odef [[ lv_html  outfname,...  lv_prepare_tables()
                               -- local input = blogmedir.."local-videos.blogme"
                               -- doblogme(readfile(input))
                               doblogme(readfile(lv_blogme))
                               ee_writefile(outfname, blogme_output) ]]

-- Output options:
-- Output options:
--   -copy_to DIR   directory used in the "cp" commands (lv_cp_to)
--   -dl_to DIR     directory used in the download script (lv_dl_to)
--   -cps FILE      write the copy script to FILE ("-" for stdout)
--   -dls FILE      write the download script to FILE ("-" for stdout)
-- -copy_to used to set only lv_copy_to, a variable that nothing in
-- this file reads; lv_cp_cmd uses lv_cp_to, so the option had no
-- effect on the generated commands. It now sets lv_cp_to, and keeps
-- setting lv_copy_to in case something outside this file reads it.
odef [[ copy_to  dir,...    lv_cp_to = dir; lv_copy_to = dir        ]]
odef [[ dl_to    dir,...    lv_dl_to   = dir                        ]]
odef [[ cps      fname,...  ee_writefile(fname, lv_cp_cmds())       ]]
odef [[ dls      fname,...  ee_writefile(fname, lv_download_cmds()) ]]






-- Local Variables:
-- coding: raw-text-unix
-- End: