Warning: this is an htmlized version!
The original is here, and
the conversion rules are here.
#######
#
# E-scripts on wget (and curl).
#
# Note 1: use the eev command (defined in eev.el) and the
# ee alias (in my .zshrc) to execute parts of this file.
# Executing this file as a whole makes no sense.
# An introduction to eev can be found here:
#
#   (find-eev-quick-intro)
#   http://angg.twu.net/eev-intros/find-eev-quick-intro.html
#
# Note 2: be VERY careful and make sure you understand what
# you're doing.
#
# Note 3: If you use a shell other than zsh things like |&
# and the for loops may not work.
#
# Note 4: I always run as root.
#
# Note 5: some parts are too old and don't work anymore. Some
# never worked.
#
# Note 6: the definitions for the find-xxxfile commands are in my
# .emacs.
#
# Note 7: if you see a strange command check my .zshrc -- it may
# be defined there as a function or an alias.
#
# Note 8: the sections without dates are always older than the
# sections with dates.
#
# This file is at <http://angg.twu.net/e/wget.e>
#           or at <http://angg.twu.net/e/wget.e.html>.
#        See also <http://angg.twu.net/emacs.html>,
#                 <http://angg.twu.net/.emacs[.html]>,
#                 <http://angg.twu.net/.zshrc[.html]>,
#                 <http://angg.twu.net/escripts.html>,
#             and <http://angg.twu.net/>.
#
#######




# «.mailing-list»		(to "mailing-list")
# «.wget_patch_potato»		(to "wget_patch_potato")
# «.accept-and-reject»		(to "accept-and-reject")
# «.user-agent»			(to "user-agent")
# «.wget-flv-player»		(to "wget-flv-player")
# «.--no-check-certificate»	(to "--no-check-certificate")
# «.overwriting»		(to "overwriting")
# «.recursive»			(to "recursive")
# «.curl»			(to "curl")
# «.wget-O-»			(to "wget-O-")
# «.wget2»			(to "wget2")
# «.e-mail-2021oct07»		(to "e-mail-2021oct07")
# «.curl-I»			(to "curl-I")
# «.not-converting-space»	(to "not-converting-space")
# «.psne-plkp»			(to "psne-plkp")




#####
#
# mailing-list
# 2021oct06
#
#####

# «mailing-list»  (to ".mailing-list")
# https://www.gnu.org/software/wget/
# https://lists.gnu.org/mailman/listinfo/bug-wget
# https://lists.gnu.org/mailman/listinfo/wget-dev
# https://lists.gnu.org/archive/html/bug-wget/
# https://lists.gnu.org/archive/html/wget-dev/




#####
#
# wget (potato)
# 2000jun04
#
#####

# (find-status "wget")
# (find-vldifile "wget.list")
# (find-status   "wget")
# (find-vldifile "wget.list")
# (find-udfile   "wget/")
# (find-fline "/usr/doc/wget/")
# (find-fline "/usr/doc/wget/NEWS.gz" ".wgetrc")


# (find-node "(wget)Top")
# (find-node "(wget)Download Options")
# (find-node "(wget)Download Options" "timeout")
# (find-fline "~/.wgetrc")




#####
#
# my patch for wget, for potato
# 2000apr03
#
#####

# «wget_patch_potato»  (to ".wget_patch_potato")

# (find-fline "$SDEBIAN/ls-lR.i" "potato/main/source/web/wget")
# Note that the .orig.tar.gz is the same as slink's.

# Trick to generate a .deb with a different version:
# (find-packfile "packaging.text.gz" "changelog")
# (find-packfile "packaging.text.gz" "\n3.2.3. `debian/changelog'")

# (find-es "slink" "wget_patch")
#*
# Unpack the potato source package. NOTE(review): pdsc is not defined in this
# file -- presumably a .zshrc helper that unpacks a .dsc under /usr/src/.
pdsc $SDEBIAN/dists/potato/main/source/web/wget_1.5.3-3.dsc
# Drop "~" from URL_UNSAFE (line 59 of url.c).
cd /usr/src/wget-1.5.3/src/
patch url.c <<'--%%--'
59c59
< # define URL_UNSAFE " <>\"#%{}|\\^~[]`@:\033"
---
> # define URL_UNSAFE " <>\"#%{}|\\^[]`@:\033" /* Edrx: removed "~" */
--%%--
# Prepend a changelog entry for version 1.5.3-3edrx.
# The trailer date says "Mon": 3 Apr 2000 was a Monday, not a Tuesday.
cd /usr/src/wget-1.5.3/debian/
patch changelog <<'--%%--'
0a1,6
> wget (1.5.3-3edrx) unstable; urgency=low
> 
>   * Removed "~" from URL_UNSAFE.
>   
>  -- Eduardo Ochs <edrx@mat.puc-rio.br>  Mon, 3 Apr 2000 20:00:00 -0300
> 
--%%--

# Rebuild the tags table and the binary package; the build log goes to "odrb".
cd /usr/src/wget-1.5.3/
etags src/*.[ch]
debian/rules binary	|& tee odrb

#*
# Move the built .deb(s) to /usr/src/.debs/ and install the 1.5.3-3edrx one.
mv -iv /usr/src/wget*deb /usr/src/.debs/
dpkg -i /usr/src/.debs/wget_1.5.3-3edrx_i386.deb





######
#
# wget 1.5.3 (slink, with patch)
#
######

# «wget_patch»

# For more on wget hacking, see:
# (find-es "net" "wget_hacking")

# Unpack the slink source package. NOTE(review): pdsc is not defined in this
# file -- presumably a .zshrc helper that unpacks a .dsc under /usr/src/.
pdsc /big/slinks2/dists/slink/main/source/web/wget_1.5.3-1.1.dsc
# Drop "~" from URL_UNSAFE (line 59 of url.c).
cd /usr/src/wget-1.5.3/src/
patch url.c <<'--%%--'
59c59
< # define URL_UNSAFE " <>\"#%{}|\\^~[]`@:\033"
---
> # define URL_UNSAFE " <>\"#%{}|\\^[]`@:\033" /* Edrx: removed "~" */
--%%--
# Rebuild the tags table and the binary package; the build log goes to "odrb".
cd /usr/src/wget-1.5.3/
etags src/*.[ch]
debian/rules binary	|& tee odrb
# Move the resulting .deb(s) to /usr/src/.debs/.
mv -iv /usr/src/wget*deb /usr/src/.debs/
# NOTE(review): apt-update is not defined in this file -- presumably a .zshrc
# helper that refreshes the package lists; confirm.
apt-update

# Install the rebuilt package. debian/changelog is not patched in this
# section, and the .deb installed here carries the version 1.5.3-1.1.
cd /usr/src/.debs/
dpkg -i wget_1.5.3-1.1_i386.deb





#######
#
# wget 1.5.2
#
#######

# Unpack a fresh wget-1.5.2 tree from the tarball stored under /snarf/ and
# configure it without NLS, with prefix /usr; the configure log goes to "oc".
cd /usr/src/
rm -Rv wget-1.5.2/
tar -xvzf /snarf/http/sunsite.auc.dk/ftp/pub/infosystems/wget/wget-1.5.2.tar.gz
cd /usr/src/wget-1.5.2/
./configure --disable-nls --prefix=/usr |& tee oc
#
# lynx doesn't like "~" as "%7E", remove it from URL_UNSAFE
# (find-fline "/usr/src/wget-1.5.2/src/url.c" "URL_UNSAFE")
#
# Build and install, logging to "om" and "omi". The URL_UNSAFE edit described
# above is not scripted here; it has to be made by hand before running make.
make |& tee om
cd /usr/src/wget-1.5.2/
make install |& tee omi

# (find-wgfile "src/url.c" "convert_links")





#######
#
# wget 1.5.2: bug on RH, "Cannot determine user-id."
#
#######

# Run only the preprocessor (-E) on host.c, with the -D definitions given
# below, and save the expanded source as host.E for inspection.
cd /usr/src/wget-1.5.2/src/
gcc -I. -DHAVE_CONFIG_H -DSYSTEM_WGETRC=\"/usr/etc/wgetrc\" -DLOCALEDIR=\"/usr/share/locale\" -Wall -Wno-implicit -E host.c > host.E
# (find-wgfile "src/host.E" "Cannot")
# (find-wgfile "src/utils.c" "pwd_cuserid")


cd ~/C/
cat > pwuid.c <<'---'
#include <pwd.h>
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>   /* exit */
#include <unistd.h>   /* getuid */

/* Minimal test for the "Cannot determine user-id." problem: look up the
   passwd entry of the current user with getpwuid(getuid()), print the
   pointer that was returned, then print the user name.
   Exit status: 1 if there is no entry or no name, 0 otherwise.
   NOTE(review): presumably this is the same lookup that wget's pwd_cuserid
   in src/utils.c performs -- confirm against the wget source. */
int main(void) {
  struct passwd *pwd;
  pwd = getpwuid (getuid ());
  /* A pointer must be printed with %p; %d is undefined behaviour and
     truncates the value on 64-bit systems. */
  printf("%p\n", (void *) pwd);
  if (!pwd || !pwd->pw_name)
    exit(1);
  printf("%s\n", pwd->pw_name);
  return 0;
}
---
gcc -o pwuid pwuid.c
./pwuid




#######
#
# wget 1.5.3 (on boto)
#
#######

# Download the tarball. NOTE(review): psne is not defined in this file --
# presumably the .zshrc function that saves a URL under $S/; confirm.
psne ftp://sunsite.auc.dk/pub/infosystems/wget/wget-1.5.3.tar.gz

# Unpack a fresh tree under $USRC/ and configure it without NLS, with
# prefix /usr; the configure log goes to "oc".
cd $USRC/
rm -Rv wget-1.5.3/
tar -xvzf $S/ftp/sunsite.auc.dk/pub/infosystems/wget/wget-1.5.3.tar.gz
cd $USRC/wget-1.5.3/
./configure --disable-nls --prefix=/usr |& tee oc
#
# lynx doesn't like "~" as "%7E", remove it from URL_UNSAFE
# (find-fline  "$USRC/wget-1.5.3/src/url.c" "URL_UNSAFE")
# (find-fline "$BUSRC/wget-1.5.3/src/url.c" "URL_UNSAFE")
#
# Build (log in "om"). The URL_UNSAFE edit described above is not scripted
# here; it has to be made by hand before running make.
cd $USRC/wget-1.5.3/
make |& tee om

# There is no "make install" here: only the wget binary is copied, to
# ~/bin-Linux/.
cp src/wget ~/bin-Linux/




######
#
# wget 1.5.3 (slink) - hacking
#
######

# «wget_hacking»

# The basic patch is at:
# (find-es "slink" "wget_patch")

# Check that apt installed the patched version.
# (find-fline "/usr/src/.debs/")
# (find-vldifile "wget.postinst")

# How I got the input for "patch":
#cd /usr/src/wget-1.5.3/src/; cp -iv url.c url.c~
# lynx doesn't like "~" as "%7E", remove it from URL_UNSAFE
# (find-wgfile "src/url.c" "URL_UNSAFE")
#diff url.c~ url.c | tee ~/o

#
# Now for some heavier debugging.
#

# (find-fline "~/GDB/.gdbinit.wget")
# (gdb "gdb -x ~/GDB/.gdbinit.wget")

# Test run for the debugging session: a recursive download (-r) that keeps
# existing files (-nc) and converts links (-k); the tags referenced below
# are convert_links and convert_all_links.
cd /snarf/http/
wget -r -nc -k http://hypatia.dcs.qmw.ac.uk/html/authors.html

# (find-wgtag "convert_links")
# (find-wgtag "convert_all_links")





#####
#
# accepting and rejecting
# 2004oct18
#
#####

# «accept-and-reject»  (to ".accept-and-reject")
#*
# (find-node "(wget)Types of Files")
# Recursive download of the local web server into a scratch directory,
# skipping .deb files and every URL that has a query string.
rm -Rv /tmp/wgetish/
mkdir  /tmp/wgetish/
cd     /tmp/wgetish/
wget -r -R "*.deb" --reject-regex '[?]' http://127.0.0.1/

#*
# The old `-R "*\\?*"' still downloaded all the things like "index.html?N=A":
# -R/-A patterns are not matched against the query string. --reject-regex
# (wget >= 1.14) is matched against the complete URL, so it does reject them.




#####
#
# user-agent
# 2005mar16
#
#####

# «user-agent»  (to ".user-agent")
# (find-es "http" "disguise-user-agent")
# (find-node "(wget)HTTP Options" "`--user-agent=AGENT-STRING'")



#####
#
# save-cookies and post-data
# 2006aug13
#
#####

# (find-node "(wget)HTTP Options" "wget --save-cookies cookies.txt")
# (find-node "(wget)HTTP Options" "--post-data 'user=foo&password=bar'")





#####
#
# wget 1.10.2 (for flv-player?)
# 2007apr08
#
#####

# «wget-flv-player»  (to ".wget-flv-player")
# http://ftp.debian.org/debian/pool/main/w/wget/
# http://ftp.debian.org/debian/pool/main/w/wget/wget_1.10.2-2.dsc
# http://ftp.debian.org/debian/pool/main/w/wget/wget_1.10.2-2.diff.gz
# http://ftp.debian.org/debian/pool/main/w/wget/wget_1.10.2.orig.tar.gz
#*
# Start from an empty ~/usrc/wget/ and copy the wget_1.10.2* source package
# files into it from the copy stored under $S/.
rm -Rv ~/usrc/wget/
mkdir  ~/usrc/wget/
cd $S/http/ftp.debian.org/debian/pool/main/w/wget/
cp -v wget_1.10.2* ~/usrc/wget/
# Unpack the source package.
cd     ~/usrc/wget/
dpkg-source -sn -x wget_1.10.2-2.dsc
# Build binary packages only (-b), unsigned (-us -uc), using fakeroot to gain
# root (-rfakeroot) and without checking build dependencies (-d); the build
# log goes to "odb".
cd     ~/usrc/wget/wget-1.10.2/
dpkg-buildpackage -us -uc -b -rfakeroot     -d    |& tee odb

#*
# (code-c-d "wget" "~/usrc/wget/wget-1.10.2/")
# (find-wgetfile "")
# (find-wgetfile "debian/control" "libssl-dev (>= 0.9.8-1)")
# (find-status "libssl-dev")

# http://www.martijndevisser.com/blog/article/flv-player-updated
# http://www.martijndevisser.com/download/flvplayer/flvplayer_sources.zip



#####
#
# --no-check-certificate
# 2007dec13
#
#####

# «--no-check-certificate»  (to ".--no-check-certificate")
# (find-angg ".wgetrc")




#####
#
# overwriting
# 2011dec29
#
#####

# «overwriting»  (to ".overwriting")
# (find-sh "wget -h")
# (find-sh "wget -h" "-nc, --no-clobber")
# (find-sh "wget -h" "-N,  --timestamping")
# (find-wgetnode "Download Options" "`--no-clobber'")
# (find-wgetnode "Download Options"  "--no-clobber")
# (find-wgetnode "Download Options" "`--timestamping'")
# (find-wgetnode "Download Options"  "--timestamping")
# (find-wgetnode "Time-Stamping")




#####
#
# recursive
# 2013feb23
#
#####

# «recursive» (to ".recursive")
# (find-wgetnode "Recursive Download")
# (find-wgetnode "Directory-Based Limits" "--no-parent")
# (find-wgetnode "Recursive Retrieval Options" "--recursive")
# (find-angg ".zshrc" "psne")
# (find-angg ".zshrc" "psne" "psner")
# (find-esgrep "grep --color=auto -nH --null -e 'wget -r' *.e")
# (find-esgrep "grep --color=auto -nH --null -e 'wgetrecursive' *.e")
# (find-angg ".emacs.templates" "find-wgetrecursive-links")




#####
#
# curl
# 2017jul13
#
#####

# «curl» (to ".curl")
# (find-man "1 curl")
# (find-man "1 curl" "-O, --remote-name")
# https://mail.google.com/mail/ca/u/0/#search/yamauti/15beed9316009cee




#####
#
# wget -O -
# 2021aug05
#
#####

# «wget-O-»  (to ".wget-O-")
# (find-node "(wget)Exit Status")
# (find-eev "eev-plinks.el" "find-wget")
# (find-eev "eev-plinks.el" "find-callprocess")
# (find-efunctiondescr 'call-process "numeric exit status")

* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
# Fetch a URL to stdout (-O -), redirect it to /tmp/o, and show wget's exit
# status. NOTE(review): presumably agda.e exists on the server and foo.e does
# not, so that the two runs show the status for success and for failure.
URL=http://angg.twu.net/e/agda.e
wget -O - $URL > /tmp/o
echo $?

URL=http://angg.twu.net/e/foo.e
wget -O - $URL > /tmp/o
echo $?

# (find-man "1 dash")
# (find-man "1 dash" "Command Exit Status")



#####
#
# wget2
# 2021oct06
#
#####

# «wget2»  (to ".wget2")
# https://gitlab.com/gnuwget/wget2
# https://gitlab.com/gnuwget/wget2/-/issues
# https://ftp.gnu.org/gnu/wget/
# https://ftp.gnu.org/gnu/wget/wget2-2.0.1.tar.gz
# (find-untgz-links "https://ftp.gnu.org/gnu/wget/wget2-2.0.1.tar.gz" "wget")
# (code-c-d "wget" "~/usrc/wget2-2.0.1/")
# (find-wgetfile "")

* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
# Unpack a fresh wget2-2.0.1 tree into ~/usrc/ from the tarball stored
# under $S/.
rm -Rv ~/usrc/wget2-2.0.1/
tar -C ~/usrc/ -xvzf \
  $S/https/ftp.gnu.org/gnu/wget/wget2-2.0.1.tar.gz
cd     ~/usrc/wget2-2.0.1/

# First line: names that end in "c" or "h" but not in ".c" or ".h".
# Second line: save the sorted list of regular files ending in ".c" or ".h"
# in .files.ch.
find *         | sort | grep  '.[ch]$' | grep -v '\.[ch]$'
find * -type f | sort | grep '\.[ch]$' | tee .files.ch
# Search those files for "UNSAFE" and for the "#%{}" fragment that appears in
# the URL_UNSAFE definition of wget 1.5.3.
grep UNSAFE $(cat .files.ch)
grep '#%{}' $(cat .files.ch)

# (code-c-d "wget" "~/usrc/wget2-2.0.1/")
# (find-wgetfile "")
# (find-wgetgrep "grep --color=auto -nRH --null -e unsafe *")


https://ftp.gnu.org/gnu/wget/wget-1.5.3.tar.gz

* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
# Unpack a fresh wget-1.5.3 tree into ~/usrc/ from the tarball stored
# under $S/.
rm -Rv ~/usrc/wget-1.5.3/
tar -C ~/usrc/ -xvzf \
  $S/https/ftp.gnu.org/gnu/wget/wget-1.5.3.tar.gz
cd     ~/usrc/wget-1.5.3/

# Save the sorted list of regular files ending in ".c" or ".h" in .files.ch.
find * -type f | sort | grep '\.[ch]$' | tee .files.ch

# (code-c-d "wget153" "~/usrc/wget-1.5.3/")
# (find-wget153file "")
# (find-wget153file "src/url.c" "# define URL_UNSAFE")



#####
#
# An e-mail that I did not send to the wget mailing list
# 2021oct07
#
#####

# «e-mail-2021oct07»  (to ".e-mail-2021oct07")
# See: (find-es "w32" "wget-1.21.2-win64")
#      (find-angg "2021-oficina/README")

Hi people,

are there wget binaries for Windows, like these ones,

  http://gnuwin32.sourceforge.net/packages/wget.htm
  http://downloads.sourceforge.net/gnuwin32/wget-1.11.4-1-bin.zip

but that correspond to a recent version of wget or wget2, and that
will run on recent versions of Windows? I don't have Windows on my
machine - more explanations soon - and a very non-technical friend of
mine is helping me by testing some things on his machine with
Windows 11... and when we tried to run the wget.exe from the .zip above
on his machine we got a message that said this (in Portuguese):

  Esta versão de c:\Users\danie\AppData\Roaming\bin\wget.exe não é
  compatível com a versão do Windows sendo executada. Verifique as
  informações de sistema do computador e contate o fornecedor do
  software.

In English that means:

  This version of c:\Users\danie\AppData\Roaming\bin\wget.exe is not
  compatible with the version of Windows being executed. Verify the
  system information in this computer and contact the supplier of the
  software.

If I run this on my computer

  rm -Rv /tmp/wget/
  mkdir  /tmp/wget/
  cd     /tmp/wget/
  wget http://downloads.sourceforge.net/gnuwin32/wget-1.11.4-1-bin.zip
  unzip wget-1.11.4-1-bin.zip
  file bin/wget.exe

the command "file" outputs this:

  bin/wget.exe: PE32 executable (console) Intel 80386 (stripped to
  external PDB), for MS Windows

So I _guess_ that my friend's computer is configured to not run 32-bit
programs...

The thing is that I am preparing a workshop that will be attended by
several Windows users who have never used terminals in their lives,
and for some things I will need them to have wget. The full story is
too long and too Emacs-centric... if anyone is really curious it is in
these two threads:

  https://lists.gnu.org/archive/html/help-gnu-emacs/2021-10/msg00037.html
  https://lists.gnu.org/archive/html/help-gnu-emacs/2021-10/threads.html#00037
  https://lists.gnu.org/archive/html/emacs-devel/2021-10/msg00491.html
  https://lists.gnu.org/archive/html/emacs-devel/2021-10/threads.html#00491

In this message Eli Zaretskii suggested that there may be pre-built
binaries of more recent versions of wget, but I couldn't find them...

  https://lists.gnu.org/archive/html/help-gnu-emacs/2021-10/msg00139.html

Thanks in advance!
  Eduardo Ochs
  http://angg.twu.net/#eev




#####
#
# curl -I: Fetch the headers only
# 2022sep03
#
#####

# «curl-I»  (to ".curl-I")
# (find-man "1 curl")
# (find-man "1 curl" "-I, --head")
# (find-sh "curl -I http://example.com")
# (find-sh "curl -I https://example.com")




#####
#
# Disabling the conversion "%20"->" " in the local file name?
# 2023aug20
#
#####

# «not-converting-space»  (to ".not-converting-space")
# https://lists.gnu.org/archive/html/bug-wget/2023-08/msg00014.html
# Subj: Disabling the conversion "%20"->" " in the local file name?

Hi list,

how can I tell wget that "%20"s in URLs should not be converted to
spaces in local file names? For example, if I run this:

  wget     https://etoysillinois.org/files/Etoys%20tutorial%201.pdf
  wget -nc https://etoysillinois.org/files/Etoys%20tutorial%201.pdf

then wget knows that the output document is "Etoys tutorial 1.pdf",
and the "wget -nc" sees that the "Etoys tutorial 1.pdf" is already
there and doesn't download it again... I can make the output document
be "Etoys%20tutorial%201.pdf" with "-O", like this,

  wget -O "Etoys%20tutorial%201.pdf" \
           https://etoysillinois.org/files/Etoys%20tutorial%201.pdf
  wget -O "Etoys%20tutorial%201.pdf" \
       -nc https://etoysillinois.org/files/Etoys%20tutorial%201.pdf

but are there other ways?

Thanks in advance!
  Cheers,
    Eduardo Ochs
    http://anggtwu.net/



#####
#
# A psne that keeps "%"s
# 2023aug21
#
#####

# «psne-plkp»  (to ".psne-plkp")
# (find-angg ".zshrc" "psne")

* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
# psne-plkp URL
# Download URL with wget into the directory $(urldir URL), creating it if
# needed, and name the local file $(urlfile URL) explicitly with -O, so that
# a "%20" in the URL stays a "%20" in the local file name. With -nc the
# download is skipped when that file already exists.
# NOTE(review): urldir and urlfile are not defined in this file -- presumably
# .zshrc helpers that map a URL to its local directory and file name.
# Side effects: sets the globals ud and uf (the test after this function
# reads $ud) and leaves the shell in the directory $ud.
# All expansions are quoted so that the function does not depend on zsh's
# no-word-splitting behaviour; "--" protects mkdir and cd from names that
# start with "-".
function psne-plkp () {
  ud=$(urldir "$1") &&
  uf=$(urlfile "$1") &&
  mkdir -p -- "$ud" &&
  cd -- "$ud" && {
    # echo "$1" >> ~/.psne.log
    # echo wget -O "$uf" -nc "$1"
           wget -O "$uf" -nc "$1"
} }

# Test with a URL that contains "%20"s. psne-plkp stores the target directory
# in the global variable ud, so "echo $ud" shows which directory was used.
psne-plkp https://etoysillinois.org/files/Etoys%20tutorial%201.pdf
echo $ud

# Download a series of PDFs (100-21 to 119-21); some of the names contain
# "%20".
psne-plkp http://www.noticias.uff.br/bs/2021/06/100-21.pdf
psne-plkp http://www.noticias.uff.br/bs/2021/06/101-21%20RETIFICADO.pdf
psne-plkp http://www.noticias.uff.br/bs/2021/06/102-21.pdf
psne-plkp http://www.noticias.uff.br/bs/2021/06/103-21%20RETIFICADO.pdf
psne-plkp http://www.noticias.uff.br/bs/2021/06/104-21.pdf
psne-plkp http://www.noticias.uff.br/bs/2021/06/105-21.pdf
psne-plkp http://www.noticias.uff.br/bs/2021/06/106-21.pdf
psne-plkp http://www.noticias.uff.br/bs/2021/06/107-21.pdf
psne-plkp http://www.noticias.uff.br/bs/2021/06/108-21%20RETIFICADO.pdf
psne-plkp http://www.noticias.uff.br/bs/2021/06/109-21%20RETIFICADO.pdf
psne-plkp http://www.noticias.uff.br/bs/2021/06/110-21.pdf
psne-plkp http://www.noticias.uff.br/bs/2021/06/111-21.pdf
psne-plkp http://www.noticias.uff.br/bs/2021/06/112-21%20RETIFICADO.pdf
psne-plkp http://www.noticias.uff.br/bs/2021/06/113-21.pdf
psne-plkp http://www.noticias.uff.br/bs/2021/06/114-21.pdf
psne-plkp http://www.noticias.uff.br/bs/2021/06/115-21.pdf
psne-plkp http://www.noticias.uff.br/bs/2021/06/116-21.pdf
psne-plkp http://www.noticias.uff.br/bs/2021/06/117-21.pdf
psne-plkp http://www.noticias.uff.br/bs/2021/06/118-21.pdf
psne-plkp http://www.noticias.uff.br/bs/2021/06/119-21.pdf

# Convert every PDF below the download directory to text, writing
# NAME.pdf.txt into the current directory, then search the text files.
# Only regular files whose names end in ".pdf" are selected, so a rerun does
# not pick up the generated ".pdf.txt" files; reading the names line by line
# keeps file names that contain spaces in one piece.
cd "$S/http/www.noticias.uff.br/bs/2021/06/"
find * -type f -name '*.pdf' | while IFS= read -r i; do
  j=$(basename "$i").txt
  echo "$j"
  echo pdftotext -layout "$i" "$j"
       pdftotext -layout "$i" "$j"
done

grep -i gimba *.txt



#####
#
# dots
#
#####

# (find-node "(wget)Top")
# (find-man "1 wget" "--progress=type")
# (find-node "(wget)Download Options" "--progress=TYPE")



libssl-dev


# (find-es "w32" "wget-2021")


https://news.ycombinator.com/item?id=31246438 History of the browser user-agent string (webaim.org)

https://lists.gnu.org/archive/html/bug-wget/2022-05/msg00016.html GNU Wget2 2.0.1 released
https://eternallybored.org/misc/wget/
https://lists.gnu.org/archive/html/bug-wget/2022-05/msg00025.html 
https://gitlab.com/gnuwget/wget2/-/tags/v2.0.1
https://hurl.dev/blog/2023/06/30/announcing-hurl-4.0.0.html
https://daniel.haxx.se/blog/2023/07/31/introducing-curl-command-line-variables/




#  Local Variables:
#  coding:               utf-8-unix
#  End: