summary refs log tree commit diff
path: root/parse_stagit
diff options
context:
space:
mode:
Diffstat (limited to 'parse_stagit')
-rwxr-xr-xparse_stagit156
1 files changed, 156 insertions, 0 deletions
diff --git a/parse_stagit b/parse_stagit
new file mode 100755
index 0000000..39feda2
--- /dev/null
+++ b/parse_stagit
@@ -0,0 +1,156 @@
+#!/bin/sh
+#
+# parse_stagit - mirror the README files of stagit-style repos
+#
+# Usage: parse_stagit <urlbase> <destdir>
+#
+#   <urlbase>  URL of the stagit index page listing the repositories
+#   <destdir>  directory under which the extracted READMEs are stored
+#
+# Every repository linked from the index is probed for each name in
+# READMES at <urlbase>/<repo>/file/<name>.  The text of each page found
+# is written to <destdir>/<proto>/<host and path>/<repo>/<name>.
+#
+# Requires torify, curl and xml2tsv in PATH.
+#
+
+set -u
+
+if [ "$#" -lt 2 ]; then
+  printf 'Usage: %s <urlbase> <destdir>\n' "$0" >&2
+  exit 1
+fi
+
+URLBASE="$1"
+DEST="$2"
+
+# Sub-directory of a repository that holds the rendered file pages.
+SUBPATH="file"
+
+# Candidate README page names, tried in this order for every repository.
+READMES="README.html README.txt.html README.md.html readme.html readme.txt.html readme.md.html"
+
+# xml2tsv row produced by an error page titled "404 Not Found".
+NOT_FOUND_RE='^/html/head/title[[:blank:]]+404 Not Found'
+# xml2tsv rows of the class=line anchors inside <pre> on a file page.
+LINE_RE='/html/body/div/pre/a[[:blank:]]+href=#.*[[:blank:]]+class=line'
+
+# Split the URL into scheme and remainder: the mirror is laid out as
+# <destdir>/<scheme>/<remainder>.  A URL without a scheme is filed under
+# "http" instead of repeating the whole URL twice in the path.
+case "$URLBASE" in
+  *://*)
+    PROTO=${URLBASE%%://*}
+    hostpath=${URLBASE#*://}
+    ;;
+  *)
+    PROTO="http"
+    hostpath=$URLBASE
+    ;;
+esac
+base=${URLBASE%/}
+DIRBASE="$PROTO/${hostpath%/}"
+echo "proto: $PROTO"
+echo "dirbase: $DIRBASE"
+
+# Temporary files of the README being fetched; removed on any exit.
+tmp_page=
+tmp_text=
+
+# Remove the temporary files of the current README, if any.
+cleanup() {
+  [ -z "$tmp_page" ] || rm -f -- "$tmp_page" "$tmp_text"
+}
+trap cleanup EXIT
+trap 'exit 1' HUP INT TERM
+
+# fetch <curl args...>: download through Tor, following redirects, and
+# write the body to stdout.  stdin is detached so that curl cannot eat
+# the repository list the main loop reads from.
+fetch() {
+  torify curl -Ls "$@" </dev/null
+}
+
+# Print the href of every repository link on the index page, one per line.
+list_repos() {
+  fetch "$URLBASE" | xml2tsv \
+    | grep "/html/body/div/table/tbody/tr/td/a" \
+    | awk '{ link = $(NF-1); gsub(/href=/, "", link); print link }'
+}
+
+# Decode the backslash escapes of xml2tsv text read from stdin: "\\"
+# becomes a backslash, "\t" a tab and "\n" is dropped, since every row
+# already is a line of its own.  Escapes are decoded left to right so
+# that an escaped backslash followed by "n" or "t" stays literal text.
+unescape_tsv() {
+  awk '{
+    s = $0; out = ""
+    while ((i = index(s, "\\")) > 0) {
+      c = substr(s, i + 1, 1)
+      out = out substr(s, 1, i - 1)
+      if (c == "\\") out = out "\\"
+      else if (c == "t") out = out "\t"
+      else if (c != "n") out = out "\\" c
+      s = substr(s, i + 2)
+    }
+    print out s
+  }'
+}
+
+# mirror_readme <url> <outfile>: fetch one rendered file page and store
+# its text in <outfile>.  Fails, leaving <outfile> untouched, when the
+# page cannot be fetched, is a "404 Not Found" page or yields no text.
+mirror_readme() {
+  tmp_page="$2.tmp"
+  tmp_text="$2.new"
+  # -f: on an HTTP error curl fails instead of saving the error page.
+  fetch -f "$1" >"$tmp_page" || return 1
+  if xml2tsv <"$tmp_page" 2>/dev/null | grep -Eaiq "$NOT_FOUND_RE"; then
+    return 1
+  fi
+  xml2tsv <"$tmp_page" 2>/dev/null \
+    | grep -Eai "$LINE_RE" \
+    | cut -f 6- \
+    | unescape_tsv >"$tmp_text"
+  [ -s "$tmp_text" ] || return 1
+  mv -f -- "$tmp_text" "$2"
+}
+
+repos=$(list_repos)
+if [ -z "$repos" ]; then
+  printf '%s: no repositories found at %s\n' "$0" "$URLBASE" >&2
+  exit 1
+fi
+
+while IFS= read -r link; do
+  # Index links point at <repo>/log.html; keep only the <repo> part.
+  link="${link%%log.html}"
+  repo=${link%/}
+  # The href is remote data that becomes part of a local path: refuse
+  # anything that is not a plain relative path below <destdir>.
+  case "$repo" in
+    ''|/*|*://*|..|../*|*/..|*/../*)
+      printf 'skipping suspicious link: %s\n' "$link" >&2
+      continue
+      ;;
+  esac
+  baselink="$base/$repo"
+  printf 'link: %s\nbaselink: %s\n' "$link" "$baselink" >&2
+  REPODIR="$DEST/$DIRBASE/$repo"
+  if ! mkdir -p -- "$REPODIR"; then
+    printf 'cannot create %s, skipping\n' "$REPODIR" >&2
+    continue
+  fi
+  for f in $READMES; do
+    printf ' trying file %s...' "$baselink/$SUBPATH/$f"
+    if mirror_readme "$baselink/$SUBPATH/$f" "$REPODIR/$f"; then
+      printf '[OK]\n'
+    else
+      printf '[FAILED]\n'
+    fi
+    cleanup
+    sleep 1
+  done
+done <<EOF
+$repos
+EOF