torrents.csv/scripts/scan_torrents.sh

84 lines
2.6 KiB
Bash
Raw Normal View History

#!/bin/bash
# Scans a directory of .torrent files and appends newly found torrents to
# torrents.csv.
#
# Usage: ./scan_torrents.sh [TORRENTS_DIR]
#
# Every path below is built from the current working directory, so the
# script must be started from the directory it lives in.

# Output CSV that scan results are appended to.
torrents_csv="$(pwd)/../torrents.csv"
# One name per line for every torrent file that has already been scanned.
scanned_out="$(pwd)/../infohashes_scanned.txt"
# Scratch directory for this run; $RANDOM keeps concurrent runs apart.
tmp_torrent_dir="$(pwd)/../tmp_torrents-$RANDOM"
# Not referenced by the code visible in this script.
# NOTE(review): prune.sh is sourced at the end and may read it -- confirm
# before removing.
torrent_files_csv="$(pwd)/../torrent_files.csv"

# Make sure the scanned list exists so it can be used as a grep pattern file.
touch "$scanned_out"

help="Run ./scan_torrents.sh [TORRENTS_DIR] \nor goto https://gitlab.com/dessalines/torrents.csv for more help"

# Print the help text and stop when asked for help or when no directory is
# given. $help is quoted so that "[TORRENTS_DIR]" is never treated as a glob.
if [[ "${1:-}" == "-h" || -z "${1:-}" ]]; then
  echo -e "$help"
  exit 1
fi

torrents_dir="$1"
echo "Torrents dir=$torrents_dir"
# Check dependencies: torrent-tracker-health must be on PATH; if it is not,
# install it globally with npm.
if command -v "torrent-tracker-health" >/dev/null 2>&1; then
  echo "torrent-tracker-health installed."
else
  echo -e "Installing torrent-tracker-health:\nnpm i -g dessalines/torrent-tracker-health \nhttps://github.com/dessalines/torrent-tracker-health\n"
  # "i" is already npm's alias for "install"; an additional "install" word
  # would be taken as the name of a second package to install.
  npm i -g dessalines/torrent-tracker-health
fi
# Loop over all torrents
# Enter the torrents directory; everything below assumes we are inside it.
pushd "$torrents_dir" || {
  echo "Cannot enter torrents dir: $torrents_dir" >&2
  exit 1
}

# Copy the unscanned torrent files to a temp dir
mkdir "$tmp_torrent_dir" || {
  echo "Cannot create temp dir: $tmp_torrent_dir" >&2
  exit 1
}

# TODO need to find a better way to do this for huge dirs
# grep -vFf drops every path that contains one of the already-scanned names
# as a fixed string. IFS= and -r keep whitespace and backslashes in paths
# intact.
find "$(pwd)" -name "*.torrent" \
  | grep -vFf "$scanned_out" \
  | while IFS= read -r torrent_file; do
      cp "$torrent_file" "$tmp_torrent_dir"
    done
# Split these into many directories ( since torrent-tracker-health can't do too many full size torrents)
# Abort if the temp dir cannot be entered: the mv below and the find -delete
# that follows would otherwise run against whatever directory we are still in.
cd "$tmp_torrent_dir" || {
  echo "Cannot enter temp dir: $tmp_torrent_dir" >&2
  exit 1
}

# Earlier approach, kept for reference:
# i=1;while read l;do mkdir $i;mv $l $((i++));done< <(ls|xargs -n100)

# GNU parallel: -n10 hands up to ten file names to each job, and {#} is the
# job's sequence number, so every job creates a numbered directory and moves
# its batch of files into it.
ls | parallel -n10 mkdir {#}\;mv {} {#}
# Scan each batch directory in turn and append its results to torrents.csv.
# names.out and health.out are scratch files in the temp dir, overwritten on
# every iteration.
for tmp_torrent_dir_sub in *; do
  echo "sub dir:$tmp_torrent_dir/$tmp_torrent_dir_sub"

  # Record the name (file name minus .torrent) of every file in this batch.
  # This runs before the deletion below, so files removed as "null" are still
  # written to the scanned list when the batch produces results.
  find "$tmp_torrent_dir_sub" -type f -exec basename {} .torrent \; > names.out

  # Delete null torrents from the temp dir
  # (GNU find rounds sizes up to whole 1 KiB units, so -2k matches files of
  # at most 1 KiB.)
  find "$tmp_torrent_dir_sub" -name "*.torrent" -size -2k -delete

  if [ -z "$(ls -A "$tmp_torrent_dir_sub")" ]; then
    echo "No new torrents."
  else
    # Scrape it
    torrent-tracker-health --torrent "$tmp_torrent_dir_sub"/ > health.out

    # Convert the json results to csv format
    # Created is sometimes null, and a weird date: entries without a created
    # value are skipped, and only the first 16 characters (up to the minute)
    # are parsed into epoch seconds. The last column is the scan time.
    results=$(jq -r '.results[] | select (.created != null ) | [.hash, .name, .length, (.created | .[0:16] | strptime("%Y-%m-%dT%H:%M") | mktime), .seeders, .leechers, .completed, (now | floor)] | join(";")' health.out)

    # If there are no results
    if [ -z "$results" ]; then
      echo "There were no results for some reason."
      cat health.out
    else
      echo "Torrents.csv updated with new torrents."
      echo "$results"

      # Update the torrents.csv and infohashes scanned file
      # printf writes the rows verbatim; echo -e would expand backslash
      # sequences that happen to appear in torrent names.
      printf '%s\n' "$results" >> "$torrents_csv"
      cat names.out >> "$scanned_out"
    fi
  fi
done
# Return to the directory the script was started from (undoes the pushd).
popd

# Remove the temp dir
# ${var:?} aborts instead of running rm -rf with an empty path.
rm -rf -- "${tmp_torrent_dir:?}"

# Scan the torrent dir for new files, and add them
# scan_torrent_files.js is looked up relative to the current directory.
node --max-old-space-size=8096 scan_torrent_files.js --dir "$torrents_dir"

# prune.sh is sourced, so it runs in this shell and sees the variables set
# above. Its contents are not visible here.
. prune.sh