From c0a1e2d301801f1016e3329dffe593849071113f Mon Sep 17 00:00:00 2001 From: Dessalines Date: Thu, 24 Jan 2019 14:45:18 -0800 Subject: [PATCH] Using new tracker health for torrent scanner. Fixes #34 --- scripts/prune.sh | 9 ++++ scripts/scan_torrents.sh | 105 ++++++++++++--------------------------- 2 files changed, 41 insertions(+), 73 deletions(-) diff --git a/scripts/prune.sh b/scripts/prune.sh index 609e44c..64e5c40 100755 --- a/scripts/prune.sh +++ b/scripts/prune.sh @@ -2,6 +2,8 @@ echo "Pruning torrents.csv ..." cd .. torrents_csv="`pwd`/torrents.csv" +scanned_out="`pwd`/infohashes_scanned.txt" + # Remove lines that don't have exactly 7 ';' rg "^([^;]*;){7}[^;]+$" $torrents_csv > tmp_adds @@ -22,6 +24,13 @@ sed -i '1d' $torrents_csv sort -u -t';' -k1,1 -o $torrents_csv $torrents_csv sort -u -t';' -k2,2 -k3,3 -o $torrents_csv $torrents_csv +# Same for the infohashes scanned +sort -u -o $scanned_out $scanned_out + +# Remove torrents with zero seeders +awk -F';' '$5>=1' $torrents_csv> tmp +mv tmp $torrents_csv + # Sort by infohash asc sort --field-separator=';' --key=1 -o $torrents_csv $torrents_csv diff --git a/scripts/scan_torrents.sh b/scripts/scan_torrents.sh index 97b5b4e..e491a16 100755 --- a/scripts/scan_torrents.sh +++ b/scripts/scan_torrents.sh @@ -5,8 +5,12 @@ cd .. torrents_csv="`pwd`/torrents.csv" scanned_out="`pwd`/infohashes_scanned.txt" - +tmp_torrent_dir="`pwd`/tmp_torrents" +names_out="`pwd`/names.out" +health_out="`pwd`/health.out" touch $scanned_out +touch $names_out +touch $health_out help="Run ./scan_torrents.sh [TORRENTS_DIR] \nor goto https://gitlab.com/dessalines/torrents.csv for more help" if [ "$1" == "-h" ] || [ -z "$1" ]; then @@ -29,79 +33,34 @@ fi # Loop over all torrents pushd $torrents_dir # for torrent_file in *.torrent; do +# Copy the unscanned torrent files to a temp dir +mkdir $tmp_torrent_dir find `pwd` -name "*.torrent" | sort -n | grep -vFf $scanned_out | while read torrent_file ; do - - file_infohash=$(basename $torrent_file | cut -d'.' -f 1) - - # Scrape it - health_text=$(torrent-tracker-health --torrent $torrent_file --timeout 1000 --addTrackers={udp://tracker.coppersurfer.tk:6969/announce,udp://tracker.internetwarriors.net:1337/announce,udp://tracker.opentrackr.org:1337/announce,udp://exodus.desync.com:6969/announce,udp://explodie.org:6969/announce}) - - echo -e "$health_text" - - infohash=$(jq -r '.hash' <<< $health_text) - name=$(jq -r '.name' <<< $health_text) - size_bytes=$(jq -r '.length' <<< $health_text) - seeders=$(jq -r '.seeds' <<< $health_text) - leechers=$(jq -r '.peers' <<< $health_text) - completed=$(jq -r '.completed' <<< $health_text) - date_string=$(jq -r '.created' <<< $health_text) - if [ "$date_string" == "null" ]; then - echo "Date was null, setting to now" - created_date=$(date +%s) - else - created_date=$(date -d "${date_string}" +"%s") - fi - scraped_date=$(date +%s) - - # Construct add line - add_line="$infohash;$name;$size_bytes;$created_date;$seeders;$leechers;$completed;$scraped_date" - # echo -e $add_line - - if (( $seeders > 0 )); then - - found_line=$(rg -n $infohash $torrents_csv) - found_seeders=$(echo -e $found_line | cut -d';' -f 5) - - # Only re-add if the infohash doesn't exist, or - if [ ! -z "$found_line" ]; then - - # Seeder counts are different - if [ "$found_seeders" != "$seeders" ]; then - - # Delete the original infohash line - rg -Nv "$infohash" $torrents_csv > torfile.tmp.2; mv torfile.tmp.2 $torrents_csv - - # Append the add lines to the torrents.csv file - echo -e "\n$add_line" >> $torrents_csv - # truncate -s -1 $torrents_csv # Removing last newline - echo -e "Updating Seeders: $torrent_file | $name | $infohash | $seeders" - else - echo -e "Not adding $name, had identical seeders" - fi - else - # Append the add lines to the torrents.csv file - echo -e "\n$add_line" >> $torrents_csv - # truncate -s -1 $torrents_csv # Removing last newline - echo -e "New Torrent: $torrent_file | $name | $infohash | $seeders" - fi - - else - # Deleting the line if it existed - if [ ! -z "$infohash" ]; then - # This removes lines that have no seeders - echo -e "$name has no seeders" - if rg -Nq $infohash $torrents_csv; then - echo "Removing $name from $torrents_csv" - rg -Nv "$infohash" $torrents_csv > torfile.tmp.2; mv torfile.tmp.2 $torrents_csv - fi - fi - fi - - echo $file_infohash >> $scanned_out - + cp "$torrent_file" "$tmp_torrent_dir" + echo $(basename "$torrent_file" .torrent) >> $names_out done -popd -cd scripts -. prune.sh +if [ -z "$(ls -A $tmp_torrent_dir)" ]; then + echo "No new torrents." +else + # Scrape it + torrent-tracker-health --torrent "$tmp_torrent_dir" > $health_out + + echo -e "$health_out" + + results=$(jq -r '.results | map([.hash, .name, .length, (.created | strptime("%Y-%m-%dT%H:%M:%S.000Z") | mktime), .seeders, .leechers, .completed, (now | floor)] | join(";")) | join("\n")' $health_out) + + echo -e "$results" >> $torrents_csv + cat "$names_out" >> $scanned_out + + popd + cd scripts + . prune.sh +fi + +# Remove the temp dir +rm -rf "$tmp_torrent_dir" +rm "$names_out" +rm "$health_out" +