# Refetches the seeder counts for the torrents in torrents.csv whose last
# scrape is older than 3 months, and appends the refreshed rows to torrents.csv.
# Expected to be started from the scripts directory (it works relative to the
# parent directory).
echo "Refetching seeder counts from torrents older than 3 months ..."

cd .. || exit 1

torrents_csv="$(pwd)/torrents.csv"
# Not used below; kept because prune.sh is sourced into this same shell at the
# end of the script and may rely on it — TODO confirm.
torrents_removed="$(pwd)/torrents_removed.csv"
prune_currents_tmps="$(pwd)/prune_currents_tmps"

# Start from an empty scratch directory: a directory left behind by an
# interrupted run would make a plain mkdir fail and would leave stale tmp_*
# batches around to be processed again.
rm -rf -- "${prune_currents_tmps:?}"
mkdir -p -- "$prune_currents_tmps" || exit 1
# Everything below edits and deletes files by relative name, so never continue
# from the wrong directory.
cd "$prune_currents_tmps" || exit 1

cp -- "$torrents_csv" tmp || exit 1

# Extract the header
# (not used below; kept for the sourced prune.sh — TODO confirm it is needed)
header=$(head -n1 tmp)
sed -i '1d' tmp

# Get the ones older than 3 months: keep rows whose 8th column is smaller than
# the unix timestamp of "3 months ago", and keep only their first column.
awk -F';' -v date="$(date -d '3 months ago' '+%s')" '$8 < date' tmp | cut -d ';' -f1 > tmp2
mv tmp2 tmp

# Split these up into batches of 100 lines each (tmp_aa, tmp_ab, ...)
split -l 100 tmp tmp_

# Create / truncate the no_seeds file
: > no_seeds
#######################################
# Builds the refreshed csv row for one scraped result.
# Arguments:
#   $1 - path of the csv file in which the hash is looked up
#   $2 - scraped result: "hash;seeders;leechers;completed;scraped_date"
# Outputs:
#   "<first 4 columns of the matching csv row>;seeders;leechers;completed;scraped_date"
# Returns:
#   0 on success, 1 when the hash is empty or no csv row contains it
#######################################
build_refreshed_row() {
  local csv=$1
  local result=$2
  local hash found_line hash_removed

  hash=${result%%;*}
  # An empty pattern would match every row of the csv.
  [ -n "$hash" ] || return 1

  # Get the first columns.
  # -F: the hash is a literal string, not a regex.
  # -m1: a hash that is present more than once must not yield several lines.
  found_line=$(grep -F -m1 -- "$hash" "$csv" | cut -d';' -f-4)
  [ -n "$found_line" ] || return 1

  # Remove the hash column from the fetched result
  hash_removed=${result#*;}

  # Append the seeder data to the line
  printf '%s;%s\n' "$found_line" "$hash_removed"
}

for f in tmp_*; do
  # When there are no batch files the glob stays literal; nothing to do then.
  [ -e "$f" ] || continue

  echo "Fetching seeds..."
  echo "$f"
  torrent-tracker-health --torrent "$f" > health.out

  # The only reliable things here are scraped_date, hash, seeders, leechers, completed
  # Entries whose .created is null are dropped; the scrape date is the current time.
  results=$(jq -r '.results[] | select (.created != null ) | [.hash, .seeders, .leechers, .completed, (now | floor)] | join(";")' health.out)

  # If there are no results
  if [ -z "$results" ]; then
    echo "There were no results for some reason."
    cat health.out
  else
    # Loop over the result lines
    while read -r result; do
      if new_line=$(build_refreshed_row "$torrents_csv" "$result"); then
        # Update the torrents.csv
        echo "Torrents.csv updated"
        printf '%s\n' "$new_line"
        # printf '%s' instead of echo -e: backslash sequences inside a torrent
        # name must be written verbatim, not expanded into tabs / newlines.
        printf '%s\n' "$new_line" >> "$torrents_csv"
      else
        printf 'No matching row in torrents.csv, skipping: %s\n' "$result" >&2
      fi
    done <<< "$results"
  fi

  rm -- "$f"
done
# Clean up the scratch files and hand over to prune.sh.
# -f: health.out does not exist when no batch was processed.
rm -f health.out

cd .. || exit 1
# Quoted, and guarded with :? so an empty or unset variable can never turn
# this into an rm -rf of something else.
rm -rf -- "${prune_currents_tmps:?}"

cd scripts || exit 1

# Source prune.sh from the scripts directory explicitly; without a slash the
# "." builtin searches PATH before the current directory.
. ./prune.sh