torrents.csv/scripts/prune_currents.sh

70 lines
1.8 KiB
Bash
Raw Normal View History

2019-01-28 15:01:03 -08:00
# Refetches the seeder counts for every torrent in torrents.csv whose
# scraped_date (column 8) is older than 3 months, and updates the seeder counts.
#
# Expects to be started from the scripts directory: it does `cd ..` to reach
# the directory holding torrents.csv and later returns with `cd scripts`.
echo "Refetching seeder counts from torrents older than 3 months ..."

cd .. || { echo "prune_currents: cannot cd to parent directory" >&2; exit 1; }
torrents_csv="$(pwd)/torrents.csv"
# Not read anywhere in this script's visible code; kept because prune.sh is
# sourced into the same shell later and may rely on it (unverified).
torrents_removed="$(pwd)/torrents_removed.csv"
prune_currents_tmps="$(pwd)/prune_currents_tmps"

# Start from an empty scratch directory: a leftover one from an aborted run
# would make a plain mkdir fail and leave stale tmp_* batch files behind.
rm -rf -- "${prune_currents_tmps:?}"
mkdir -p -- "$prune_currents_tmps" \
  || { echo "prune_currents: cannot create $prune_currents_tmps" >&2; exit 1; }

# Everything below creates, edits and deletes files by relative name, so it is
# not safe to continue if we are not inside the scratch directory.
cd "$prune_currents_tmps" \
  || { echo "prune_currents: cannot cd to $prune_currents_tmps" >&2; exit 1; }
cp -- "$torrents_csv" tmp \
  || { echo "prune_currents: cannot copy $torrents_csv" >&2; exit 1; }

# Extract the header, then drop it so only data rows remain in tmp
header=$(head -n1 tmp)
sed -i '1d' tmp

# Get the ones older than 3 months, keeping only the first column (the value
# later handed to torrent-tracker-health)
awk -F';' -v date="$(date -d '3 months ago' '+%s')" '$8 < date' tmp \
  | cut -d ';' -f1 > tmp2
mv tmp2 tmp

# Split these up into 100 line batches named tmp_aa, tmp_ab, ...
split -l 100 tmp tmp_

# Create/truncate no_seeds inside the scratch directory
: > no_seeds
#######################################
# Build the refreshed CSV row for one scrape result.
# Arguments:
#   $1 - path to the torrents CSV (';'-separated)
#   $2 - one result line: hash;seeders;leechers;completed;scraped_date
# Outputs:
#   The first four columns of the existing row for that hash, followed by
#   the fetched columns (everything after the hash), on stdout.
# Returns:
#   0 on success, 1 if the hash is empty or has no row in the CSV.
#######################################
build_refreshed_line() {
  local csv=$1
  local result=$2
  local hash=${result%%;*}
  # Remove the hash column from the fetched result
  local stats=${result#*;}
  local found_line

  # An empty pattern would match every row of the CSV.
  [ -n "$hash" ] || return 1

  # -F: the hash is a literal string, not a regex.
  # -m 1: refreshed rows are appended to the CSV rather than replaced, so a
  # hash can occur on several rows; taking more than one would produce a
  # multi-line, corrupt row.
  found_line=$(grep -m 1 -F -- "$hash" "$csv" | cut -d';' -f-4)
  [ -n "$found_line" ] || return 1

  # printf '%s' writes the row verbatim; echo -e would expand backslash
  # sequences that happen to occur in a torrent name.
  printf '%s;%s\n' "$found_line" "$stats"
}

for f in tmp_*; do
  # If there was nothing to refresh, split created no files and the glob
  # stays as the literal string "tmp_*".
  [ -e "$f" ] || continue

  echo "Fetching seeds..."
  echo "$f"
  torrent-tracker-health --torrent "$f" > health.out

  # The only reliable things here are scraped_date, hash, seeders, leechers, completed
  results=$(jq -r '.results[] | select (.created != null ) | [.hash, .seeders, .leechers, .completed, (now | floor)] | join(";")' health.out)

  # If there are no results
  if [ -z "$results" ]; then
    echo "There were no results for some reason."
    cat health.out
  else
    # Loop over the result lines
    while IFS= read -r result; do
      if new_line=$(build_refreshed_line "$torrents_csv" "$result"); then
        # Append the refreshed row to torrents.csv
        echo "Torrents.csv updated"
        printf '%s\n' "$new_line"
        printf '%s\n' "$new_line" >> "$torrents_csv"
      else
        echo "No existing row found for result: $result" >&2
      fi
    done <<< "$results"
  fi

  rm -- "$f"
done
2019-01-28 15:01:03 -08:00
2019-09-23 15:30:38 -07:00
# Clean up the scratch files and hand over to prune.sh.
# -f: health.out only exists if at least one batch was fetched.
rm -f health.out
cd .. || { echo "prune_currents: cannot cd to parent directory" >&2; exit 1; }
# :? aborts instead of expanding to an empty path if the variable is unset.
rm -rf -- "${prune_currents_tmps:?}"
cd scripts || { echo "prune_currents: cannot cd to scripts" >&2; exit 1; }

# Explicit ./ so the prune.sh in this directory is sourced; a bare name is
# looked up in PATH first (and, in POSIX mode, only there).
. ./prune.sh