Fixing docker, updating some scripts.

This commit is contained in:
Dessalines 2019-09-23 15:30:38 -07:00
parent c2ea68d499
commit 72e776e8be
6 changed files with 50 additions and 37 deletions

View File

@ -17,5 +17,5 @@ git push
# Rebuilding docker
docker-compose build
docker tag dev_lemmy:latest dessalines/torrents-csv:$new_tag
docker tag dev_torrents-csv:latest dessalines/torrents-csv:$new_tag
docker push dessalines/torrents-csv:$new_tag

View File

@ -1,3 +1,5 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "adler32"
version = "1.0.3"

View File

@ -1,7 +1,7 @@
import cfscrape
import cloudscraper
request = "GET / HTTP/1.1\r\n"
cookie_value, user_agent = cfscrape.get_cookie_string("https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent")
cookie_value, user_agent = cloudscraper.get_cookie_string("https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent")
request += "Cookie: %s\r\nUser-Agent: %s\r\n" % (cookie_value, user_agent)
# cookie = "Cookie: %s" % (cookie_value)

View File

@ -1,53 +1,69 @@
# This refetches the seeder counts for everything in torrents.csv, and updates the seeder counts
# NOTE(review): several statements below appear twice in slightly different
# forms (the two echo lines, the two split calls, the two
# torrent-tracker-health calls, the two cleanup sequences). They look like the
# pre-change and post-change versions of the same step — confirm which ones
# are meant to be live before running this listing as-is.
echo "Refetching seeder counts ..."
echo "Refetching seeder counts from torrents older than 3 months ..."
# Work from the parent directory; the script changes back into scripts/ at the end.
cd ..
# Absolute paths, so they stay valid after changing into the temp directory.
torrents_csv="`pwd`/torrents.csv"
torrents_removed="`pwd`/torrents_removed.csv"
prune_currents_tmps="`pwd`/prune_currents_tmps"
mkdir $prune_currents_tmps
cd $prune_currents_tmps
# Operate on a scratch copy of torrents.csv named tmp.
cp $torrents_csv tmp
# Extract the header
# NOTE(review): header is not referenced again anywhere in this listing.
header=$(head -n1 tmp)
header=$(head -n1 tmp)
# Drop the header row from the scratch copy.
sed -i '1d' tmp
# Keep only the first ';'-separated column of every row.
cat tmp | cut -d ';' -f1 > tmp2
# Get the ones older than 3 months
# Keeps the first column of rows whose 8th column is below the epoch-seconds
# value for "3 months ago"; this overwrites the tmp2 written just above.
awk -F';' -v date="$(date -d '3 months ago' '+%s')" '$8 < date' tmp | cut -d ';' -f1 > tmp2
mv tmp2 tmp
# Split these up into batches of 2000 lines
split -l 2000 tmp tmp_
# Split these up into batches of 100 lines
# Uses the same tmp_ output prefix as the split call above.
split -l 100 tmp tmp_
# Create (or truncate) the file that collects hashes reported with zero seeders.
> no_seeds
for f in tmp_*; do
echo "Fetching seeds..."
echo $f
# The tracker health output is written to both health and health.out.
torrent-tracker-health --torrent "$f" > health
torrent-tracker-health --torrent "$f" > health.out
# Select the infohashes with zero seeders
# append to a no seeds file
jq '.results[] | select(.seeders==0) | .hash' health | tr -d \" >> no_seeds
# The only reliable things here are scraped_date, hash, seeders, leechers, completed
# One line per result with a non-null .created:
# hash;seeders;leechers;completed;<current epoch seconds>
results=$(jq -r '.results[] | select (.created != null ) | [.hash, .seeders, .leechers, .completed, (now | floor)] | join(";")' health.out)
# If there are no results
if [ -z "$results" ]; then
echo "There were no results for some reason."
# Dump the raw output so the failure can be inspected.
cat health.out
else
# Loop over the result lines
while read -r result; do
hash=$(echo "$result" | cut -d ';' -f1)
# Get the first columns
# NOTE(review): grep matches the hash anywhere on a line, and found_line
# will span several lines if more than one row matches — verify that is intended.
found_line=$(grep "$hash" $torrents_csv | cut -d';' -f-4)
# Remove the hash column from my fetched results
hash_removed=$(echo "$result" | cut -d';' -f2-)
# Append the seeder data to the line
new_line="$found_line"\;"$hash_removed"
# Update the torrents.csv and infohashes scanned file
echo "Torrents.csv updated"
echo "$new_line"
# The refreshed row is appended; the existing row for the same hash is not
# removed here (presumably prune.sh, sourced at the end, handles that — TODO confirm).
echo -e "$new_line" >> $torrents_csv
done <<< "$results"
fi
# This batch file has been processed.
rm $f
done
# Remove those lines from the file
# Writes every torrents.csv line that matches none of the no_seeds hashes to
# torrents_removed.csv; torrents.csv itself is not modified by this command.
grep -vwF -f no_seeds $torrents_csv > $torrents_removed
rm health.out
cd ..
# NOTE(review): plain rm is applied to the directory created by mkdir above,
# and `rm health` now runs in the parent directory, not where health was written.
rm $prune_currents_tmps
rm health
rm -rf $prune_currents_tmps
cd scripts
# Source prune.sh in the current shell.
. prune.sh

View File

@ -1,7 +0,0 @@
# Rescan everything that hasn't been scanned in a while.
# Collects the first column of every torrents.csv row whose 8th column is
# older than six months, then removes those entries from
# infohashes_scanned.txt.
cd ../
# The cutoff is computed by the shell and handed to awk with -v. Inside a
# single-quoted awk program, $(...) is an awk field reference rather than a
# shell command substitution, so embedding the date call there compares $8
# against another field of the row instead of against a timestamp.
awk -F';' -v cutoff="$(date -d '6 months ago' '+%s')" '$8 < cutoff' torrents.csv | cut -d ';' -f1 > hashes_to_rescan
# Keep only the scanned entries that do not contain any of the stale hashes.
grep -vFf hashes_to_rescan infohashes_scanned.txt > new_infohashes_scanned
rm hashes_to_rescan
mv new_infohashes_scanned infohashes_scanned.txt

View File

@ -63,6 +63,8 @@ for tmp_torrent_dir_sub in *; do
cat health.out
else
echo "Torrents.csv updated with new torrents."
echo "$results"
# Update the torrents.csv and infohashes scanned file
echo -e "$results" >> $torrents_csv
cat names.out >> $scanned_out