diff --git a/docker/dev/deploy.sh b/docker/dev/deploy.sh index 94dd243..045c713 100755 --- a/docker/dev/deploy.sh +++ b/docker/dev/deploy.sh @@ -17,5 +17,5 @@ git push # Rebuilding docker docker-compose build -docker tag dev_lemmy:latest dessalines/torrents-csv:$new_tag +docker tag dev_torrents-csv:latest dessalines/torrents-csv:$new_tag docker push dessalines/torrents-csv:$new_tag diff --git a/new_torrents_fetcher/Cargo.lock b/new_torrents_fetcher/Cargo.lock index ad04884..41c33b1 100644 --- a/new_torrents_fetcher/Cargo.lock +++ b/new_torrents_fetcher/Cargo.lock @@ -1,3 +1,5 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. [[package]] name = "adler32" version = "1.0.3" diff --git a/new_torrents_fetcher/src/cf.py b/new_torrents_fetcher/src/cf.py index 3db2e37..ae55443 100644 --- a/new_torrents_fetcher/src/cf.py +++ b/new_torrents_fetcher/src/cf.py @@ -1,7 +1,7 @@ -import cfscrape +import cloudscraper request = "GET / HTTP/1.1\r\n" -cookie_value, user_agent = cfscrape.get_cookie_string("https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent") +cookie_value, user_agent = cloudscraper.get_cookie_string("https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent") request += "Cookie: %s\r\nUser-Agent: %s\r\n" % (cookie_value, user_agent) # cookie = "Cookie: %s" % (cookie_value) diff --git a/scripts/prune_currents.sh b/scripts/prune_currents.sh index 0f9096a..f0c7687 100755 --- a/scripts/prune_currents.sh +++ b/scripts/prune_currents.sh @@ -1,53 +1,69 @@ # This refetches the seeder counts for everthing in torrents.csv, and updates the seeder counts -echo "Refetching seeder counts ..." +echo "Refetching seeder counts from torrents older than 3 months ..." cd .. torrents_csv="`pwd`/torrents.csv" torrents_removed="`pwd`/torrents_removed.csv" prune_currents_tmps="`pwd`/prune_currents_tmps" + mkdir $prune_currents_tmps cd $prune_currents_tmps cp $torrents_csv tmp # Extract the header -header=$(head -n1 tmp) +header=$(head -n1 tmp) sed -i '1d' tmp -cat tmp | cut -d ';' -f1 > tmp2 +# Get the ones older than 3 months +awk -F';' -v date="$(date -d '3 months ago' '+%s')" '$8 < date' tmp | cut -d ';' -f1 > tmp2 + mv tmp2 tmp -# Split these up into 2000 file batches -split -l 2000 tmp tmp_ +# Split these up into 100 file batches +split -l 100 tmp tmp_ > no_seeds for f in tmp_*; do echo "Fetching seeds..." echo $f - torrent-tracker-health --torrent "$f" > health + torrent-tracker-health --torrent "$f" > health.out - # Select the infohashes with zero seeders - # append to a no seeds file - jq '.results[] | select(.seeders==0) | .hash' health | tr -d \" >> no_seeds + + # The only reliable things here are scraped_date, hash, seeders, leechers, completed + results=$(jq -r '.results[] | select (.created != null ) | [.hash, .seeders, .leechers, .completed, (now | floor)] | join(";")' health.out) + # If there are no results + if [ -z "$results" ]; then + echo "There were no results for some reason." + cat health.out + else + # Loop over the result lines + while read -r result; do + hash=$(echo "$result" | cut -d ';' -f1) + + # Get the first columns + found_line=$(grep "$hash" $torrents_csv | cut -d';' -f-4) + + # Remove the hash column from my fetched results + hash_removed=$(echo "$result" | cut -d';' -f2-) + + # Append the seeder data to the line + new_line="$found_line"\;"$hash_removed" + + # Update the torrents.csv and infohashes scanned file + echo "Torrents.csv updated" + echo "$new_line" + echo -e "$new_line" >> $torrents_csv + + done <<< "$results" + + + fi rm $f done -# Remove those lines from the file -grep -vwF -f no_seeds $torrents_csv > $torrents_removed - +rm health.out cd .. -rm $prune_currents_tmps -rm health - - - - - - - - - - - - - +rm -rf $prune_currents_tmps +cd scripts +. prune.sh diff --git a/scripts/rescan_olds.sh b/scripts/rescan_olds.sh deleted file mode 100644 index 659719e..0000000 --- a/scripts/rescan_olds.sh +++ /dev/null @@ -1,7 +0,0 @@ -# Rescan everything that hasn't been scanned in a while -cd ../ -awk -F';' '$8 < $(date -d "6 months ago" "+%s")' torrents.csv | cut -d ';' -f1 > hashes_to_rescan -grep -vFf hashes_to_rescan infohashes_scanned.txt > new_infohashes_scanned -rm hashes_to_rescan -mv new_infohashes_scanned infohashes_scanned.txt - diff --git a/scripts/scan_torrents.sh b/scripts/scan_torrents.sh index 1cb5170..9d8400f 100755 --- a/scripts/scan_torrents.sh +++ b/scripts/scan_torrents.sh @@ -63,6 +63,8 @@ for tmp_torrent_dir_sub in *; do cat health.out else echo "Torrents.csv updated with new torrents." + echo "$results" + # Update the torrents.csv and infohashes scanned file echo -e "$results" >> $torrents_csv cat names.out >> $scanned_out