Fixing docker, updating some scripts.
This commit is contained in:
parent
c2ea68d499
commit
72e776e8be
@ -17,5 +17,5 @@ git push
|
||||
|
||||
# Rebuilding docker
|
||||
docker-compose build
|
||||
docker tag dev_lemmy:latest dessalines/torrents-csv:$new_tag
|
||||
docker tag dev_torrents-csv:latest dessalines/torrents-csv:$new_tag
|
||||
docker push dessalines/torrents-csv:$new_tag
|
||||
|
2
new_torrents_fetcher/Cargo.lock
generated
2
new_torrents_fetcher/Cargo.lock
generated
@ -1,3 +1,5 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
[[package]]
|
||||
name = "adler32"
|
||||
version = "1.0.3"
|
||||
|
@ -1,7 +1,7 @@
|
||||
import cfscrape
|
||||
import cloudscraper
|
||||
request = "GET / HTTP/1.1\r\n"
|
||||
|
||||
cookie_value, user_agent = cfscrape.get_cookie_string("https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent")
|
||||
cookie_value, user_agent = cloudscraper.get_cookie_string("https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent")
|
||||
request += "Cookie: %s\r\nUser-Agent: %s\r\n" % (cookie_value, user_agent)
|
||||
# cookie = "Cookie: %s" % (cookie_value)
|
||||
|
||||
|
@ -1,53 +1,69 @@
|
||||
# This refetches the seeder counts for everything in torrents.csv, and updates the seeder counts
|
||||
echo "Refetching seeder counts ..."
|
||||
echo "Refetching seeder counts from torrents older than 3 months ..."
|
||||
cd ..
|
||||
torrents_csv="`pwd`/torrents.csv"
|
||||
torrents_removed="`pwd`/torrents_removed.csv"
|
||||
prune_currents_tmps="`pwd`/prune_currents_tmps"
|
||||
|
||||
mkdir $prune_currents_tmps
|
||||
cd $prune_currents_tmps
|
||||
|
||||
cp $torrents_csv tmp
|
||||
|
||||
# Extract the header
|
||||
header=$(head -n1 tmp)
|
||||
header=$(head -n1 tmp)
|
||||
sed -i '1d' tmp
|
||||
|
||||
cat tmp | cut -d ';' -f1 > tmp2
|
||||
# Get the ones older than 3 months
|
||||
awk -F';' -v date="$(date -d '3 months ago' '+%s')" '$8 < date' tmp | cut -d ';' -f1 > tmp2
|
||||
|
||||
mv tmp2 tmp
|
||||
|
||||
# Split these up into batches of 2000 lines (one infohash per line)
|
||||
split -l 2000 tmp tmp_
|
||||
# Split these up into batches of 100 lines (one infohash per line)
|
||||
split -l 100 tmp tmp_
|
||||
|
||||
> no_seeds
|
||||
for f in tmp_*; do
|
||||
echo "Fetching seeds..."
|
||||
echo $f
|
||||
torrent-tracker-health --torrent "$f" > health
|
||||
torrent-tracker-health --torrent "$f" > health.out
|
||||
|
||||
# Select the infohashes with zero seeders
|
||||
# append to a no seeds file
|
||||
jq '.results[] | select(.seeders==0) | .hash' health | tr -d \" >> no_seeds
|
||||
|
||||
# The only reliable things here are scraped_date, hash, seeders, leechers, completed
|
||||
results=$(jq -r '.results[] | select (.created != null ) | [.hash, .seeders, .leechers, .completed, (now | floor)] | join(";")' health.out)
|
||||
# If there are no results
|
||||
if [ -z "$results" ]; then
|
||||
echo "There were no results for some reason."
|
||||
cat health.out
|
||||
else
|
||||
# Loop over the result lines
|
||||
while read -r result; do
|
||||
hash=$(echo "$result" | cut -d ';' -f1)
|
||||
|
||||
# Get the first columns
|
||||
found_line=$(grep "$hash" $torrents_csv | cut -d';' -f-4)
|
||||
|
||||
# Remove the hash column from my fetched results
|
||||
hash_removed=$(echo "$result" | cut -d';' -f2-)
|
||||
|
||||
# Append the seeder data to the line
|
||||
new_line="$found_line"\;"$hash_removed"
|
||||
|
||||
# Append the refreshed line to torrents.csv
|
||||
echo "Torrents.csv updated"
|
||||
echo "$new_line"
|
||||
echo -e "$new_line" >> $torrents_csv
|
||||
|
||||
done <<< "$results"
|
||||
|
||||
|
||||
fi
|
||||
rm $f
|
||||
done
|
||||
|
||||
# Remove those lines from the file
|
||||
grep -vwF -f no_seeds $torrents_csv > $torrents_removed
|
||||
|
||||
rm health.out
|
||||
cd ..
|
||||
rm $prune_currents_tmps
|
||||
rm health
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
rm -rf $prune_currents_tmps
|
||||
cd scripts
|
||||
|
||||
. prune.sh
|
||||
|
@ -1,7 +0,0 @@
|
||||
# Rescan everything that hasn't been scanned in a while
|
||||
cd ../
|
||||
awk -F';' '$8 < $(date -d "6 months ago" "+%s")' torrents.csv | cut -d ';' -f1 > hashes_to_rescan
|
||||
grep -vFf hashes_to_rescan infohashes_scanned.txt > new_infohashes_scanned
|
||||
rm hashes_to_rescan
|
||||
mv new_infohashes_scanned infohashes_scanned.txt
|
||||
|
@ -63,6 +63,8 @@ for tmp_torrent_dir_sub in *; do
|
||||
cat health.out
|
||||
else
|
||||
echo "Torrents.csv updated with new torrents."
|
||||
echo "$results"
|
||||
|
||||
# Update the torrents.csv and infohashes scanned file
|
||||
echo -e "$results" >> $torrents_csv
|
||||
cat names.out >> $scanned_out
|
||||
|
Loading…
x
Reference in New Issue
Block a user