Fixing docker, updating some scripts.
This commit is contained in:
parent
c2ea68d499
commit
72e776e8be
|
@ -17,5 +17,5 @@ git push
|
||||||
|
|
||||||
# Rebuilding docker
|
# Rebuilding docker
|
||||||
docker-compose build
|
docker-compose build
|
||||||
docker tag dev_lemmy:latest dessalines/torrents-csv:$new_tag
|
docker tag dev_torrents-csv:latest dessalines/torrents-csv:$new_tag
|
||||||
docker push dessalines/torrents-csv:$new_tag
|
docker push dessalines/torrents-csv:$new_tag
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
# This file is automatically @generated by Cargo.
|
||||||
|
# It is not intended for manual editing.
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "adler32"
|
name = "adler32"
|
||||||
version = "1.0.3"
|
version = "1.0.3"
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import cfscrape
|
import cloudscraper
|
||||||
request = "GET / HTTP/1.1\r\n"
|
request = "GET / HTTP/1.1\r\n"
|
||||||
|
|
||||||
cookie_value, user_agent = cfscrape.get_cookie_string("https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent")
|
cookie_value, user_agent = cloudscraper.get_cookie_string("https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent")
|
||||||
request += "Cookie: %s\r\nUser-Agent: %s\r\n" % (cookie_value, user_agent)
|
request += "Cookie: %s\r\nUser-Agent: %s\r\n" % (cookie_value, user_agent)
|
||||||
# cookie = "Cookie: %s" % (cookie_value)
|
# cookie = "Cookie: %s" % (cookie_value)
|
||||||
|
|
||||||
|
|
|
@ -1,53 +1,69 @@
|
||||||
# This refetches the seeder counts for everything in torrents.csv, and updates the seeder counts
|
# This refetches the seeder counts for everything in torrents.csv, and updates the seeder counts
|
||||||
echo "Refetching seeder counts ..."
|
echo "Refetching seeder counts from torrents older than 3 months ..."
|
||||||
cd ..
|
cd ..
|
||||||
torrents_csv="`pwd`/torrents.csv"
|
torrents_csv="`pwd`/torrents.csv"
|
||||||
torrents_removed="`pwd`/torrents_removed.csv"
|
torrents_removed="`pwd`/torrents_removed.csv"
|
||||||
prune_currents_tmps="`pwd`/prune_currents_tmps"
|
prune_currents_tmps="`pwd`/prune_currents_tmps"
|
||||||
|
|
||||||
mkdir $prune_currents_tmps
|
mkdir $prune_currents_tmps
|
||||||
cd $prune_currents_tmps
|
cd $prune_currents_tmps
|
||||||
|
|
||||||
cp $torrents_csv tmp
|
cp $torrents_csv tmp
|
||||||
|
|
||||||
# Extract the header
|
# Extract the header
|
||||||
header=$(head -n1 tmp)
|
header=$(head -n1 tmp)
|
||||||
sed -i '1d' tmp
|
sed -i '1d' tmp
|
||||||
|
|
||||||
cat tmp | cut -d ';' -f1 > tmp2
|
# Get the ones older than 3 months
|
||||||
|
awk -F';' -v date="$(date -d '3 months ago' '+%s')" '$8 < date' tmp | cut -d ';' -f1 > tmp2
|
||||||
|
|
||||||
mv tmp2 tmp
|
mv tmp2 tmp
|
||||||
|
|
||||||
# Split these up into 2000 file batches
|
# Split these up into 100 file batches
|
||||||
split -l 2000 tmp tmp_
|
split -l 100 tmp tmp_
|
||||||
|
|
||||||
> no_seeds
|
> no_seeds
|
||||||
for f in tmp_*; do
|
for f in tmp_*; do
|
||||||
echo "Fetching seeds..."
|
echo "Fetching seeds..."
|
||||||
echo $f
|
echo $f
|
||||||
torrent-tracker-health --torrent "$f" > health
|
torrent-tracker-health --torrent "$f" > health.out
|
||||||
|
|
||||||
# Select the infohashes with zero seeders
|
|
||||||
# append to a no seeds file
|
# The only reliable things here are scraped_date, hash, seeders, leechers, completed
|
||||||
jq '.results[] | select(.seeders==0) | .hash' health | tr -d \" >> no_seeds
|
results=$(jq -r '.results[] | select (.created != null ) | [.hash, .seeders, .leechers, .completed, (now | floor)] | join(";")' health.out)
|
||||||
|
# If there are no results
|
||||||
|
if [ -z "$results" ]; then
|
||||||
|
echo "There were no results for some reason."
|
||||||
|
cat health.out
|
||||||
|
else
|
||||||
|
# Loop over the result lines
|
||||||
|
while read -r result; do
|
||||||
|
hash=$(echo "$result" | cut -d ';' -f1)
|
||||||
|
|
||||||
|
# Get the first columns
|
||||||
|
found_line=$(grep "$hash" $torrents_csv | cut -d';' -f-4)
|
||||||
|
|
||||||
|
# Remove the hash column from my fetched results
|
||||||
|
hash_removed=$(echo "$result" | cut -d';' -f2-)
|
||||||
|
|
||||||
|
# Append the seeder data to the line
|
||||||
|
new_line="$found_line"\;"$hash_removed"
|
||||||
|
|
||||||
|
# Update the torrents.csv and infohashes scanned file
|
||||||
|
echo "Torrents.csv updated"
|
||||||
|
echo "$new_line"
|
||||||
|
echo -e "$new_line" >> $torrents_csv
|
||||||
|
|
||||||
|
done <<< "$results"
|
||||||
|
|
||||||
|
|
||||||
|
fi
|
||||||
rm $f
|
rm $f
|
||||||
done
|
done
|
||||||
|
|
||||||
# Remove those lines from the file
|
rm health.out
|
||||||
grep -vwF -f no_seeds $torrents_csv > $torrents_removed
|
|
||||||
|
|
||||||
cd ..
|
cd ..
|
||||||
rm $prune_currents_tmps
|
rm -rf $prune_currents_tmps
|
||||||
rm health
|
cd scripts
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
. prune.sh
|
||||||
|
|
|
@ -1,7 +0,0 @@
|
||||||
# Rescan everything that hasn't been scanned in a while
|
|
||||||
cd ../
|
|
||||||
awk -F';' '$8 < $(date -d "6 months ago" "+%s")' torrents.csv | cut -d ';' -f1 > hashes_to_rescan
|
|
||||||
grep -vFf hashes_to_rescan infohashes_scanned.txt > new_infohashes_scanned
|
|
||||||
rm hashes_to_rescan
|
|
||||||
mv new_infohashes_scanned infohashes_scanned.txt
|
|
||||||
|
|
|
@ -63,6 +63,8 @@ for tmp_torrent_dir_sub in *; do
|
||||||
cat health.out
|
cat health.out
|
||||||
else
|
else
|
||||||
echo "Torrents.csv updated with new torrents."
|
echo "Torrents.csv updated with new torrents."
|
||||||
|
echo "$results"
|
||||||
|
|
||||||
# Update the torrents.csv and infohashes scanned file
|
# Update the torrents.csv and infohashes scanned file
|
||||||
echo -e "$results" >> $torrents_csv
|
echo -e "$results" >> $torrents_csv
|
||||||
cat names.out >> $scanned_out
|
cat names.out >> $scanned_out
|
||||||
|
|
Loading…
Reference in New Issue