Improving pruning.
This commit is contained in:
parent
f5b1282a47
commit
48dc083784
|
@ -19,13 +19,10 @@ header=$(head -n1 $torrents_csv_tmp)
|
||||||
sed -i '1d' $torrents_csv_tmp
|
sed -i '1d' $torrents_csv_tmp
|
||||||
|
|
||||||
# Sort by seeders desc (so when we remove dups it removes the lower seeder counts)
|
# Sort by seeders desc (so when we remove dups it removes the lower seeder counts)
|
||||||
# TODO this should actually probably do it by scraped date
|
|
||||||
# sort --field-separator=';' --key=5 -nr -o $torrents_csv_tmp $torrents_csv_tmp
|
|
||||||
|
|
||||||
# Remove dups, keeping the last ones
|
# Remove dups, keeping the last ones
|
||||||
sort -r -t';' -k1,1 -o $torrents_csv_tmp $torrents_csv_tmp
|
sort -r -t';' -k1,1 -o $torrents_csv_tmp $torrents_csv_tmp
|
||||||
sort -r -u -t';' -k1,1 -o $torrents_csv_tmp $torrents_csv_tmp
|
sort -r -u -t';' -k1,1 -o $torrents_csv_tmp $torrents_csv_tmp
|
||||||
# sort -u -t';' -k2,2 -k8,8 -o $torrents_csv_tmp $torrents_csv_tmp
|
|
||||||
|
|
||||||
|
|
||||||
# Same for the infohashes scanned
|
# Same for the infohashes scanned
|
||||||
|
@ -44,4 +41,4 @@ sed -i "1i $header" $torrents_csv_tmp
|
||||||
|
|
||||||
mv $torrents_csv_tmp $torrents_csv
|
mv $torrents_csv_tmp $torrents_csv
|
||||||
|
|
||||||
echo "Pruning done."
|
echo "Pruning done."
|
||||||
|
|
|
@ -0,0 +1,59 @@
|
||||||
|
Refetching seeder counts ...
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_aa
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_ab
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_ac
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_ad
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_ae
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_af
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_ag
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_ah
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_ai
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_aj
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_ak
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_al
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_am
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_an
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_ao
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_ap
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_aq
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_ar
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_as
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_at
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_au
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_av
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_aw
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_ax
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_ay
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_az
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_ba
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_bb
|
||||||
|
Fetching seeds...
|
||||||
|
tmp_bc
|
|
@ -2,6 +2,7 @@
|
||||||
echo "Refetching seeder counts ..."
|
echo "Refetching seeder counts ..."
|
||||||
cd ..
|
cd ..
|
||||||
torrents_csv="`pwd`/torrents.csv"
|
torrents_csv="`pwd`/torrents.csv"
|
||||||
|
torrents_removed="`pwd`/torrents_removed.csv"
|
||||||
prune_currents_tmps="`pwd`/prune_currents_tmps"
|
prune_currents_tmps="`pwd`/prune_currents_tmps"
|
||||||
mkdir $prune_currents_tmps
|
mkdir $prune_currents_tmps
|
||||||
cd $prune_currents_tmps
|
cd $prune_currents_tmps
|
||||||
|
@ -15,7 +16,6 @@ sed -i '1d' tmp
|
||||||
cat tmp | cut -d ';' -f1 > tmp2
|
cat tmp | cut -d ';' -f1 > tmp2
|
||||||
mv tmp2 tmp
|
mv tmp2 tmp
|
||||||
|
|
||||||
mkdir prune_currents_tmps
|
|
||||||
# Split these up into 2000 file batches
|
# Split these up into 2000 file batches
|
||||||
split -l 2000 tmp tmp_
|
split -l 2000 tmp tmp_
|
||||||
|
|
||||||
|
@ -32,9 +32,10 @@ for f in tmp_*; do
|
||||||
done
|
done
|
||||||
|
|
||||||
# Remove those lines from the file
|
# Remove those lines from the file
|
||||||
rg -vwF -f no_seeds $torrents_csv > torrents_removed.csv
|
rg -vwF -f no_seeds $torrents_csv > $torrents_removed
|
||||||
|
|
||||||
rm tmp_*
|
cd ..
|
||||||
|
rm $prune_currents_tmps
|
||||||
rm health
|
rm health
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
Torrents dir=/home/tyler/Tyhous_HD2/git/torrents.csv/torrents
|
||||||
|
torrent-tracker-health installed.
|
||||||
|
~/Tyhous_HD2/git/torrents.csv/torrents ~/Tyhous_HD2/git/torrents.csv
|
||||||
|
sub dir: 1
|
||||||
|
Torrents.csv updated with new torrents.
|
||||||
|
sub dir: 2
|
||||||
|
jq: error (at health.out:12295): date "+047876-11-23T23" does not match format "%Y-%m-%dT%H:%M"
|
||||||
|
There were no results for some reason.
|
||||||
|
sub dir: 3
|
||||||
|
Torrents.csv updated with new torrents.
|
||||||
|
~/Tyhous_HD2/git/torrents.csv
|
||||||
|
Pruning torrents.csv ...
|
||||||
|
Pruning done.
|
Loading…
Reference in New Issue