Improving pruning.
This commit is contained in:
parent
f5b1282a47
commit
48dc083784
|
@ -19,13 +19,10 @@ header=$(head -n1 $torrents_csv_tmp)
|
|||
sed -i '1d' $torrents_csv_tmp
|
||||
|
||||
# Sort by seeders desc (so when we remove dups it removes the lower seeder counts)
|
||||
# TODO: this should probably be done by scraped date instead
|
||||
# sort --field-separator=';' --key=5 -nr -o $torrents_csv_tmp $torrents_csv_tmp
|
||||
|
||||
# Remove dups, keeping the last ones
|
||||
sort -r -t';' -k1,1 -o $torrents_csv_tmp $torrents_csv_tmp
|
||||
sort -r -u -t';' -k1,1 -o $torrents_csv_tmp $torrents_csv_tmp
|
||||
# sort -u -t';' -k2,2 -k8,8 -o $torrents_csv_tmp $torrents_csv_tmp
|
||||
|
||||
|
||||
# Same for the infohashes scanned
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
Refetching seeder counts ...
|
||||
Fetching seeds...
|
||||
tmp_aa
|
||||
Fetching seeds...
|
||||
tmp_ab
|
||||
Fetching seeds...
|
||||
tmp_ac
|
||||
Fetching seeds...
|
||||
tmp_ad
|
||||
Fetching seeds...
|
||||
tmp_ae
|
||||
Fetching seeds...
|
||||
tmp_af
|
||||
Fetching seeds...
|
||||
tmp_ag
|
||||
Fetching seeds...
|
||||
tmp_ah
|
||||
Fetching seeds...
|
||||
tmp_ai
|
||||
Fetching seeds...
|
||||
tmp_aj
|
||||
Fetching seeds...
|
||||
tmp_ak
|
||||
Fetching seeds...
|
||||
tmp_al
|
||||
Fetching seeds...
|
||||
tmp_am
|
||||
Fetching seeds...
|
||||
tmp_an
|
||||
Fetching seeds...
|
||||
tmp_ao
|
||||
Fetching seeds...
|
||||
tmp_ap
|
||||
Fetching seeds...
|
||||
tmp_aq
|
||||
Fetching seeds...
|
||||
tmp_ar
|
||||
Fetching seeds...
|
||||
tmp_as
|
||||
Fetching seeds...
|
||||
tmp_at
|
||||
Fetching seeds...
|
||||
tmp_au
|
||||
Fetching seeds...
|
||||
tmp_av
|
||||
Fetching seeds...
|
||||
tmp_aw
|
||||
Fetching seeds...
|
||||
tmp_ax
|
||||
Fetching seeds...
|
||||
tmp_ay
|
||||
Fetching seeds...
|
||||
tmp_az
|
||||
Fetching seeds...
|
||||
tmp_ba
|
||||
Fetching seeds...
|
||||
tmp_bb
|
||||
Fetching seeds...
|
||||
tmp_bc
|
|
@ -2,6 +2,7 @@
|
|||
echo "Refetching seeder counts ..."
|
||||
cd ..
|
||||
torrents_csv="`pwd`/torrents.csv"
|
||||
torrents_removed="`pwd`/torrents_removed.csv"
|
||||
prune_currents_tmps="`pwd`/prune_currents_tmps"
|
||||
mkdir $prune_currents_tmps
|
||||
cd $prune_currents_tmps
|
||||
|
@ -15,7 +16,6 @@ sed -i '1d' tmp
|
|||
cat tmp | cut -d ';' -f1 > tmp2
|
||||
mv tmp2 tmp
|
||||
|
||||
mkdir prune_currents_tmps
|
||||
# Split these up into batches of 2000 lines each
|
||||
split -l 2000 tmp tmp_
|
||||
|
||||
|
@ -32,9 +32,10 @@ for f in tmp_*; do
|
|||
done
|
||||
|
||||
# Remove those lines from the file
|
||||
rg -vwF -f no_seeds $torrents_csv > torrents_removed.csv
|
||||
rg -vwF -f no_seeds $torrents_csv > $torrents_removed
|
||||
|
||||
rm tmp_*
|
||||
cd ..
|
||||
rm $prune_currents_tmps
|
||||
rm health
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
Torrents dir=/home/tyler/Tyhous_HD2/git/torrents.csv/torrents
|
||||
torrent-tracker-health installed.
|
||||
~/Tyhous_HD2/git/torrents.csv/torrents ~/Tyhous_HD2/git/torrents.csv
|
||||
sub dir: 1
|
||||
Torrents.csv updated with new torrents.
|
||||
sub dir: 2
|
||||
jq: error (at health.out:12295): date "+047876-11-23T23" does not match format "%Y-%m-%dT%H:%M"
|
||||
There were no results for some reason.
|
||||
sub dir: 3
|
||||
Torrents.csv updated with new torrents.
|
||||
~/Tyhous_HD2/git/torrents.csv
|
||||
Pruning torrents.csv ...
|
||||
Pruning done.
|
Loading…
Reference in New Issue