diff --git a/scripts/prune.sh b/scripts/prune.sh index e4f1b26..8fe0441 100755 --- a/scripts/prune.sh +++ b/scripts/prune.sh @@ -19,13 +19,10 @@ header=$(head -n1 $torrents_csv_tmp) sed -i '1d' $torrents_csv_tmp # Sort by seeders desc (so when we remove dups it removes the lower seeder counts) -# TODO this should actually probably do it by scraped date -# sort --field-separator=';' --key=5 -nr -o $torrents_csv_tmp $torrents_csv_tmp # Remove dups, keeping the last ones sort -r -t';' -k1,1 -o $torrents_csv_tmp $torrents_csv_tmp sort -r -u -t';' -k1,1 -o $torrents_csv_tmp $torrents_csv_tmp -# sort -u -t';' -k2,2 -k8,8 -o $torrents_csv_tmp $torrents_csv_tmp # Same for the infohashes scanned @@ -44,4 +41,4 @@ sed -i "1i $header" $torrents_csv_tmp mv $torrents_csv_tmp $torrents_csv -echo "Pruning done." \ No newline at end of file +echo "Pruning done." diff --git a/scripts/prune_currents.out b/scripts/prune_currents.out new file mode 100644 index 0000000..cfcb4b6 --- /dev/null +++ b/scripts/prune_currents.out @@ -0,0 +1,59 @@ +Refetching seeder counts ... +Fetching seeds... +tmp_aa +Fetching seeds... +tmp_ab +Fetching seeds... +tmp_ac +Fetching seeds... +tmp_ad +Fetching seeds... +tmp_ae +Fetching seeds... +tmp_af +Fetching seeds... +tmp_ag +Fetching seeds... +tmp_ah +Fetching seeds... +tmp_ai +Fetching seeds... +tmp_aj +Fetching seeds... +tmp_ak +Fetching seeds... +tmp_al +Fetching seeds... +tmp_am +Fetching seeds... +tmp_an +Fetching seeds... +tmp_ao +Fetching seeds... +tmp_ap +Fetching seeds... +tmp_aq +Fetching seeds... +tmp_ar +Fetching seeds... +tmp_as +Fetching seeds... +tmp_at +Fetching seeds... +tmp_au +Fetching seeds... +tmp_av +Fetching seeds... +tmp_aw +Fetching seeds... +tmp_ax +Fetching seeds... +tmp_ay +Fetching seeds... +tmp_az +Fetching seeds... +tmp_ba +Fetching seeds... +tmp_bb +Fetching seeds... +tmp_bc diff --git a/scripts/prune_currents.sh b/scripts/prune_currents.sh index f678f69..a20e4ba 100755 --- a/scripts/prune_currents.sh +++ b/scripts/prune_currents.sh @@ -2,6 +2,7 @@ echo "Refetching seeder counts ..." cd .. torrents_csv="`pwd`/torrents.csv" +torrents_removed="`pwd`/torrents_removed.csv" prune_currents_tmps="`pwd`/prune_currents_tmps" mkdir $prune_currents_tmps cd $prune_currents_tmps @@ -15,7 +16,6 @@ sed -i '1d' tmp cat tmp | cut -d ';' -f1 > tmp2 mv tmp2 tmp -mkdir prune_currents_tmps # Split these up into 2000 file batches split -l 2000 tmp tmp_ @@ -32,9 +32,10 @@ for f in tmp_*; do done # Remove those lines from the file -rg -vwF -f no_seeds $torrents_csv > torrents_removed.csv +rg -vwF -f no_seeds $torrents_csv > $torrents_removed -rm tmp_* +cd .. +rm $prune_currents_tmps rm health diff --git a/scripts/scan_torrents.out b/scripts/scan_torrents.out new file mode 100644 index 0000000..d483bce --- /dev/null +++ b/scripts/scan_torrents.out @@ -0,0 +1,13 @@ +Torrents dir=/home/tyler/Tyhous_HD2/git/torrents.csv/torrents +torrent-tracker-health installed. +~/Tyhous_HD2/git/torrents.csv/torrents ~/Tyhous_HD2/git/torrents.csv +sub dir: 1 +Torrents.csv updated with new torrents. +sub dir: 2 +jq: error (at health.out:12295): date "+047876-11-23T23" does not match format "%Y-%m-%dT%H:%M" +There were no results for some reason. +sub dir: 3 +Torrents.csv updated with new torrents. +~/Tyhous_HD2/git/torrents.csv +Pruning torrents.csv ... +Pruning done.