From 0b570e6249e843e94147330a2541a60001c89aa3 Mon Sep 17 00:00:00 2001 From: Dessalines Date: Sun, 2 Dec 2018 11:39:57 -0700 Subject: [PATCH] Adding a resort based on infohash, not seeders descending, since we're sorting by seeders descending before caching to sqlite. --- scripts/build_sqlite.sh | 13 +++++++++++-- scripts/prune.sh | 8 ++++++-- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/scripts/build_sqlite.sh b/scripts/build_sqlite.sh index a67272f..8137db3 100755 --- a/scripts/build_sqlite.sh +++ b/scripts/build_sqlite.sh @@ -1,6 +1,11 @@ -# Remove quotes +echo "Creating temporary torrents.db file..." + +# Remove double quotes for csv import sed 's/\"//g' ../torrents.csv > torrents_removed_quotes.csv +# Sort by seeders desc before insert +sort --field-separator=';' --key=5 -nr -o torrents_removed_quotes.csv torrents_removed_quotes.csv + rm ../torrents.db sqlite3 -batch ../torrents.db <<"EOF" @@ -17,7 +22,11 @@ create table torrents( .separator ";" .import torrents_removed_quotes.csv torrents UPDATE torrents SET completed=NULL WHERE completed = ''; -create index name_index on torrents (name); +# create index idx_name_seeders on torrents (name, seeders desc); +# create index idx_name on torrents (name); +# create index idx_seeders on torrents (seeders desc); +# create index idx_name on torrents (name collate nocase); + EOF rm torrents_removed_quotes.csv diff --git a/scripts/prune.sh b/scripts/prune.sh index aed1c02..5b25507 100755 --- a/scripts/prune.sh +++ b/scripts/prune.sh @@ -14,12 +14,16 @@ sed -i '/^$/d' $torrents_csv header=$(head -n1 $torrents_csv) sed -i '1d' $torrents_csv +# Sort by seeders desc (so when we remove dups it removes the lower seeder counts) +# TODO this should actually probably do it by scraped date +sort --field-separator=';' --key=5 -nr -o $torrents_csv $torrents_csv + # Remove dups sort -u -t';' -k1,1 -o $torrents_csv $torrents_csv sort -u -t';' -k2,2 -k3,3 -o $torrents_csv $torrents_csv - # Sort by seeders desc -sort 
--field-separator=';' --key=5 -nr -o $torrents_csv $torrents_csv +# Sort by infohash asc +sort --field-separator=';' --key=1 -o $torrents_csv $torrents_csv # Add the header back in sed -i "1i $header" $torrents_csv