Import torrents into SQLite in seeder order; leave torrents.csv in infohash order.

scripts/build_sqlite.sh
  * Print a progress message before the temporary CSV is produced.
  * Sort the quote-stripped CSV by field 5 (seeders), numeric descending,
    before the sqlite3 import.
  * Drop the name index. The candidate replacement indexes stay disabled,
    written as SQL "--" comments so the heredoc is valid SQL no matter how
    the sqlite3 shell treats a leading "#".

scripts/prune.sh
  * Sort by seeders descending before EACH de-duplication pass. With GNU
    sort, -u keeps the first line of every run of equal keys, and the
    infohash pass leaves the rows in infohash order, so the name/size pass
    needs its own seeder sort to keep the best-seeded row.
  * Finish with the file ordered by infohash (field 1) instead of seeders.

diff --git a/scripts/build_sqlite.sh b/scripts/build_sqlite.sh
index a67272f..8137db3 100755
--- a/scripts/build_sqlite.sh
+++ b/scripts/build_sqlite.sh
@@ -1,6 +1,11 @@
-# Remove quotes
+echo "Creating temporary torrents.db file..."
+
+# Remove double quotes for csv import
 sed 's/\"//g' ../torrents.csv > torrents_removed_quotes.csv
 
+# Sort by seeders desc before insert
+sort --field-separator=';' --key=5 -nr -o torrents_removed_quotes.csv torrents_removed_quotes.csv
+
 rm ../torrents.db
 
 sqlite3 -batch ../torrents.db <<"EOF"
@@ -17,7 +22,11 @@ create table torrents(
 .separator ";"
 .import torrents_removed_quotes.csv torrents
 UPDATE torrents SET completed=NULL WHERE completed = '';
-create index name_index on torrents (name);
+-- create index idx_name_seeders on torrents (name, seeders desc);
+-- create index idx_name on torrents (name);
+-- create index idx_seeders on torrents (seeders desc);
+-- create index idx_name on torrents (name collate nocase);
+
 EOF
 
 rm torrents_removed_quotes.csv
diff --git a/scripts/prune.sh b/scripts/prune.sh
index aed1c02..5b25507 100755
--- a/scripts/prune.sh
+++ b/scripts/prune.sh
@@ -14,12 +14,18 @@ sed -i '/^$/d' $torrents_csv
 header=$(head -n1 $torrents_csv)
 sed -i '1d' $torrents_csv
 
+# Sort by seeders desc (so when we remove dups it removes the lower seeder counts)
+# TODO this should actually probably do it by scraped date
+sort --field-separator=';' --key=5 -nr -o $torrents_csv $torrents_csv
+
 # Remove dups
 sort -u -t';' -k1,1 -o $torrents_csv $torrents_csv
+# Re-sort by seeders desc: the infohash pass above left the rows in infohash order
+sort --field-separator=';' --key=5 -nr -o $torrents_csv $torrents_csv
 sort -u -t';' -k2,2 -k3,3 -o $torrents_csv $torrents_csv
 
-# Sort by seeders desc
-sort --field-separator=';' --key=5 -nr -o $torrents_csv $torrents_csv
+# Sort by infohash asc
+sort --field-separator=';' --key=1 -o $torrents_csv $torrents_csv
 
 # Add the header back in
 sed -i "1i $header" $torrents_csv