torrents.csv/scripts/build_sqlite.sh

102 lines
2.2 KiB
Bash
Raw Normal View History

2018-12-02 14:05:00 -07:00
#!/bin/bash
2019-03-18 12:31:32 -07:00
csv_file="../torrents.csv"
torrent_files_csv="../torrent_files.csv"
2018-12-02 22:19:03 -07:00
db_file="${TORRENTS_CSV_DB_FILE:-../torrents.db}"
2020-01-09 12:13:22 -05:00
build_files=false
while getopts ":f" opt; do
case $opt in
f)
build_files=true
;;
\?)
echo "Invalid option: -$OPTARG" >&2
;;
esac
done
2018-12-02 14:05:00 -07:00
echo "Creating temporary torrents.db file..."
# Remove double quotes for csv import
2018-12-02 22:19:03 -07:00
sed 's/\"//g' $csv_file > torrents_removed_quotes.csv
# Sort by seeders desc before insert
sort --field-separator=';' --key=5 -nr -o torrents_removed_quotes.csv torrents_removed_quotes.csv
touch db_tmp
sqlite3 -batch db_tmp <<"EOF"
drop table if exists torrents;
create table torrents(
"infohash" TEXT,
"name" TEXT,
"size_bytes" INTEGER,
"created_unix" INTEGER,
"seeders" INTEGER,
"leechers" INTEGER,
"completed" INTEGER,
"scraped_date" INTEGER
);
.separator ";"
.import torrents_removed_quotes.csv torrents
UPDATE torrents SET completed=NULL WHERE completed = '';
EOF
rm torrents_removed_quotes.csv
2020-01-09 12:13:22 -05:00
if $build_files ; then
# Cache torrent files
echo "Building files DB from $torrent_files_csv ..."
2020-01-09 12:13:22 -05:00
# Remove double quotes for csv import
sed 's/\"//g' $torrent_files_csv > torrent_files_removed_quotes.csv
2019-04-10 15:48:23 -07:00
2020-01-09 12:13:22 -05:00
# Removing those with too many ;
awk -F \; 'NF == 4' <torrent_files_removed_quotes.csv > torrent_files_temp_2
2019-08-27 21:53:14 -07:00
2020-01-09 12:13:22 -05:00
rm torrent_files_removed_quotes.csv
mv torrent_files_temp_2 torrent_files_temp
2019-03-18 12:31:32 -07:00
sqlite3 -batch db_tmp<<EOF
create table files_tmp(
"infohash" TEXT,
"index_" INTEGER,
"path" TEXT,
"size_bytes" INTEGER
);
.separator ";"
2019-03-18 12:31:32 -07:00
.import torrent_files_temp files_tmp
-- Filling the extra columns
2019-03-18 12:31:32 -07:00
create table files(
"infohash" TEXT,
"index_" INTEGER,
"path" TEXT,
"size_bytes" INTEGER,
"created_unix" INTEGER,
"seeders" INTEGER,
"leechers" INTEGER,
"completed" INTEGER,
"scraped_date" INTEGER
);
insert into files
2019-03-18 12:31:32 -07:00
select files_tmp.infohash,
files_tmp.index_,
files_tmp.path,
files_tmp.size_bytes,
torrents.created_unix,
torrents.seeders,
torrents.leechers,
torrents.completed,
torrents.scraped_date
2019-03-18 12:31:32 -07:00
from files_tmp
inner join torrents on files_tmp.infohash = torrents.infohash
order by torrents.seeders desc, files_tmp.size_bytes desc;
delete from files where seeders is null;
2019-03-18 12:31:32 -07:00
drop table files_tmp;
EOF
2019-04-10 15:48:23 -07:00
rm torrent_files_temp
2020-01-26 21:58:51 -05:00
fi
mv db_tmp $db_file
echo "Done."