torrents.csv/scripts/build_sqlite.sh

80 lines
2.0 KiB
Bash
Raw Normal View History

2018-12-02 21:05:00 +00:00
#!/bin/bash
2018-12-03 05:19:03 +00:00
# Input CSV and output SQLite DB paths. Each can be overridden through the
# environment (TORRENTS_CSV_FILE / TORRENTS_CSV_DB_FILE); the defaults are
# relative to the directory the script is run from.
csv_file="${TORRENTS_CSV_FILE:-../torrents.csv}"
db_file="${TORRENTS_CSV_DB_FILE:-../torrents.db}"
# Optional JSON listing of the files inside each torrent, resolved against
# the current working directory.
torrent_files_json="$(pwd)/../torrent_files.json"
2018-12-02 21:05:00 +00:00
# Build the "torrents" table in the scratch database db_tmp from $csv_file.
echo "Creating temporary torrents.db file from $csv_file ..."

# Stop before touching anything when the input cannot be read; otherwise an
# empty scratch DB would be built and later moved over the real one.
if [ ! -r "$csv_file" ]; then
  echo "Error: cannot read CSV file: $csv_file" >&2
  exit 1
fi

# Remove double quotes for csv import
sed 's/\"//g' "$csv_file" > torrents_removed_quotes.csv

# Sort by seeders desc before insert (numeric, reversed, key starting at the
# 5th ';'-separated field), rewriting the file in place.
sort --field-separator=';' --key=5 -nr -o torrents_removed_quotes.csv torrents_removed_quotes.csv

# Start from an empty scratch DB so tables left behind by an interrupted
# earlier run cannot end up in the result; sqlite3 creates the file itself.
rm -f db_tmp

# The heredoc delimiter is quoted, so the shell passes the SQL through
# verbatim. Dot-commands are kept at column 0.
sqlite3 -batch db_tmp <<"EOF"
drop table if exists torrents;
create table torrents(
  "infohash" TEXT,
  "name" TEXT,
  "size_bytes" INTEGER,
  "created_unix" INTEGER,
  "seeders" INTEGER,
  "leechers" INTEGER,
  "completed" INTEGER,
  "scraped_date" INTEGER
);
.separator ";"
.import torrents_removed_quotes.csv torrents
-- Empty "completed" fields are imported as '', store them as NULL instead
UPDATE torrents SET completed=NULL WHERE completed = '';
EOF

# The intermediate CSV is no longer needed once it has been imported.
rm torrents_removed_quotes.csv
# Cache torrent files if they exist: when $torrent_files_json is present,
# build a "files" table in db_tmp holding one row per file of each torrent,
# enriched with the metadata of the torrent it belongs to.
# The path is quoted everywhere because it is derived from the working
# directory and may contain whitespace.
if [ -f "$torrent_files_json" ]; then
  echo "Building files DB from $torrent_files_json ..."

  # Flatten the JSON object (infohash -> array of entries with keys i, p, l)
  # into one "infohash;i;p;l" line per entry.
  jq -r 'to_entries[] | {hash: .key, val: .value[]} | [.hash, .val.i, .val.p, .val.l] | join(";")' "$torrent_files_json" > torrent_files_temp

  # Removing those with too many ; -- only lines with exactly three
  # separators and a non-empty last field are kept.
  rg "^([^;]*;){3}[^;]+$" torrent_files_temp > torrent_files_temp_2
  mv torrent_files_temp_2 torrent_files_temp

  # NOTE(review): stderr is discarded here, presumably to silence the per-row
  # warnings sqlite3 prints when importing 4 fields into a 9-column table --
  # confirm, since this also hides genuine errors.
  sqlite3 -batch db_tmp 2>/dev/null <<"EOF"
drop table if exists files;
create table files(
  "infohash" TEXT,
  "index_" INTEGER,
  "path" TEXT,
  "size_bytes" INTEGER,
  "created_unix" INTEGER,
  "seeders" INTEGER,
  "leechers" INTEGER,
  "completed" INTEGER,
  "scraped_date" INTEGER);
.separator ";"
.import torrent_files_temp files
-- Filling the extra columns: re-insert every imported row joined with its
-- torrent's metadata, ordered by seeders and then by file size.
insert into files
select files.infohash,
  files.index_,
  files.path,
  files.size_bytes,
  torrents.created_unix,
  torrents.seeders,
  torrents.leechers,
  torrents.completed,
  torrents.scraped_date
from files
inner join torrents on files.infohash = torrents.infohash
order by torrents.seeders desc, files.size_bytes desc;
-- Drop the raw imported rows (and any row whose torrent has no seeders value)
delete from files where seeders is null;
EOF

  rm torrent_files_temp
fi
# Publish the finished scratch database under its final name. The target is
# quoted (and preceded by --) so paths containing spaces or starting with a
# dash are handled, and success is only reported when the move worked.
if ! mv -- db_tmp "$db_file"; then
  echo "Error: could not move db_tmp to $db_file" >&2
  exit 1
fi
echo "Done."