torrents.csv/scripts/build_sqlite.sh

93 lines
2.5 KiB
Bash
Raw Normal View History

2018-12-02 21:05:00 +00:00
#!/bin/bash
#
# Build the torrents SQLite database from the CSV (and, when present, the
# per-torrent file listing JSON).
#
# Usage:   ./build_sqlite.sh      (paths below are relative to the current
#                                  working directory)
# Env:     TORRENTS_CSV_DB_FILE   destination database path
#                                 (default: ../torrents.db)
# Needs:   sqlite3; additionally jq and rg when ../torrent_files.json exists.
#
# The database is assembled in a scratch file and only moved over the
# destination after every step has succeeded, so a failed build never
# replaces an existing database.

set -euo pipefail

csv_file="../torrents.csv"
torrent_files_json="../torrent_files.json"
db_file="${TORRENTS_CSV_DB_FILE:-../torrents.db}"

# Scratch files, created in the current directory.
# NOTE: tmp_csv and tmp_files are also spelled out literally in the
# ".import" lines of the SQL heredocs below (the heredocs are quoted, so no
# shell expansion happens there) -- keep the names in sync.
tmp_csv="torrents_removed_quotes.csv"
tmp_db="db_tmp"
tmp_files="torrent_files_temp"
tmp_files_filtered="torrent_files_temp_2"

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Abort early if an external tool this script depends on is missing.
require_tool() {
  command -v "$1" >/dev/null 2>&1 || die "Required tool not found in PATH: $1"
}

# Remove every scratch file; runs on any exit path (success or failure).
cleanup() {
  rm -f -- "$tmp_csv" "$tmp_db" "$tmp_files" "$tmp_files_filtered"
}
trap cleanup EXIT

require_tool sqlite3
[[ -r "$csv_file" ]] || die "Input CSV not found or not readable: $csv_file"

# Start from a clean slate: a scratch DB left behind by an aborted run would
# make the "create table" statements below fail.
cleanup

echo "Creating temporary torrents.db file from $csv_file ..."

# Remove double quotes for csv import
sed 's/"//g' "$csv_file" > "$tmp_csv"

# Sort by seeders desc before insert
sort --field-separator=';' --key=5 -nr -o "$tmp_csv" "$tmp_csv"

# -bail stops sqlite3 at the first error instead of carrying on with the
# remaining statements.
sqlite3 -batch -bail "$tmp_db" <<'EOF'
drop table if exists torrents;
create table torrents(
"infohash" TEXT,
"name" TEXT,
"size_bytes" INTEGER,
"created_unix" INTEGER,
"seeders" INTEGER,
"leechers" INTEGER,
"completed" INTEGER,
"scraped_date" INTEGER
);
.separator ";"
.import torrents_removed_quotes.csv torrents
UPDATE torrents SET completed=NULL WHERE completed = '';
EOF

rm -f -- "$tmp_csv"

# Cache torrent files if they exist
if [[ -f "$torrent_files_json" ]]; then
  require_tool jq
  require_tool rg

  echo "Building files DB from $torrent_files_json ..."

  # Old way, doesn't work with too much ram
  # jq -r 'to_entries[] | {hash: .key, val: .value[]} | [.hash, .val.i, .val.p, .val.l] | join(";")' $torrent_files_json > torrent_files_temp
  # New way, credit to ogusismail : https://stackoverflow.com/a/55600294/1655478
  jq --stream -n -r 'foreach inputs as $pv ([[],[]]; if ($pv|length) == 2 then (.[0] |= if . == [] then . + [$pv[0][0],$pv[1]] else . + [$pv[1]] end) else [[],.[0]] end; if .[0] == [] and .[1] != [] then .[1] else empty end) | join(";")' "$torrent_files_json" > "$tmp_files"

  # Keep only rows with exactly four ';'-separated fields (drops those with
  # too many ';'). rg exits 1 when nothing matches, which is not an error
  # here; any higher status is a real failure.
  rg_status=0
  rg '^([^;]*;){3}[^;]+$' "$tmp_files" > "$tmp_files_filtered" || rg_status=$?
  (( rg_status <= 1 )) || die "rg failed (exit $rg_status) while filtering $tmp_files"
  mv -- "$tmp_files_filtered" "$tmp_files"

  sqlite3 -batch -bail "$tmp_db" <<'EOF'
create table files_tmp(
"infohash" TEXT,
"index_" INTEGER,
"path" TEXT,
"size_bytes" INTEGER
);
.separator ";"
.import torrent_files_temp files_tmp
-- Filling the extra columns
create table files(
"infohash" TEXT,
"index_" INTEGER,
"path" TEXT,
"size_bytes" INTEGER,
"created_unix" INTEGER,
"seeders" INTEGER,
"leechers" INTEGER,
"completed" INTEGER,
"scraped_date" INTEGER
);
insert into files
select files_tmp.infohash,
files_tmp.index_,
files_tmp.path,
files_tmp.size_bytes,
torrents.created_unix,
torrents.seeders,
torrents.leechers,
torrents.completed,
torrents.scraped_date
from files_tmp
inner join torrents on files_tmp.infohash = torrents.infohash
order by torrents.seeders desc, files_tmp.size_bytes desc;
delete from files where seeders is null;
drop table files_tmp;
EOF

  rm -f -- "$tmp_files"
fi

# Publish the finished database only after every step above succeeded.
mv -- "$tmp_db" "$db_file"

echo "Done."