torrents.csv/scripts/build_sqlite.sh

93 lines
2.5 KiB
Bash
Executable File

#!/bin/bash
# Builds a SQLite database from the torrents CSV and, when it exists, the
# torrent file-listing JSON.
#
# Input paths are relative to the current working directory
# (presumably the scripts/ directory -- confirm against how this is invoked).
#
# Environment:
#   TORRENTS_CSV_DB_FILE  destination of the finished database
#                         (default: ../torrents.db)
csv_file="../torrents.csv"
torrent_files_json="../torrent_files.json"
db_file="${TORRENTS_CSV_DB_FILE:-../torrents.db}"

echo "Creating temporary torrents.db file from $csv_file ..."

# Remove double quotes for csv import.
# Abort on failure: continuing would build an empty database that later
# replaces the existing one.
if ! sed 's/"//g' "$csv_file" > torrents_removed_quotes.csv; then
  echo "Error: could not read $csv_file" >&2
  rm -f torrents_removed_quotes.csv
  exit 1
fi

# Sort by seeders desc before insert (the sort key starts at field 5, which is
# imported into the "seeders" column below).
if ! sort --field-separator=';' --key=5 -nr \
  -o torrents_removed_quotes.csv torrents_removed_quotes.csv; then
  echo "Error: could not sort the torrent rows" >&2
  rm -f torrents_removed_quotes.csv
  exit 1
fi

# Start from a fresh temporary database so tables left behind by an
# interrupted earlier run cannot leak into this build.
rm -f db_tmp

# The heredoc delimiter is quoted, so the SQL reaches sqlite3 verbatim.
sqlite3 -batch db_tmp <<'EOF'
drop table if exists torrents;
create table torrents(
  "infohash" TEXT,
  "name" TEXT,
  "size_bytes" INTEGER,
  "created_unix" INTEGER,
  "seeders" INTEGER,
  "leechers" INTEGER,
  "completed" INTEGER,
  "scraped_date" INTEGER
);
.separator ";"
.import torrents_removed_quotes.csv torrents
UPDATE torrents SET completed=NULL WHERE completed = '';
EOF
sqlite_status=$?

# The intermediate CSV is no longer needed whether or not the import worked.
rm -f torrents_removed_quotes.csv

if [[ "$sqlite_status" -ne 0 ]]; then
  echo "Error: sqlite3 failed while building the torrents table" >&2
  rm -f db_tmp
  exit 1
fi
# Cache torrent files if they exist
if [[ -f "$torrent_files_json" ]]; then
  echo "Building files DB from $torrent_files_json ..."

  # Removes every intermediate artifact and aborts, so that a half-built
  # database is never moved into place by the rest of the script.
  _abort_files_build() {
    echo "Error: $1" >&2
    rm -f torrent_files_temp torrent_files_temp_2 db_tmp
    exit 1
  }

  # Old way, abandoned because it needs too much RAM
  # jq -r 'to_entries[] | {hash: .key, val: .value[]} | [.hash, .val.i, .val.p, .val.l] | join(";")' $torrent_files_json > torrent_files_temp

  # New way, credit to ogusismail : https://stackoverflow.com/a/55600294/1655478
  # Streams the JSON and emits one ";"-joined line per file entry.
  jq --stream -n -r 'foreach inputs as $pv ([[],[]]; if ($pv|length) == 2 then (.[0] |= if . == [] then . + [$pv[0][0],$pv[1]] else . + [$pv[1]] end) else [[],.[0]] end; if .[0] == [] and .[1] != [] then .[1] else empty end) | join(";")' "$torrent_files_json" > torrent_files_temp \
    || _abort_files_build "jq could not process $torrent_files_json"

  # Keep only the rows with exactly four ";"-separated fields and a non-empty
  # last field; anything else (e.g. a path containing ";") cannot be imported
  # into the four-column table below.
  rg '^([^;]*;){3}[^;]+$' torrent_files_temp > torrent_files_temp_2
  rg_status=$?
  # rg exits 1 when nothing matched, which is a valid (empty) result; only a
  # higher status means rg itself failed or is not installed.
  if [[ "$rg_status" -gt 1 ]]; then
    _abort_files_build "rg failed while filtering the file rows"
  fi
  mv torrent_files_temp_2 torrent_files_temp \
    || _abort_files_build "could not replace torrent_files_temp"

  # The heredoc delimiter is quoted, so the SQL reaches sqlite3 verbatim.
  sqlite3 -batch db_tmp <<'EOF'
-- Clear tables left behind by an interrupted earlier run
drop table if exists files_tmp;
drop table if exists files;
create table files_tmp(
  "infohash" TEXT,
  "index_" INTEGER,
  "path" TEXT,
  "size_bytes" INTEGER
);
.separator ";"
.import torrent_files_temp files_tmp
-- Filling the extra columns
create table files(
  "infohash" TEXT,
  "index_" INTEGER,
  "path" TEXT,
  "size_bytes" INTEGER,
  "created_unix" INTEGER,
  "seeders" INTEGER,
  "leechers" INTEGER,
  "completed" INTEGER,
  "scraped_date" INTEGER
);
insert into files
select files_tmp.infohash,
  files_tmp.index_,
  files_tmp.path,
  files_tmp.size_bytes,
  torrents.created_unix,
  torrents.seeders,
  torrents.leechers,
  torrents.completed,
  torrents.scraped_date
from files_tmp
inner join torrents on files_tmp.infohash = torrents.infohash
order by torrents.seeders desc, files_tmp.size_bytes desc;
delete from files where seeders is null;
drop table files_tmp;
EOF
  sqlite_status=$?
  if [[ "$sqlite_status" -ne 0 ]]; then
    _abort_files_build "sqlite3 failed while building the files table"
  fi

  rm -f torrent_files_temp
fi
# Move the finished temporary database to its destination. The destination is
# quoted so a TORRENTS_CSV_DB_FILE containing spaces or glob characters is
# treated as one path, and "--" keeps a leading "-" from being read as an
# option.
if ! mv -- db_tmp "$db_file"; then
  echo "Error: could not move db_tmp to $db_file" >&2
  exit 1
fi
echo "Done."