scan_torrent_files.js now filters out torrents not in torrents.csv.
scan_torrents.sh now runs the file scanner. Some additional fixes to pruning.
This commit is contained in:
parent
a29edde5e8
commit
cf71b8f6df
|
@ -2,3 +2,4 @@ run.out
|
||||||
old_greps.sh
|
old_greps.sh
|
||||||
torrents.db
|
torrents.db
|
||||||
.vscode
|
.vscode
|
||||||
|
backups
|
||||||
|
|
|
@ -44,8 +44,9 @@ fn main() {
|
||||||
|
|
||||||
// torrentz2(save_dir);
|
// torrentz2(save_dir);
|
||||||
magnetdl(save_dir);
|
magnetdl(save_dir);
|
||||||
skytorrents(save_dir);
|
|
||||||
leetx(save_dir);
|
leetx(save_dir);
|
||||||
|
skytorrents(save_dir);
|
||||||
|
|
||||||
|
|
||||||
if let Some(t) = matches.value_of("TORRENTS_CSV_FILE") {
|
if let Some(t) = matches.value_of("TORRENTS_CSV_FILE") {
|
||||||
torrents_csv_scan(Path::new(t), save_dir);
|
torrents_csv_scan(Path::new(t), save_dir);
|
||||||
|
|
|
@ -3,7 +3,7 @@ csv_file="${TORRENTS_CSV_FILE:-../torrents.csv}"
|
||||||
db_file="${TORRENTS_CSV_DB_FILE:-../torrents.db}"
|
db_file="${TORRENTS_CSV_DB_FILE:-../torrents.db}"
|
||||||
torrent_files_json="`pwd`/../torrent_files.json"
|
torrent_files_json="`pwd`/../torrent_files.json"
|
||||||
|
|
||||||
echo "Creating temporary torrents.db file..."
|
echo "Creating temporary torrents.db file from $csv_file ..."
|
||||||
|
|
||||||
# Remove double quotes for csv import
|
# Remove double quotes for csv import
|
||||||
sed 's/\"//g' $csv_file > torrents_removed_quotes.csv
|
sed 's/\"//g' $csv_file > torrents_removed_quotes.csv
|
||||||
|
@ -11,7 +11,6 @@ sed 's/\"//g' $csv_file > torrents_removed_quotes.csv
|
||||||
# Sort by seeders desc before insert
|
# Sort by seeders desc before insert
|
||||||
sort --field-separator=';' --key=5 -nr -o torrents_removed_quotes.csv torrents_removed_quotes.csv
|
sort --field-separator=';' --key=5 -nr -o torrents_removed_quotes.csv torrents_removed_quotes.csv
|
||||||
|
|
||||||
rm db_tmp
|
|
||||||
touch db_tmp
|
touch db_tmp
|
||||||
|
|
||||||
sqlite3 -batch db_tmp <<"EOF"
|
sqlite3 -batch db_tmp <<"EOF"
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
# This prunes torrents.csv, removing those with too many columns, and sorts it
|
# This prunes torrents.csv, removing those with too many columns, and sorts it
|
||||||
echo "Pruning torrents.csv ..."
|
echo "Pruning torrents.csv ..."
|
||||||
cd ..
|
pushd ..
|
||||||
torrents_csv="`pwd`/torrents.csv"
|
torrents_csv="`pwd`/torrents.csv"
|
||||||
torrents_csv_tmp="`pwd`/torrents_prune_tmp.csv"
|
torrents_csv_tmp="`pwd`/torrents_prune_tmp.csv"
|
||||||
scanned_out="`pwd`/infohashes_scanned.txt"
|
scanned_out="`pwd`/infohashes_scanned.txt"
|
||||||
|
@ -41,4 +41,6 @@ sed -i "1i $header" $torrents_csv_tmp
|
||||||
|
|
||||||
mv $torrents_csv_tmp $torrents_csv
|
mv $torrents_csv_tmp $torrents_csv
|
||||||
|
|
||||||
|
popd
|
||||||
|
|
||||||
echo "Pruning done."
|
echo "Pruning done."
|
||||||
|
|
|
@ -32,7 +32,7 @@ for f in tmp_*; do
|
||||||
done
|
done
|
||||||
|
|
||||||
# Remove those lines from the file
|
# Remove those lines from the file
|
||||||
rg -vwF -f no_seeds $torrents_csv > $torrents_removed
|
grep -vwF -f no_seeds $torrents_csv > $torrents_removed
|
||||||
|
|
||||||
cd ..
|
cd ..
|
||||||
rm $prune_currents_tmps
|
rm $prune_currents_tmps
|
||||||
|
|
|
@ -1,16 +1,18 @@
|
||||||
// jq -r 'to_entries[] | {hash: .key, val: .value[]} | {hash: .hash, i: .val.i, p: .val.p, l: .val.l}' torrent_files.json
|
|
||||||
// jq -r 'to_entries[] | {hash: .key, val: .value[]} | [.hash, .val.i, .val.p, .val.l] | join(";")' torrent_files.json
|
|
||||||
var fs = require('fs'),
|
var fs = require('fs'),
|
||||||
path = require('path'),
|
path = require('path'),
|
||||||
readTorrent = require('read-torrent'),
|
readTorrent = require('read-torrent'),
|
||||||
argv = require('minimist')(process.argv.slice(2));
|
argv = require('minimist')(process.argv.slice(2));
|
||||||
|
|
||||||
var torrentFiles = {};
|
var torrentFiles = {};
|
||||||
|
var torrentCsvHashes = new Set();
|
||||||
|
|
||||||
var jsonFile = '../torrent_files.json';
|
var jsonFile = '../torrent_files.json';
|
||||||
|
var torrentsCsvFile = '../torrents.csv';
|
||||||
main();
|
main();
|
||||||
|
|
||||||
async function main() {
|
async function main() {
|
||||||
await fillTorrentFiles();
|
await fillTorrentFiles();
|
||||||
|
await fillTorrentCsvHashes();
|
||||||
await scanFolder();
|
await scanFolder();
|
||||||
writeFile();
|
writeFile();
|
||||||
}
|
}
|
||||||
|
@ -23,13 +25,26 @@ async function fillTorrentFiles() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function fillTorrentCsvHashes() {
|
||||||
|
var fileContents = await fs.promises.readFile(torrentsCsvFile, 'utf8');
|
||||||
|
var lines = fileContents.split('\n');
|
||||||
|
for (const line of lines) {
|
||||||
|
var hash = line.split(';')[0];
|
||||||
|
torrentCsvHashes.add(hash);
|
||||||
|
}
|
||||||
|
torrentCsvHashes.delete('infohash');
|
||||||
|
}
|
||||||
|
|
||||||
async function scanFolder() {
|
async function scanFolder() {
|
||||||
console.log('Scanning dir: ' + argv.dir + '...');
|
console.log('Scanning dir: ' + argv.dir + '...');
|
||||||
var files = fs.readdirSync(argv.dir).filter(f => {
|
var files = fs.readdirSync(argv.dir).filter(f => {
|
||||||
var f = f.split('.');
|
var f = f.split('.');
|
||||||
var ext = f[1];
|
var ext = f[1];
|
||||||
var hash = f[0];
|
var hash = f[0];
|
||||||
return (ext == 'torrent' && !Object.keys(torrentFiles).includes(hash));
|
// It must be a torrent file, NOT already be in the files json,
|
||||||
|
// and be an infohash in the csv file.
|
||||||
|
return (ext == 'torrent' &&
|
||||||
|
!Object.keys(torrentFiles).includes(hash));
|
||||||
});
|
});
|
||||||
for (const file of files) {
|
for (const file of files) {
|
||||||
var fullPath = argv.dir + '/' + file;
|
var fullPath = argv.dir + '/' + file;
|
||||||
|
@ -41,7 +56,10 @@ async function scanFolder() {
|
||||||
}
|
}
|
||||||
|
|
||||||
function writeFile() {
|
function writeFile() {
|
||||||
torrentFiles = Object.keys(torrentFiles).sort().reduce((r, k) => (r[k] = torrentFiles[k], r), {});
|
torrentFiles = Object.keys(torrentFiles)
|
||||||
|
.sort()
|
||||||
|
.filter(hash => torrentCsvHashes.has(hash))
|
||||||
|
.reduce((r, k) => (r[k] = torrentFiles[k], r), {});
|
||||||
fs.writeFileSync(jsonFile, JSON.stringify(torrentFiles));
|
fs.writeFileSync(jsonFile, JSON.stringify(torrentFiles));
|
||||||
console.log(`${jsonFile} written.`);
|
console.log(`${jsonFile} written.`);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,10 +0,0 @@
|
||||||
torrent_files_json="`pwd`/../torrent_files.json"
|
|
||||||
|
|
||||||
# Scan the torrent_files.json for already scanned torrents
|
|
||||||
|
|
||||||
# Disjoint the ones there with the ones in your torrent scan dir
|
|
||||||
|
|
||||||
# Run the js read-torrent in that dir, and update the torrent_files.json with the new ones
|
|
||||||
|
|
||||||
node scan_torrent_files.js --dir "$1"
|
|
||||||
|
|
|
@ -2,10 +2,11 @@
|
||||||
|
|
||||||
# Checking arguments
|
# Checking arguments
|
||||||
# Help line
|
# Help line
|
||||||
cd ..
|
|
||||||
torrents_csv="`pwd`/torrents.csv"
|
torrents_csv="`pwd`/../torrents.csv"
|
||||||
scanned_out="`pwd`/infohashes_scanned.txt"
|
scanned_out="`pwd`/../infohashes_scanned.txt"
|
||||||
tmp_torrent_dir="`pwd`/tmp_torrents"
|
tmp_torrent_dir="`pwd`/../tmp_torrents"
|
||||||
|
torrent_files_json="`pwd`/../torrent_files.json"
|
||||||
touch $scanned_out
|
touch $scanned_out
|
||||||
|
|
||||||
help="Run ./scan_torrents.sh [TORRENTS_DIR] \nor goto https://gitlab.com/dessalines/torrents.csv for more help"
|
help="Run ./scan_torrents.sh [TORRENTS_DIR] \nor goto https://gitlab.com/dessalines/torrents.csv for more help"
|
||||||
|
@ -68,10 +69,10 @@ for tmp_torrent_dir_sub in *; do
|
||||||
done
|
done
|
||||||
|
|
||||||
popd
|
popd
|
||||||
cd scripts
|
|
||||||
. prune.sh
|
. prune.sh
|
||||||
|
|
||||||
# Remove the temp dir
|
# Remove the temp dir
|
||||||
rm -rf "$tmp_torrent_dir"
|
rm -rf "$tmp_torrent_dir"
|
||||||
|
|
||||||
|
# Scan the torrent dir for new files, and add them
|
||||||
|
node scan_torrent_files.js --dir "$torrents_dir"
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue