scan_torrent_files.js now filters out torrents not in torrents.csv.

scan_torrents.sh now runs the file scanner.
Some additional fixes to pruning.
This commit is contained in:
Dessalines 2019-02-06 10:02:13 -08:00
parent a29edde5e8
commit cf71b8f6df
8 changed files with 39 additions and 27 deletions

1
.gitignore vendored
View File

@ -2,3 +2,4 @@ run.out
old_greps.sh old_greps.sh
torrents.db torrents.db
.vscode .vscode
backups

View File

@ -44,8 +44,9 @@ fn main() {
// torrentz2(save_dir); // torrentz2(save_dir);
magnetdl(save_dir); magnetdl(save_dir);
skytorrents(save_dir);
leetx(save_dir); leetx(save_dir);
skytorrents(save_dir);
if let Some(t) = matches.value_of("TORRENTS_CSV_FILE") { if let Some(t) = matches.value_of("TORRENTS_CSV_FILE") {
torrents_csv_scan(Path::new(t), save_dir); torrents_csv_scan(Path::new(t), save_dir);

View File

@ -3,7 +3,7 @@ csv_file="${TORRENTS_CSV_FILE:-../torrents.csv}"
db_file="${TORRENTS_CSV_DB_FILE:-../torrents.db}" db_file="${TORRENTS_CSV_DB_FILE:-../torrents.db}"
torrent_files_json="`pwd`/../torrent_files.json" torrent_files_json="`pwd`/../torrent_files.json"
echo "Creating temporary torrents.db file..." echo "Creating temporary torrents.db file from $csv_file ..."
# Remove double quotes for csv import # Remove double quotes for csv import
sed 's/\"//g' $csv_file > torrents_removed_quotes.csv sed 's/\"//g' $csv_file > torrents_removed_quotes.csv
@ -11,7 +11,6 @@ sed 's/\"//g' $csv_file > torrents_removed_quotes.csv
# Sort by seeders desc before insert # Sort by seeders desc before insert
sort --field-separator=';' --key=5 -nr -o torrents_removed_quotes.csv torrents_removed_quotes.csv sort --field-separator=';' --key=5 -nr -o torrents_removed_quotes.csv torrents_removed_quotes.csv
rm db_tmp
touch db_tmp touch db_tmp
sqlite3 -batch db_tmp <<"EOF" sqlite3 -batch db_tmp <<"EOF"

View File

@ -1,6 +1,6 @@
# This prunes torrents.csv, removing those with too many columns, and sorts it # This prunes torrents.csv, removing those with too many columns, and sorts it
echo "Pruning torrents.csv ..." echo "Pruning torrents.csv ..."
cd .. pushd ..
torrents_csv="`pwd`/torrents.csv" torrents_csv="`pwd`/torrents.csv"
torrents_csv_tmp="`pwd`/torrents_prune_tmp.csv" torrents_csv_tmp="`pwd`/torrents_prune_tmp.csv"
scanned_out="`pwd`/infohashes_scanned.txt" scanned_out="`pwd`/infohashes_scanned.txt"
@ -12,7 +12,7 @@ rg "^([^;]*;){7}[^;]+$" $torrents_csv_tmp > tmp_adds
mv tmp_adds $torrents_csv_tmp mv tmp_adds $torrents_csv_tmp
# Remove random newlines # Remove random newlines
sed -i '/^$/d' $torrents_csv_tmp sed -i '/^$/d' $torrents_csv_tmp
# Extract the header # Extract the header
header=$(head -n1 $torrents_csv_tmp) header=$(head -n1 $torrents_csv_tmp)
@ -41,4 +41,6 @@ sed -i "1i $header" $torrents_csv_tmp
mv $torrents_csv_tmp $torrents_csv mv $torrents_csv_tmp $torrents_csv
popd
echo "Pruning done." echo "Pruning done."

View File

@ -32,7 +32,7 @@ for f in tmp_*; do
done done
# Remove those lines from the file # Remove those lines from the file
rg -vwF -f no_seeds $torrents_csv > $torrents_removed grep -vwF -f no_seeds $torrents_csv > $torrents_removed
cd .. cd ..
rm $prune_currents_tmps rm $prune_currents_tmps

View File

@ -1,16 +1,18 @@
// jq -r 'to_entries[] | {hash: .key, val: .value[]} | {hash: .hash, i: .val.i, p: .val.p, l: .val.l}' torrent_files.json
// jq -r 'to_entries[] | {hash: .key, val: .value[]} | [.hash, .val.i, .val.p, .val.l] | join(";")' torrent_files.json
var fs = require('fs'), var fs = require('fs'),
path = require('path'), path = require('path'),
readTorrent = require('read-torrent'), readTorrent = require('read-torrent'),
argv = require('minimist')(process.argv.slice(2)); argv = require('minimist')(process.argv.slice(2));
var torrentFiles = {}; var torrentFiles = {};
var torrentCsvHashes = new Set();
var jsonFile = '../torrent_files.json'; var jsonFile = '../torrent_files.json';
var torrentsCsvFile = '../torrents.csv';
main(); main();
async function main() { async function main() {
await fillTorrentFiles(); await fillTorrentFiles();
await fillTorrentCsvHashes();
await scanFolder(); await scanFolder();
writeFile(); writeFile();
} }
@ -23,13 +25,26 @@ async function fillTorrentFiles() {
} }
} }
/**
 * Loads the infohashes listed in the torrents csv into `torrentCsvHashes`.
 *
 * Reads `torrentsCsvFile` as UTF-8 and treats the first `;`-separated field
 * of every line as an infohash, adding it to the module-level set. Blank
 * lines are skipped and the header's `infohash` column name is removed.
 *
 * @returns {Promise<void>} Resolves once the set has been populated; rejects
 *   if the csv file cannot be read.
 */
async function fillTorrentCsvHashes() {
  const fileContents = await fs.promises.readFile(torrentsCsvFile, 'utf8');
  // Split on LF or CRLF so Windows line endings never leak into a hash.
  for (const line of fileContents.split(/\r?\n/)) {
    const hash = line.split(';')[0].trim();
    // A blank line (e.g. the trailing newline) would otherwise add '' to the set.
    if (hash !== '') {
      torrentCsvHashes.add(hash);
    }
  }
  // The header row's first column is the literal 'infohash', not a real hash.
  torrentCsvHashes.delete('infohash');
}
async function scanFolder() { async function scanFolder() {
console.log('Scanning dir: ' + argv.dir + '...'); console.log('Scanning dir: ' + argv.dir + '...');
var files = fs.readdirSync(argv.dir).filter(f => { var files = fs.readdirSync(argv.dir).filter(f => {
var f = f.split('.'); var f = f.split('.');
var ext = f[1]; var ext = f[1];
var hash = f[0]; var hash = f[0];
return (ext == 'torrent' && !Object.keys(torrentFiles).includes(hash)); // It must be a torrent file, NOT already be in the files json,
// and be an infohash in the csv file.
return (ext == 'torrent' &&
!Object.keys(torrentFiles).includes(hash));
}); });
for (const file of files) { for (const file of files) {
var fullPath = argv.dir + '/' + file; var fullPath = argv.dir + '/' + file;
@ -41,7 +56,10 @@ async function scanFolder() {
} }
function writeFile() { function writeFile() {
torrentFiles = Object.keys(torrentFiles).sort().reduce((r, k) => (r[k] = torrentFiles[k], r), {}); torrentFiles = Object.keys(torrentFiles)
.sort()
.filter(hash => torrentCsvHashes.has(hash))
.reduce((r, k) => (r[k] = torrentFiles[k], r), {});
fs.writeFileSync(jsonFile, JSON.stringify(torrentFiles)); fs.writeFileSync(jsonFile, JSON.stringify(torrentFiles));
console.log(`${jsonFile} written.`); console.log(`${jsonFile} written.`);
} }

View File

@ -1,10 +0,0 @@
torrent_files_json="`pwd`/../torrent_files.json"
# Scan the torrent_files.json for already scanned torrents
# Disjoint the ones there with the ones in your torrent scan dir
# Run the js read-torrent in that dir, and update the torrent_files.json with the new ones
node scan_torrent_files.js --dir "$1"

View File

@ -2,10 +2,11 @@
# Checking arguments # Checking arguments
# Help line # Help line
cd ..
torrents_csv="`pwd`/torrents.csv" torrents_csv="`pwd`/../torrents.csv"
scanned_out="`pwd`/infohashes_scanned.txt" scanned_out="`pwd`/../infohashes_scanned.txt"
tmp_torrent_dir="`pwd`/tmp_torrents" tmp_torrent_dir="`pwd`/../tmp_torrents"
torrent_files_json="`pwd`/../torrent_files.json"
touch $scanned_out touch $scanned_out
help="Run ./scan_torrents.sh [TORRENTS_DIR] \nor goto https://gitlab.com/dessalines/torrents.csv for more help" help="Run ./scan_torrents.sh [TORRENTS_DIR] \nor goto https://gitlab.com/dessalines/torrents.csv for more help"
@ -42,7 +43,7 @@ ls|parallel -n100 mkdir {#}\;mv {} {#}
for tmp_torrent_dir_sub in *; do for tmp_torrent_dir_sub in *; do
echo "sub dir: $tmp_torrent_dir_sub" echo "sub dir: $tmp_torrent_dir_sub"
find $tmp_torrent_dir_sub -type f -exec basename {} .torrent \; > names.out find $tmp_torrent_dir_sub -type f -exec basename {} .torrent \; > names.out
# Delete null torrents from the temp dir # Delete null torrents from the temp dir
find $tmp_torrent_dir_sub -name "*.torrent" -size -2k -delete find $tmp_torrent_dir_sub -name "*.torrent" -size -2k -delete
@ -68,10 +69,10 @@ for tmp_torrent_dir_sub in *; do
done done
popd popd
cd scripts
. prune.sh . prune.sh
# Remove the temp dir # Remove the temp dir
rm -rf "$tmp_torrent_dir" rm -rf "$tmp_torrent_dir"
# Scan the torrent dir for new files, and add them
node scan_torrent_files.js --dir "$torrents_dir"