diff --git a/README.md b/README.md index 793c1bb..288e511 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [Demo Server](https://torrents-csv.ml) -`Torrents.csv` is a *collaborative* repository of torrents and their files, consisting of a searchable `torrents.csv`, and `torrent_files.json`. With it you can search for torrents, or files within torrents. It aims to be a universal file system for popular data. +`Torrents.csv` is a *collaborative* repository of torrents and their files, consisting of a searchable `torrents.csv`, and `torrent_files.csv`. With it you can search for torrents, or files within torrents. It aims to be a universal file system for popular data. Its initially populated with a January 2017 backup of the pirate bay, and new torrents are periodically added from various torrents sites. It comes with a self-hostable [Torrents.csv webserver](https://torrents-csv.ml), a command line search, and a folder scanner to add torrents, and their files. @@ -62,7 +62,7 @@ bleh season 1 (1993-) link: magnet:?xt=urn:btih:INFO_HASH_HERE ``` ## Uploading / Adding Torrents from a Directory -An *upload*, consists of making a pull request after running the `scan_torrents.sh` script, which adds torrents from a directory you choose to the `.csv` file, after checking that they aren't already there, and that they have seeders. It also adds their files to `torrent_files.json`. +An *upload*, consists of making a pull request after running the `scan_torrents.sh` script, which adds torrents from a directory you choose to the `.csv` file, after checking that they aren't already there, and that they have seeders. It also adds their files to `torrent_files.csv`. ### Requirements - [Torrent-Tracker-Health Dessalines branch](https://github.com/dessalines/torrent-tracker-health) @@ -108,16 +108,7 @@ infohash;name;size_bytes;created_unix;seeders;leechers;completed;scraped_date # torrents here... ``` -## How the torrent_files.json looks -``` -{ - "012ae083ec82bf911f4fe503b9f6df1effaad9ac": [ - { - "i": 0, // the index - "p": "File 1", // the path - "l": 88546036A // the size in bytes - }, - ... - ] -} +## How the torrent_files.csv looks +```sh +infohash;index;path;size_bytes ``` diff --git a/scripts/build_sqlite.sh b/scripts/build_sqlite.sh index 139c1dd..b0f1b93 100755 --- a/scripts/build_sqlite.sh +++ b/scripts/build_sqlite.sh @@ -1,9 +1,9 @@ #!/bin/bash csv_file="../torrents.csv" -torrent_files_json="../torrent_files.json" +torrent_files_csv="../torrent_files.csv" db_file="${TORRENTS_CSV_DB_FILE:-../torrents.db}" -echo "Creating temporary torrents.db file from $csv_file ..." +echo "Creating temporary torrents.db file..." # Remove double quotes for csv import sed 's/\"//g' $csv_file > torrents_removed_quotes.csv @@ -31,18 +31,15 @@ UPDATE torrents SET completed=NULL WHERE completed = ''; EOF rm torrents_removed_quotes.csv -# Cache torrent files if they exist -if [ -f $torrent_files_json ]; then - echo "Building files DB from $torrent_files_json ..." +# Cache torrent files +echo "Building files DB from $torrent_files_csv ..." - # Old way, doesn't work with too much ram - # jq -r 'to_entries[] | {hash: .key, val: .value[]} | [.hash, .val.i, .val.p, .val.l] | join(";")' $torrent_files_json > torrent_files_temp - - # New way, credit to ogusismail : https://stackoverflow.com/a/55600294/1655478 - jq --stream -n -r 'foreach inputs as $pv ([[],[]]; if ($pv|length) == 2 then (.[0] |= if . == [] then . + [$pv[0][0],$pv[1]] else . + [$pv[1]] end) else [[],.[0]] end; if .[0] == [] and .[1] != [] then .[1] else empty end) | join(";")' $torrent_files_json > torrent_files_temp +# Remove double quotes for csv import +sed 's/\"//g' $torrent_files_csv > torrent_files_removed_quotes.csv # Removing those with too many ; -rg "^([^;]*;){3}[^;]+$" torrent_files_temp > torrent_files_temp_2 +rg "^([^;]*;){3}[^;]+$" torrent_files_removed_quotes.csv > torrent_files_temp_2 +rm torrent_files_removed_quotes.csv mv torrent_files_temp_2 torrent_files_temp sqlite3 -batch db_tmp< { var sp = f.split('.'); @@ -46,44 +40,41 @@ async function scanFolder() { var hash = sp[0]; var fullPath = argv.dir + '/' + f; // It must be a torrent file, - // NOT in the torrent_files.json - // must be in the CSV file + // must not be in the CSV file // must have a file size + // must be in infohash format length return (ext == 'torrent' && - !fileHashes.has(hash) && - torrentCsvHashes.has(hash) && - getFilesizeInBytes(fullPath) > 0); + !scannedCsvHashes.has(hash) && + getFilesizeInBytes(fullPath) > 0) && + hash.length == 40; }); - for (const file of files) { + + for (file of files) { var fullPath = argv.dir + '/' + file; console.log(`Scanning File ${fullPath}`); - var torrent = await read(fullPath).catch(e => console.log('Read error')); - torrentFiles = { ...torrentFiles, ...torrent }; // concat them - }; - console.log('Done scanning.'); + var torrent = await read(fullPath).catch(e => console.log(e)); + await writeFile(torrent); + } + console.log('Done.'); +} + +function writeFile(torrent) { + for (const infohash in torrent) { + let files = torrent[infohash]; + for (const file of files) { + let csvRow = `${infohash};${file.i};${file.p};${file.l}\n`; + fs.appendFile(torrentFilesCsv, csvRow, function (err) { + if (err) throw err; + }); + + } + } } function getFilesizeInBytes(filename) { - var stats = fs.statSync(filename); - var fileSizeInBytes = stats["size"]; - return fileSizeInBytes; -} - -function writeFile() { - torrentFiles = Object.keys(torrentFiles) - .sort() - .filter(hash => torrentCsvHashes.has(hash)) - .reduce((r, k) => (r[k] = torrentFiles[k], r), {}); - fs.writeFileSync(jsonFile, "{\n"); - var first = true; - for (let [key, value] of Object.entries(torrentFiles)) { - if(first) first = false; - else fs.appendFileSync(jsonFile, ",\n"); - fs.appendFileSync(jsonFile, `${JSON.stringify(key)}:${JSON.stringify(value)}`); - } - fs.appendFileSync(jsonFile, "\n}"); - console.log(`${jsonFile} written.`); - process.exit(); + var stats = fs.statSync(filename); + var fileSizeInBytes = stats["size"]; + return fileSizeInBytes; } function read(uri, options) { diff --git a/scripts/scan_torrents.sh b/scripts/scan_torrents.sh index 76c619c..3076641 100755 --- a/scripts/scan_torrents.sh +++ b/scripts/scan_torrents.sh @@ -6,7 +6,7 @@ torrents_csv="`pwd`/../torrents.csv" scanned_out="`pwd`/../infohashes_scanned.txt" tmp_torrent_dir="`pwd`/../tmp_torrents-$RANDOM" -torrent_files_json="`pwd`/../torrent_files.json" +torrent_files_csv="`pwd`/../torrent_files.csv" touch $scanned_out help="Run ./scan_torrents.sh [TORRENTS_DIR] \nor goto https://gitlab.com/dessalines/torrents.csv for more help" @@ -76,5 +76,5 @@ popd rm -rf "$tmp_torrent_dir" # Scan the torrent dir for new files, and add them -node --max-old-space-size=4096 scan_torrent_files.js --dir "$torrents_dir" +node --max-old-space-size=8096 scan_torrent_files.js --dir "$torrents_dir"