Changing to csv for torrent files as well. Fixes #60.
This commit is contained in:
parent
729b876102
commit
61523cf846
19
README.md
19
README.md
|
@ -4,7 +4,7 @@
|
||||||
|
|
||||||
[Demo Server](https://torrents-csv.ml)
|
[Demo Server](https://torrents-csv.ml)
|
||||||
|
|
||||||
`Torrents.csv` is a *collaborative* repository of torrents and their files, consisting of a searchable `torrents.csv`, and `torrent_files.json`. With it you can search for torrents, or files within torrents. It aims to be a universal file system for popular data.
|
`Torrents.csv` is a *collaborative* repository of torrents and their files, consisting of a searchable `torrents.csv`, and `torrent_files.csv`. With it you can search for torrents, or files within torrents. It aims to be a universal file system for popular data.
|
||||||
|
|
||||||
It's initially populated with a January 2017 backup of the pirate bay, and new torrents are periodically added from various torrent sites. It comes with a self-hostable [Torrents.csv webserver](https://torrents-csv.ml), a command line search, and a folder scanner to add torrents and their files.
|
It's initially populated with a January 2017 backup of the pirate bay, and new torrents are periodically added from various torrent sites. It comes with a self-hostable [Torrents.csv webserver](https://torrents-csv.ml), a command line search, and a folder scanner to add torrents and their files.
|
||||||
|
|
||||||
|
@ -62,7 +62,7 @@ bleh season 1 (1993-)
|
||||||
link: magnet:?xt=urn:btih:INFO_HASH_HERE
|
link: magnet:?xt=urn:btih:INFO_HASH_HERE
|
||||||
```
|
```
|
||||||
## Uploading / Adding Torrents from a Directory
|
## Uploading / Adding Torrents from a Directory
|
||||||
An *upload*, consists of making a pull request after running the `scan_torrents.sh` script, which adds torrents from a directory you choose to the `.csv` file, after checking that they aren't already there, and that they have seeders. It also adds their files to `torrent_files.json`.
|
An *upload* consists of making a pull request after running the `scan_torrents.sh` script, which adds torrents from a directory you choose to the `.csv` file, after checking that they aren't already there and that they have seeders. It also adds their files to `torrent_files.csv`.
|
||||||
|
|
||||||
### Requirements
|
### Requirements
|
||||||
- [Torrent-Tracker-Health Dessalines branch](https://github.com/dessalines/torrent-tracker-health)
|
- [Torrent-Tracker-Health Dessalines branch](https://github.com/dessalines/torrent-tracker-health)
|
||||||
|
@ -108,16 +108,7 @@ infohash;name;size_bytes;created_unix;seeders;leechers;completed;scraped_date
|
||||||
# torrents here...
|
# torrents here...
|
||||||
```
|
```
|
||||||
|
|
||||||
## How the torrent_files.json looks
|
## How the torrent_files.csv looks
|
||||||
```
|
```sh
|
||||||
{
|
infohash;index;path;size_bytes
|
||||||
"012ae083ec82bf911f4fe503b9f6df1effaad9ac": [
|
|
||||||
{
|
|
||||||
"i": 0, // the index
|
|
||||||
"p": "File 1", // the path
|
|
||||||
"l": 88546036A // the size in bytes
|
|
||||||
},
|
|
||||||
...
|
|
||||||
]
|
|
||||||
}
|
|
||||||
```
|
```
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
csv_file="../torrents.csv"
|
csv_file="../torrents.csv"
|
||||||
torrent_files_json="../torrent_files.json"
|
torrent_files_csv="../torrent_files.csv"
|
||||||
db_file="${TORRENTS_CSV_DB_FILE:-../torrents.db}"
|
db_file="${TORRENTS_CSV_DB_FILE:-../torrents.db}"
|
||||||
|
|
||||||
echo "Creating temporary torrents.db file from $csv_file ..."
|
echo "Creating temporary torrents.db file..."
|
||||||
|
|
||||||
# Remove double quotes for csv import
|
# Remove double quotes for csv import
|
||||||
sed 's/\"//g' $csv_file > torrents_removed_quotes.csv
|
sed 's/\"//g' $csv_file > torrents_removed_quotes.csv
|
||||||
|
@ -31,18 +31,15 @@ UPDATE torrents SET completed=NULL WHERE completed = '';
|
||||||
EOF
|
EOF
|
||||||
rm torrents_removed_quotes.csv
|
rm torrents_removed_quotes.csv
|
||||||
|
|
||||||
# Cache torrent files if they exist
|
# Cache torrent files
|
||||||
if [ -f $torrent_files_json ]; then
|
echo "Building files DB from $torrent_files_csv ..."
|
||||||
echo "Building files DB from $torrent_files_json ..."
|
|
||||||
|
|
||||||
# Old way, doesn't work with too much ram
|
# Remove double quotes for csv import
|
||||||
# jq -r 'to_entries[] | {hash: .key, val: .value[]} | [.hash, .val.i, .val.p, .val.l] | join(";")' $torrent_files_json > torrent_files_temp
|
sed 's/\"//g' $torrent_files_csv > torrent_files_removed_quotes.csv
|
||||||
|
|
||||||
# New way, credit to ogusismail : https://stackoverflow.com/a/55600294/1655478
|
|
||||||
jq --stream -n -r 'foreach inputs as $pv ([[],[]]; if ($pv|length) == 2 then (.[0] |= if . == [] then . + [$pv[0][0],$pv[1]] else . + [$pv[1]] end) else [[],.[0]] end; if .[0] == [] and .[1] != [] then .[1] else empty end) | join(";")' $torrent_files_json > torrent_files_temp
|
|
||||||
|
|
||||||
# Removing those with too many ;
|
# Removing those with too many ;
|
||||||
rg "^([^;]*;){3}[^;]+$" torrent_files_temp > torrent_files_temp_2
|
rg "^([^;]*;){3}[^;]+$" torrent_files_removed_quotes.csv > torrent_files_temp_2
|
||||||
|
rm torrent_files_removed_quotes.csv
|
||||||
mv torrent_files_temp_2 torrent_files_temp
|
mv torrent_files_temp_2 torrent_files_temp
|
||||||
|
|
||||||
sqlite3 -batch db_tmp<<EOF
|
sqlite3 -batch db_tmp<<EOF
|
||||||
|
@ -84,9 +81,7 @@ delete from files where seeders is null;
|
||||||
drop table files_tmp;
|
drop table files_tmp;
|
||||||
EOF
|
EOF
|
||||||
rm torrent_files_temp
|
rm torrent_files_temp
|
||||||
fi
|
|
||||||
|
|
||||||
mv db_tmp $db_file
|
mv db_tmp $db_file
|
||||||
|
|
||||||
echo "Done."
|
|
||||||
|
|
||||||
|
echo "Done."
|
||||||
|
|
|
@ -5,6 +5,9 @@ torrents_csv="`pwd`/torrents.csv"
|
||||||
torrents_csv_tmp="`pwd`/torrents_prune_tmp.csv"
|
torrents_csv_tmp="`pwd`/torrents_prune_tmp.csv"
|
||||||
scanned_out="`pwd`/infohashes_scanned.txt"
|
scanned_out="`pwd`/infohashes_scanned.txt"
|
||||||
|
|
||||||
|
torrent_files_csv="`pwd`/torrent_files.csv"
|
||||||
|
torrent_files_csv_tmp="`pwd`/torrent_files_tmp.csv"
|
||||||
|
|
||||||
cp $torrents_csv $torrents_csv_tmp
|
cp $torrents_csv $torrents_csv_tmp
|
||||||
|
|
||||||
# Remove lines that don't have exactly 7 ';'
|
# Remove lines that don't have exactly 7 ';'
|
||||||
|
@ -39,6 +42,22 @@ sed -i "1i $header" $torrents_csv_tmp
|
||||||
|
|
||||||
mv $torrents_csv_tmp $torrents_csv
|
mv $torrents_csv_tmp $torrents_csv
|
||||||
|
|
||||||
|
# Torrent files cleanup
|
||||||
|
echo "Pruning torrent_files.csv ..."
|
||||||
|
cp $torrent_files_csv $torrent_files_csv_tmp
|
||||||
|
|
||||||
|
# Header
|
||||||
|
header=$(head -n1 $torrent_files_csv_tmp)
|
||||||
|
sed -i '1d' $torrent_files_csv_tmp
|
||||||
|
|
||||||
|
# Sort the rows by infohash, then file index (fields 1-2)
|
||||||
|
sort --field-separator=';' --key=1,2 -o $torrent_files_csv_tmp $torrent_files_csv_tmp
|
||||||
|
|
||||||
|
# Add the header back in
|
||||||
|
sed -i "1i $header" $torrent_files_csv_tmp
|
||||||
|
|
||||||
|
mv $torrent_files_csv_tmp $torrent_files_csv
|
||||||
|
|
||||||
popd
|
popd
|
||||||
|
|
||||||
echo "Pruning done."
|
echo "Pruning done."
|
||||||
|
|
|
@ -1,44 +1,38 @@
|
||||||
const fs = require('fs'),
|
const fs = require('fs'),
|
||||||
path = require('path'),
|
|
||||||
readTorrent = require('read-torrent'),
|
readTorrent = require('read-torrent'),
|
||||||
argv = require('minimist')(process.argv.slice(2));
|
argv = require('minimist')(process.argv.slice(2)),
|
||||||
|
readline = require('readline');
|
||||||
|
|
||||||
var torrentFiles = {};
|
var scannedCsvHashes = new Set();
|
||||||
var torrentCsvHashes = new Set();
|
|
||||||
|
|
||||||
var jsonFile = '../torrent_files.json';
|
var torrentFilesCsv = '../torrent_files.csv';
|
||||||
var torrentsCsvFile = '../torrents.csv';
|
console.log(`Scanning torrent files from ${argv.dir} into ${torrentFilesCsv} ...`);
|
||||||
console.log(`Scanning torrent files from ${argv.dir} into ${jsonFile} ...`);
|
|
||||||
main();
|
main();
|
||||||
|
|
||||||
async function main() {
|
async function main() {
|
||||||
await fillTorrentFiles();
|
await fillScannedHashes();
|
||||||
await fillTorrentCsvHashes();
|
scanFolder();
|
||||||
await scanFolder();
|
|
||||||
writeFile();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function fillScannedHashes() {
|
||||||
|
console.log(`Filling CSV hashes...`);
|
||||||
|
const fileStream = fs.createReadStream(torrentFilesCsv);
|
||||||
|
|
||||||
async function fillTorrentFiles() {
|
const rl = readline.createInterface({
|
||||||
if (fs.existsSync(jsonFile)) {
|
input: fileStream,
|
||||||
var fileContents = await fs.promises.readFile(jsonFile, 'utf8');
|
crlfDelay: Infinity
|
||||||
torrentFiles = JSON.parse(fileContents);
|
});
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async function fillTorrentCsvHashes() {
|
for await (const line of rl) {
|
||||||
var fileContents = await fs.promises.readFile(torrentsCsvFile, 'utf8');
|
|
||||||
var lines = fileContents.split('\n');
|
|
||||||
for (const line of lines) {
|
|
||||||
var hash = line.split(';')[0];
|
var hash = line.split(';')[0];
|
||||||
torrentCsvHashes.add(hash);
|
scannedCsvHashes.add(hash);
|
||||||
}
|
}
|
||||||
torrentCsvHashes.delete('infohash');
|
|
||||||
|
scannedCsvHashes.delete('infohash');
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scanFolder() {
|
async function scanFolder() {
|
||||||
console.log('Scanning dir: ' + argv.dir + '...');
|
console.log('Scanning dir: ' + argv.dir + '...');
|
||||||
var fileHashes = new Set(Object.keys(torrentFiles));
|
|
||||||
|
|
||||||
var files = fs.readdirSync(argv.dir).filter(f => {
|
var files = fs.readdirSync(argv.dir).filter(f => {
|
||||||
var sp = f.split('.');
|
var sp = f.split('.');
|
||||||
|
@ -46,21 +40,35 @@ async function scanFolder() {
|
||||||
var hash = sp[0];
|
var hash = sp[0];
|
||||||
var fullPath = argv.dir + '/' + f;
|
var fullPath = argv.dir + '/' + f;
|
||||||
// It must be a torrent file,
|
// It must be a torrent file,
|
||||||
// NOT in the torrent_files.json
|
// must not be in the CSV file
|
||||||
// must be in the CSV file
|
|
||||||
// must have a file size
|
// must have a file size
|
||||||
|
// must be in infohash format length
|
||||||
return (ext == 'torrent' &&
|
return (ext == 'torrent' &&
|
||||||
!fileHashes.has(hash) &&
|
!scannedCsvHashes.has(hash) &&
|
||||||
torrentCsvHashes.has(hash) &&
|
getFilesizeInBytes(fullPath) > 0) &&
|
||||||
getFilesizeInBytes(fullPath) > 0);
|
hash.length == 40;
|
||||||
});
|
});
|
||||||
for (const file of files) {
|
|
||||||
|
for (file of files) {
|
||||||
var fullPath = argv.dir + '/' + file;
|
var fullPath = argv.dir + '/' + file;
|
||||||
console.log(`Scanning File ${fullPath}`);
|
console.log(`Scanning File ${fullPath}`);
|
||||||
var torrent = await read(fullPath).catch(e => console.log('Read error'));
|
var torrent = await read(fullPath).catch(e => console.log(e));
|
||||||
torrentFiles = { ...torrentFiles, ...torrent }; // concat them
|
await writeFile(torrent);
|
||||||
};
|
}
|
||||||
console.log('Done scanning.');
|
console.log('Done.');
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Appends one CSV row per contained file of the scanned torrent(s) to
 * `torrentFilesCsv`, in the format `infohash;index;path;size_bytes`.
 *
 * All rows are built first and written with a single synchronous append, so
 * the rows are on disk, in order, by the time this function returns. The
 * previous version fired one asynchronous `fs.appendFile` per row without
 * waiting for any of them, so `await writeFile(...)` in the caller waited
 * for nothing, row order was not guaranteed, and a write error was thrown
 * from inside the callback where the caller could not handle it.
 *
 * @param {Object<string, Array<{i: number, p: string, l: number}>>} torrent
 *   Map of infohash to its file entries (`i` index, `p` path, `l` size in
 *   bytes). May be `undefined` when reading the torrent failed, because the
 *   caller's `.catch` logs the error and yields `undefined`; that case is a
 *   no-op.
 * @throws {Error} Any file system error raised while appending.
 */
function writeFile(torrent) {
  // A failed read yields no torrent; there is nothing to write.
  if (!torrent) return;

  const rows = [];
  for (const [infohash, files] of Object.entries(torrent)) {
    for (const file of files) {
      rows.push(`${infohash};${file.i};${file.p};${file.l}\n`);
    }
  }

  // Do not touch (or create) the CSV when there are no rows to add.
  if (rows.length === 0) return;

  fs.appendFileSync(torrentFilesCsv, rows.join(''));
}
|
||||||
|
|
||||||
function getFilesizeInBytes(filename) {
|
function getFilesizeInBytes(filename) {
|
||||||
|
@ -69,23 +77,6 @@ function getFilesizeInBytes(filename) {
|
||||||
return fileSizeInBytes;
|
return fileSizeInBytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
function writeFile() {
|
|
||||||
torrentFiles = Object.keys(torrentFiles)
|
|
||||||
.sort()
|
|
||||||
.filter(hash => torrentCsvHashes.has(hash))
|
|
||||||
.reduce((r, k) => (r[k] = torrentFiles[k], r), {});
|
|
||||||
fs.writeFileSync(jsonFile, "{\n");
|
|
||||||
var first = true;
|
|
||||||
for (let [key, value] of Object.entries(torrentFiles)) {
|
|
||||||
if(first) first = false;
|
|
||||||
else fs.appendFileSync(jsonFile, ",\n");
|
|
||||||
fs.appendFileSync(jsonFile, `${JSON.stringify(key)}:${JSON.stringify(value)}`);
|
|
||||||
}
|
|
||||||
fs.appendFileSync(jsonFile, "\n}");
|
|
||||||
console.log(`${jsonFile} written.`);
|
|
||||||
process.exit();
|
|
||||||
}
|
|
||||||
|
|
||||||
function read(uri, options) {
|
function read(uri, options) {
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
readTorrent(uri, (err, info) => {
|
readTorrent(uri, (err, info) => {
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
torrents_csv="`pwd`/../torrents.csv"
|
torrents_csv="`pwd`/../torrents.csv"
|
||||||
scanned_out="`pwd`/../infohashes_scanned.txt"
|
scanned_out="`pwd`/../infohashes_scanned.txt"
|
||||||
tmp_torrent_dir="`pwd`/../tmp_torrents-$RANDOM"
|
tmp_torrent_dir="`pwd`/../tmp_torrents-$RANDOM"
|
||||||
torrent_files_json="`pwd`/../torrent_files.json"
|
torrent_files_csv="`pwd`/../torrent_files.csv"
|
||||||
touch $scanned_out
|
touch $scanned_out
|
||||||
|
|
||||||
help="Run ./scan_torrents.sh [TORRENTS_DIR] \nor goto https://gitlab.com/dessalines/torrents.csv for more help"
|
help="Run ./scan_torrents.sh [TORRENTS_DIR] \nor goto https://gitlab.com/dessalines/torrents.csv for more help"
|
||||||
|
@ -76,5 +76,5 @@ popd
|
||||||
rm -rf "$tmp_torrent_dir"
|
rm -rf "$tmp_torrent_dir"
|
||||||
|
|
||||||
# Scan the torrent dir for new files, and add them
|
# Scan the torrent dir for new files, and add them
|
||||||
node --max-old-space-size=4096 scan_torrent_files.js --dir "$torrents_dir"
|
node --max-old-space-size=8096 scan_torrent_files.js --dir "$torrents_dir"
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue