Import of TPB Jan 2017 backup. Updating search and add scripts. Adding prune.sh

This commit is contained in:
Dessalines 2018-10-07 22:43:12 -07:00
parent e767fe77fb
commit 1f31cefc67
6 changed files with 61 additions and 34 deletions

1
.gitignore vendored
View File

@ -1 +1,2 @@
run.out run.out
old_greps.sh

View File

@ -7,7 +7,7 @@ To find torrents run `./search.sh "frasier s01"`
``` ```
Frasier S01-S11 (1993-) Frasier S01-S11 (1993-)
seeders: 33 seeders: 33
size: 13.43 GB size: 13GiB
link: magnet:?xt=urn:btih:3cc5142d0d139bcc9ea9925239a142770b98cf74 link: magnet:?xt=urn:btih:3cc5142d0d139bcc9ea9925239a142770b98cf74
``` ```
@ -34,8 +34,24 @@ infohash;name;size_bytes;created_unix;seeders;leechers;completed;scraped_date
# torrents here... # torrents here...
``` ```
## Requirements
### Searching
- [ripgrep](https://github.com/BurntSushi/ripgrep)
### Uploading
- [Torrent tracker scraper](https://github.com/ZigmundVonZaun/torrent-tracker-scraper)
- [Transmission-cli](https://transmissionbt.com/)
- [Human Friendly](https://humanfriendly.readthedocs.io/en/latest/readme.html#command-line)
## Other info ## Other info
`Torrents.csv` will only store torrents with at least one seeder to keep the file small, and will be periodically purged of non-seeded torrents, and sorted by seeders descending. `Torrents.csv` will only store torrents with at least one seeder to keep the file small, and will be periodically purged of non-seeded torrents, and sorted by seeders descending.
It's initially populated with a January 2017 backup of the pirate bay, but eventually scripts will be written that pull in new torrents daily. It's initially populated with a January 2017 backup of the pirate bay, but eventually scripts will be written that pull in new torrents daily.
## Potential sources for new torrents
- https://www.skytorrents.lol/top100
- https://1337x.to/top-100
- https://1337x.to/trending

View File

@ -37,7 +37,7 @@ else
fi fi
# Loop over all torrents # Loop over all torrents
cd $torrents_dir pushd $torrents_dir
for torrent_file in *.torrent; do for torrent_file in *.torrent; do
# Get fields from transmission # Get fields from transmission
@ -81,7 +81,7 @@ for torrent_file in *.torrent; do
if (( $seeders > 0 )); then if (( $seeders > 0 )); then
# If the infohash already exists, replace the line # If the infohash already exists, replace the line
found_line=$(grep -n $infohash $torrents_csv | cut -d : -f 1) found_line=$(rg -n $infohash $torrents_csv | cut -d : -f 1)
if [ ! -z $found_line ]; then if [ ! -z $found_line ]; then
sed -i "$found_line c$add_line" $torrents_csv sed -i "$found_line c$add_line" $torrents_csv
echo -e "Found $name, updating peers" echo -e "Found $name, updating peers"
@ -97,13 +97,6 @@ for torrent_file in *.torrent; do
done done
# Sort the file, remove random newlines popd
sed -i '/^$/d' $torrents_csv . prune.sh
header=$(head -n1 $torrents_csv)
sed -i '1d' $torrents_csv
sort --field-separator=';' --key=5 -nr -o $torrents_csv $torrents_csv
sed -i "1i $header" $torrents_csv
truncate -s -1 $torrents_csv # Removing last newline

26
prune.sh Normal file → Executable file
View File

@ -0,0 +1,26 @@
# Prunes torrents.csv (resolved against the current working directory): drops
# malformed rows and blank lines, removes duplicate infohashes, and sorts the
# remaining rows by seeders, descending. The header row stays on top.
# Every path is quoted so a working directory containing spaces still works.
# No `exit` is used, so sourcing this file (`. prune.sh`) cannot end the caller.
torrents_csv="$(pwd)/torrents.csv"
# Keep only lines that have exactly 7 ';' (8 fields, the last one non-empty).
# The filtered copy replaces the CSV only when rg succeeded (i.e. it matched at
# least one line); if rg is missing, fails, or matches nothing, the CSV is left
# exactly as it was instead of being overwritten with an empty file.
if rg '^([^;]*;){7}[^;]+$' "$torrents_csv" > tmp_adds; then
  mv -- tmp_adds "$torrents_csv"
  # Remove random newlines
  sed -i '/^$/d' "$torrents_csv"
  # Extract the header
  header=$(head -n1 "$torrents_csv")
  sed -i '1d' "$torrents_csv"
  # Remove dups (rows sharing the same first column, the infohash)
  sort -u -t';' -k1,1 -o "$torrents_csv" "$torrents_csv"
  # Sort by seeders desc
  sort --field-separator=';' --key=5 -nr -o "$torrents_csv" "$torrents_csv"
  if [ -s "$torrents_csv" ]; then
    # Add the header back in
    sed -i "1i $header" "$torrents_csv"
    truncate -s -1 "$torrents_csv" # Removing last newline
  else
    # Only the header survived the filter: `sed 1i` inserts nothing into an
    # empty file, so write the header directly (without a trailing newline).
    printf '%s' "$header" > "$torrents_csv"
  fi
else
  rm -f -- tmp_adds
  echo "prune.sh: could not filter $torrents_csv, leaving it unchanged" >&2
fi

0
scrape.sh Normal file
View File

View File

@ -2,26 +2,17 @@
torrent_csv_file="`pwd`/torrents.csv" torrent_csv_file="`pwd`/torrents.csv"
search_string=${1// /.*} # turn multiple string regexes into i.*am.*spartacus search_string=${1// /.*} # turn multiple string regexes into i.*am.*spartacus
search=$(grep -i "$search_string" $torrent_csv_file)
# Sort results by seeders
search=$(echo -e "$search" | sort --field-separator=';' --key=5 -g)
if [ -z "$search" ]; then
echo "No results found"
else
# Read the lines of the results # Read the lines of the results
while read -r line; do rg -i "$search_string" $torrent_csv_file | sort --field-separator=';' --key=5 -g | while read -r line; do
infohash=$(echo -e "$line" | cut -d ';' -f1) infohash=$(echo -e "$line" | cut -d ';' -f1)
magnet_link="magnet:?xt=urn:btih:$infohash" magnet_link="magnet:?xt=urn:btih:$infohash"
name=$(echo -e "$line" | cut -d ';' -f2) name=$(echo -e "$line" | cut -d ';' -f2)
seeders=$(echo -e "$line" | cut -d ';' -f5) seeders=$(echo -e "$line" | cut -d ';' -f5)
size_bytes=$(echo -e "$line" | cut -d ';' -f3) size_bytes=$(echo -e "$line" | cut -d ';' -f3)
size=$(~/.local/bin/humanfriendly -s $size_bytes) # This slows down the results a bit size=$(numfmt --to=iec-i --suffix=B $size_bytes)
# Construct the search result # Construct the search result
result="$name\n\tseeders: $seeders\n\tsize: $size\n\tlink: $magnet_link" result="$name\n\tseeders: $seeders\n\tsize: $size\n\tlink: $magnet_link"
echo -e "$result" echo -e "$result"
done <<< "$search" done
fi