Import of TPB Jan 2017 backup. Updating search and add scripts. Adding prune.sh

2018-10-07 22:43:12 -07:00 · 2018-10-07 22:43:12 -07:00 · 1f31cefc67
commit 1f31cefc67
parent e767fe77fb
6 changed files with 61 additions and 34 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1 +1,2 @@
 run.out
 old_greps.sh
--- a/README.md
+++ b/README.md
@ -7,7 +7,7 @@ To find torrents run `./search.sh "frasier s01"`
 ```
 Frasier S01-S11 (1993-)
 	seeders: 33
-	size: 13.43 GB
+	size: 13GiB
 	link: magnet:?xt=urn:btih:3cc5142d0d139bcc9ea9925239a142770b98cf74
 ```
@ -34,8 +34,24 @@ infohash;name;size_bytes;created_unix;seeders;leechers;completed;scraped_date
 # torrents here...
 ```
 ## Requirements
 ### Searching
 - [ripgrep](https://github.com/BurntSushi/ripgrep)
 ### Uploading
 - [Torrent tracker scraper](https://github.com/ZigmundVonZaun/torrent-tracker-scraper)
 - [Transmission-cli](https://github.com/transmission/transmission)
 - [Human Friendly](https://humanfriendly.readthedocs.io/en/latest/readme.html#command-line)
 ## Other info
 `torrents.csv` will only store torrents with at least one seeder to keep the file small. It will be periodically purged of non-seeded torrents and sorted by seeders, descending.
 It's initially populated with a January 2017 backup of The Pirate Bay, but eventually scripts will be written that pull in new torrents daily.
 ## Potential sources for new torrents
 - https://www.skytorrents.lol/top100
 - https://1337x.to/top-100
 - https://1337x.to/trending
--- a/add_torrents.sh
+++ b/add_torrents.sh
@ -37,7 +37,7 @@ else
 fi
 # Loop over all torrents
-cd $torrents_dir
+pushd $torrents_dir
 for torrent_file in *.torrent; do
  # Get fields from transmission
@ -81,7 +81,7 @@ for torrent_file in *.torrent; do
  if (( $seeders > 0 )); then
    # If the infohash already exists, replace the line
-    found_line=$(grep -n $infohash $torrents_csv | cut -d : -f 1) 
+    found_line=$(rg -n $infohash $torrents_csv | cut -d : -f 1) 
    if [ ! -z $found_line ]; then
      sed -i "$found_line c$add_line" $torrents_csv
      echo -e "Found $name, updating peers"
@ -97,13 +97,6 @@ for torrent_file in *.torrent; do
 done
-# Sort the file, remove random newlines
+popd
-sed -i '/^$/d' $torrents_csv
+. prune.sh
 header=$(head -n1 $torrents_csv)
 sed -i '1d' $torrents_csv
 sort --field-separator=';' --key=5 -nr -o $torrents_csv $torrents_csv
 sed  -i "1i $header" $torrents_csv
 truncate -s -1 $torrents_csv # Removing last newline
--- a/prune.sh
+++ b/prune.sh
@ -0,0 +1,26 @@
 # Prunes torrents.csv: keeps only well-formed rows, removes duplicate
 # infohashes, and sorts the rows by seeders (descending) below the header.
 # Operates on torrents.csv in the current working directory.
 torrents_csv="$(pwd)/torrents.csv"
 # Keep only lines with exactly 7 ';' (8 fields, last one non-empty).
 # Blank lines can never match, so no separate blank-line pass is needed.
 rg "^([^;]*;){7}[^;]+$" "$torrents_csv" > tmp_adds
 rg_status=$?
 # rg exits 0 on matches and 1 on no matches; anything else (rg missing,
 # unreadable file) is a real failure, so keep the original file rather than
 # replacing it with an empty tmp_adds.
 if [ "$rg_status" -gt 1 ]; then
   echo "prune.sh: rg failed (exit $rg_status), $torrents_csv left untouched" >&2
   rm -f tmp_adds
 else
   mv tmp_adds "$torrents_csv"
   # Extract the header so it is not sorted together with the data rows
   header=$(head -n1 "$torrents_csv")
   sed -i '1d' "$torrents_csv"
   # Remove dups (one row per infohash, field 1)
   sort -u -t';' -k1,1 -o "$torrents_csv" "$torrents_csv"
   # Sort by seeders (field 5) desc
   sort --field-separator=';' --key=5 -nr -o "$torrents_csv" "$torrents_csv"
   # Add the header back in. Written with printf instead of `sed 1i` because
   # sed inserts nothing into an empty file, which dropped the header
   # whenever no data rows survived.
   { printf '%s\n' "$header"; cat "$torrents_csv"; } > tmp_adds &&
     mv tmp_adds "$torrents_csv"
   truncate -s -1 "$torrents_csv" # Removing last newline
 fi
--- a/scrape.sh
+++ b/scrape.sh
--- a/search.sh
+++ b/search.sh
@ -2,26 +2,17 @@
 torrent_csv_file="`pwd`/torrents.csv"
 search_string=${1// /.*} # turn multiple string regexes into i.*am.*spartacus
 search=$(grep -i "$search_string" $torrent_csv_file)
 # Sort results by seeders
 search=$(echo -e "$search" | sort --field-separator=';' --key=5 -g)
 if [ -z "$search" ]; then
  echo "No results found"
 else
 # Read the lines of the results
-  while read -r line; do
+rg -i "$search_string" $torrent_csv_file | sort --field-separator=';' --key=5 -g | while read -r line; do
  infohash=$(echo -e "$line" | cut -d ';' -f1)
  magnet_link="magnet:?xt=urn:btih:$infohash"
  name=$(echo -e "$line" | cut -d ';' -f2)
  seeders=$(echo -e "$line" | cut -d ';' -f5)
  size_bytes=$(echo -e "$line" | cut -d ';' -f3)
-    size=$(~/.local/bin/humanfriendly -s $size_bytes) # This slows down the results a bit
+  size=$(numfmt --to=iec-i --suffix=B $size_bytes)
  # Construct the search result
  result="$name\n\tseeders: $seeders\n\tsize: $size\n\tlink: $magnet_link"
  echo -e "$result"
-  done <<< "$search"
+done
-fi
+