Import of TPB Jan 2017 backup. Updating search and add scripts. Adding prune.sh

2018-10-07 22:43:12 -07:00 · 2018-10-07 22:43:12 -07:00 · 1f31cefc67
commit 1f31cefc67
parent e767fe77fb
6 changed files with 61 additions and 34 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1 +1,2 @@
 run.out
+old_greps.sh
--- a/README.md
+++ b/README.md
@ -7,7 +7,7 @@ To find torrents run `./search.sh "frasier s01"`
 ```
 Frasier S01-S11 (1993-)
 	seeders: 33
-	size: 13.43 GB
+	size: 13GiB
 	link: magnet:?xt=urn:btih:3cc5142d0d139bcc9ea9925239a142770b98cf74
 ```

@ -34,8 +34,24 @@ infohash;name;size_bytes;created_unix;seeders;leechers;completed;scraped_date
 # torrents here...
 ```

+## Requirements
+
+### Searching
+- [ripgrep](https://github.com/BurntSushi/ripgrep)
+
+### Uploading
+- [Torrent tracker scraper](https://github.com/ZigmundVonZaun/torrent-tracker-scraper)
+- [Transmission-cli](https://github.com/transmission/transmission)
+- [Human Friendly](https://humanfriendly.readthedocs.io/en/latest/readme.html#command-line)
+
+
 ## Other info

 `Torrents.csv` will only store torrents with at least one seeder to keep the file small, and will be periodically purged of non-seeded torrents, and sorted by seeders descending.

-Its initially populated with a January 2017 backup of the pirate bay, but eventually scripts will be written that pull in new torrents daily. 
+It's initially populated with a January 2017 backup of The Pirate Bay, but eventually scripts will be written that pull in new torrents daily.
+
+## Potential sources for new torrents
+- https://www.skytorrents.lol/top100
+- https://1337x.to/top-100
+- https://1337x.to/trending
--- a/add_torrents.sh
+++ b/add_torrents.sh
@ -37,7 +37,7 @@ else
 fi

 # Loop over all torrents
-cd $torrents_dir
+pushd $torrents_dir
 for torrent_file in *.torrent; do

  # Get fields from transmission
@ -81,7 +81,7 @@ for torrent_file in *.torrent; do
  if (( $seeders > 0 )); then

    # If the infohash already exists, replace the line
-    found_line=$(grep -n $infohash $torrents_csv | cut -d : -f 1) 
+    found_line=$(rg -n $infohash $torrents_csv | cut -d : -f 1) 
    if [ ! -z $found_line ]; then
      sed -i "$found_line c$add_line" $torrents_csv
      echo -e "Found $name, updating peers"
@ -97,13 +97,6 @@ for torrent_file in *.torrent; do

 done

-# Sort the file, remove random newlines
-sed -i '/^$/d' $torrents_csv
-header=$(head -n1 $torrents_csv)
-sed -i '1d' $torrents_csv
-sort --field-separator=';' --key=5 -nr -o $torrents_csv $torrents_csv
-sed  -i "1i $header" $torrents_csv
-truncate -s -1 $torrents_csv # Removing last newline
-
-
+popd
+. prune.sh

--- a/prune.sh
+++ b/prune.sh
@ -0,0 +1,26 @@
+# Prunes torrents.csv: drops rows that do not have exactly 8 ';'-separated fields, removes blank lines and duplicate infohashes, then sorts by seeders descending
+
+torrents_csv="`pwd`/torrents.csv"
+
+# Remove lines that don't have exactly 7 ';'
+rg "^([^;]*;){7}[^;]+$" $torrents_csv > tmp_adds
+mv tmp_adds $torrents_csv
+
+# Remove random newlines
+sed -i '/^$/d' $torrents_csv 
+
+# Extract the header
+header=$(head -n1 $torrents_csv) 
+sed -i '1d' $torrents_csv
+
+# Remove dups
+sort -u -t';' -k1,1 -o $torrents_csv $torrents_csv
+
+# Sort by seeders descending
+sort --field-separator=';' --key=5 -nr -o $torrents_csv $torrents_csv
+
+# Add the header back in
+sed  -i "1i $header" $torrents_csv
+truncate -s -1 $torrents_csv # Removing last newline
+
+
--- a/scrape.sh
+++ b/scrape.sh
--- a/search.sh
+++ b/search.sh
@ -2,26 +2,17 @@
 torrent_csv_file="`pwd`/torrents.csv"

 search_string=${1// /.*} # turn multiple string regexes into i.*am.*spartacus
-search=$(grep -i "$search_string" $torrent_csv_file)

-# Sort results by seeders
-search=$(echo -e "$search" | sort --field-separator=';' --key=5 -g)
+# Read the lines of the results
+rg -i "$search_string" $torrent_csv_file | sort --field-separator=';' --key=5 -g | while read -r line; do
+  infohash=$(echo -e "$line" | cut -d ';' -f1)
+  magnet_link="magnet:?xt=urn:btih:$infohash"
+  name=$(echo -e "$line" | cut -d ';' -f2)
+  seeders=$(echo -e "$line" | cut -d ';' -f5)
+  size_bytes=$(echo -e "$line" | cut -d ';' -f3)
+  size=$(numfmt --to=iec-i --suffix=B $size_bytes)
+  # Construct the search result
+  result="$name\n\tseeders: $seeders\n\tsize: $size\n\tlink: $magnet_link"
+  echo -e "$result"
+done

-if [ -z "$search" ]; then
-  echo "No results found"
-else
-
-  # Read the lines of the results
-  while read -r line; do
-    infohash=$(echo -e "$line" | cut -d ';' -f1)
-    magnet_link="magnet:?xt=urn:btih:$infohash"
-    name=$(echo -e "$line" | cut -d ';' -f2)
-    seeders=$(echo -e "$line" | cut -d ';' -f5)
-    size_bytes=$(echo -e "$line" | cut -d ';' -f3)
-    size=$(~/.local/bin/humanfriendly -s $size_bytes) # This slows down the results a bit
-
-    # Construct the search result
-    result="$name\n\tseeders: $seeders\n\tsize: $size\n\tlink: $magnet_link"
-    echo -e "$result"
-  done <<< "$search"
-fi