Fixing the add-torrents script to not scrape if the infohash is already there.

Adding a few torrents.
This commit is contained in:
Dessalines 2018-10-12 09:42:19 -07:00
parent fd0a1079c4
commit 34e7e947a3
2 changed files with 48 additions and 47 deletions

View File

@ -1,10 +1,10 @@
# Torrents.csv # Torrents.csv
`Torrents.csv` is a collaborative, vetted git repository of torrents, consisting of a single, searchable `torrents.csv` file. It's initially populated with a January 2017 backup of The Pirate Bay, and new torrents are periodically added from various torrent sites via a Rust script. `Torrents.csv` is a collaborative, *vetted* repository of torrents, consisting of a single, searchable `torrents.csv` file. It's initially populated with a January 2017 backup of The Pirate Bay, and new torrents are periodically added from various torrent sites via a Rust script.
`Torrents.csv` will only store torrents with at least one seeder to keep the file small, and will be periodically purged of non-seeded torrents, and sorted by seeders descending. `Torrents.csv` will only store torrents with at least one seeder to keep the file small, and will be periodically purged of non-seeded torrents, and sorted by seeders descending.
It also comes with a simple [Torrents.csv webserver](http://torrents-csv.ml) It also comes with a simple [Torrents.csv webserver](https://torrents-csv.ml)
![img](https://i.imgur.com/qVmSVMC.png) ![img](https://i.imgur.com/qVmSVMC.png)

View File

@ -44,60 +44,61 @@ for torrent_file in *.torrent; do
# Get fields from transmission # Get fields from transmission
show_text=$(transmission-show "$torrent_file") show_text=$(transmission-show "$torrent_file")
# echo "show text = $show_text"
name=$(grep -Po -m 1 'Name: \K.*' <<< $show_text)
name=$(sed 's/;/\\;/g' <<< $name) # Escape the commas for .csv
# Size: Unfortunately this will chop off some sigfigs
size=$(grep -Po 'Total Size: \K.*' <<< $show_text)
size_bytes=$(~/.local/bin/humanfriendly --parse-size="$size") # Convert to bytes
infohash=$(grep -Po 'Hash: \K.*' <<< $show_text) infohash=$(grep -Po 'Hash: \K.*' <<< $show_text)
# Convert the created date # If the infohash already exists, don't do anything
date_string=$(grep -Po 'Created on: \K.*' <<< $show_text)
created_date=""
if [[ "$date_string" == "Unknown" ]]; then
created_date=$(date +%s)
else
created_date=$(date -d "${date_string}" +"%s")
fi
# Scrape for seeder counts
scrape_text=$(timeout 20 python -m torrent_tracker_scraper.scraper \
-i "$infohash" \
-t tracker.coppersurfer.tk -p 6969)
# -t tracker.internetwarriors.net -p 1337
# -t tracker.opentrackr.org -p 1337
seeders=$(grep -Po 'Seeds: \K[0-9]+' <<< $scrape_text)
leechers=$(grep -Po 'Leechers: \K[0-9]+' <<< $scrape_text)
completed=$(grep -Po 'Completed: \K[0-9]+' <<< $scrape_text)
scraped_date=$(date +%s)
# Construct add line
add_line="$infohash;$name;$size_bytes;$created_date;$seeders;$leechers;$completed;$scraped_date"
# Only add the line if there are seeds, and the infohash doesn't already exist
if (( $seeders > 0 )); then
# If the infohash already exists, replace the line
found_line=$(rg -n $infohash $torrents_csv | cut -d : -f 1) found_line=$(rg -n $infohash $torrents_csv | cut -d : -f 1)
if [ ! -z $found_line ]; then if [ ! -z $found_line ]; then
sed -i "$found_line c$add_line" $torrents_csv echo "$infohash already exists"
echo -e "Found $name, updating peers"
else else
# Append the add lines to the torrents.csv file
echo -e "\n$add_line" >> $torrents_csv
truncate -s -1 $torrents_csv # Removing last newline
echo -e "Added $name"
fi
else
echo -e "$name has no seeders."
fi
name=$(grep -Po -m 1 'Name: \K.*' <<< $show_text)
name=$(sed 's/;/\\;/g' <<< $name) # Escape the commas for .csv
# Size: Unfortunately this will chop off some sigfigs
size=$(grep -Po 'Total Size: \K.*' <<< $show_text)
size_bytes=$(~/.local/bin/humanfriendly --parse-size="$size") # Convert to bytes
# Convert the created date
date_string=$(grep -Po 'Created on: \K.*' <<< $show_text)
created_date=""
if [[ "$date_string" == "Unknown" ]]; then
created_date=$(date +%s)
else
created_date=$(date -d "${date_string}" +"%s")
fi
# Scrape for seeder counts
scrape_text=$(timeout 20 python -m torrent_tracker_scraper.scraper \
-i "$infohash" \
-t tracker.coppersurfer.tk -p 6969)
# -t tracker.internetwarriors.net -p 1337
# -t tracker.opentrackr.org -p 1337
seeders=$(grep -Po 'Seeds: \K[0-9]+' <<< $scrape_text)
leechers=$(grep -Po 'Leechers: \K[0-9]+' <<< $scrape_text)
completed=$(grep -Po 'Completed: \K[0-9]+' <<< $scrape_text)
scraped_date=$(date +%s)
# Construct add line
add_line="$infohash;$name;$size_bytes;$created_date;$seeders;$leechers;$completed;$scraped_date"
# Only add the line if there are seeds, and the infohash doesn't already exist
if (( $seeders > 0 )); then
# Append the add lines to the torrents.csv file
echo -e "\n$add_line" >> $torrents_csv
truncate -s -1 $torrents_csv # Removing last newline
echo -e "Added $name"
else
echo -e "$name has no seeders."
fi
fi
done done
popd popd
cd scripts
. prune.sh . prune.sh