Fixing add torrents script to not scrape if the infohash is already there.
Adding a few torrents.
This commit is contained in:
parent
fd0a1079c4
commit
34e7e947a3
|
@ -1,10 +1,10 @@
|
|||
# Torrents.csv
|
||||
|
||||
`Torrents.csv` is a collaborative, vetted git repository of torrents, consisting of a single, searchable `torrents.csv` file. It is initially populated with a January 2017 backup of The Pirate Bay, and new torrents are periodically added from various torrent sites via a Rust script.
|
||||
`Torrents.csv` is a collaborative, *vetted* repository of torrents, consisting of a single, searchable `torrents.csv` file. It is initially populated with a January 2017 backup of The Pirate Bay, and new torrents are periodically added from various torrent sites via a Rust script.
|
||||
|
||||
`Torrents.csv` will only store torrents with at least one seeder to keep the file small, and will be periodically purged of non-seeded torrents, and sorted by seeders descending.
|
||||
|
||||
It also comes with a simple [Torrents.csv webserver](http://torrents-csv.ml)
|
||||
It also comes with a simple [Torrents.csv webserver](https://torrents-csv.ml)
|
||||
|
||||
![img](https://i.imgur.com/qVmSVMC.png)
|
||||
|
||||
|
|
|
@ -44,60 +44,61 @@ for torrent_file in *.torrent; do
|
|||
# Get fields from transmission
|
||||
|
||||
show_text=$(transmission-show "$torrent_file")
|
||||
# echo "show text = $show_text"
|
||||
|
||||
name=$(grep -Po -m 1 'Name: \K.*' <<< $show_text)
|
||||
name=$(sed 's/;/\\;/g' <<< $name) # Escape the semicolons, which are the .csv field delimiter
|
||||
|
||||
# Size: Unfortunately this will chop off some sigfigs
|
||||
size=$(grep -Po 'Total Size: \K.*' <<< $show_text)
|
||||
size_bytes=$(~/.local/bin/humanfriendly --parse-size="$size") # Convert to bytes
|
||||
|
||||
infohash=$(grep -Po 'Hash: \K.*' <<< $show_text)
|
||||
|
||||
# Convert the created date
|
||||
date_string=$(grep -Po 'Created on: \K.*' <<< $show_text)
|
||||
created_date=""
|
||||
if [[ "$date_string" == "Unknown" ]]; then
|
||||
created_date=$(date +%s)
|
||||
else
|
||||
created_date=$(date -d "${date_string}" +"%s")
|
||||
fi
|
||||
|
||||
# Scrape for seeder counts
|
||||
scrape_text=$(timeout 20 python -m torrent_tracker_scraper.scraper \
|
||||
-i "$infohash" \
|
||||
-t tracker.coppersurfer.tk -p 6969)
|
||||
# -t tracker.internetwarriors.net -p 1337
|
||||
# -t tracker.opentrackr.org -p 1337
|
||||
seeders=$(grep -Po 'Seeds: \K[0-9]+' <<< $scrape_text)
|
||||
leechers=$(grep -Po 'Leechers: \K[0-9]+' <<< $scrape_text)
|
||||
completed=$(grep -Po 'Completed: \K[0-9]+' <<< $scrape_text)
|
||||
scraped_date=$(date +%s)
|
||||
|
||||
# Construct add line
|
||||
add_line="$infohash;$name;$size_bytes;$created_date;$seeders;$leechers;$completed;$scraped_date"
|
||||
|
||||
# Only add the line if there are seeds, and the infohash doesn't already exist
|
||||
if (( $seeders > 0 )); then
|
||||
|
||||
# If the infohash already exists, replace the line
|
||||
# If the infohash already exists, don't do anything
|
||||
found_line=$(rg -n $infohash $torrents_csv | cut -d : -f 1)
|
||||
if [ ! -z $found_line ]; then
|
||||
sed -i "$found_line c$add_line" $torrents_csv
|
||||
echo -e "Found $name, updating peers"
|
||||
echo "$infohash already exists"
|
||||
else
|
||||
# Append the add lines to the torrents.csv file
|
||||
echo -e "\n$add_line" >> $torrents_csv
|
||||
truncate -s -1 $torrents_csv # Removing last newline
|
||||
echo -e "Added $name"
|
||||
fi
|
||||
else
|
||||
echo -e "$name has no seeders."
|
||||
fi
|
||||
|
||||
name=$(grep -Po -m 1 'Name: \K.*' <<< $show_text)
|
||||
name=$(sed 's/;/\\;/g' <<< $name) # Escape the semicolons, which are the .csv field delimiter
|
||||
|
||||
# Size: Unfortunately this will chop off some sigfigs
|
||||
size=$(grep -Po 'Total Size: \K.*' <<< $show_text)
|
||||
size_bytes=$(~/.local/bin/humanfriendly --parse-size="$size") # Convert to bytes
|
||||
|
||||
|
||||
|
||||
# Convert the created date
|
||||
date_string=$(grep -Po 'Created on: \K.*' <<< $show_text)
|
||||
created_date=""
|
||||
if [[ "$date_string" == "Unknown" ]]; then
|
||||
created_date=$(date +%s)
|
||||
else
|
||||
created_date=$(date -d "${date_string}" +"%s")
|
||||
fi
|
||||
|
||||
# Scrape for seeder counts
|
||||
scrape_text=$(timeout 20 python -m torrent_tracker_scraper.scraper \
|
||||
-i "$infohash" \
|
||||
-t tracker.coppersurfer.tk -p 6969)
|
||||
# -t tracker.internetwarriors.net -p 1337
|
||||
# -t tracker.opentrackr.org -p 1337
|
||||
seeders=$(grep -Po 'Seeds: \K[0-9]+' <<< $scrape_text)
|
||||
leechers=$(grep -Po 'Leechers: \K[0-9]+' <<< $scrape_text)
|
||||
completed=$(grep -Po 'Completed: \K[0-9]+' <<< $scrape_text)
|
||||
scraped_date=$(date +%s)
|
||||
|
||||
# Construct add line
|
||||
add_line="$infohash;$name;$size_bytes;$created_date;$seeders;$leechers;$completed;$scraped_date"
|
||||
|
||||
# Only add the line if there are seeds, and the infohash doesn't already exist
|
||||
if (( $seeders > 0 )); then
|
||||
|
||||
# Append the add lines to the torrents.csv file
|
||||
echo -e "\n$add_line" >> $torrents_csv
|
||||
truncate -s -1 $torrents_csv # Removing last newline
|
||||
echo -e "Added $name"
|
||||
else
|
||||
echo -e "$name has no seeders."
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
popd
|
||||
cd scripts
|
||||
. prune.sh
|
||||
|
||||
|
|
Loading…
Reference in New Issue