Adding rust script to pull from skytorrents.

This commit is contained in:
Dessalines 2018-10-09 12:54:47 -07:00
parent 1f31cefc67
commit ac1a54eb1a
5 changed files with 1508 additions and 1 deletions

3
.gitignore vendored
View File

@ -1,2 +1,3 @@
run.out run.out
old_greps.sh old_greps.sh
new_torrents_fetcher/target

1376
new_torrents_fetcher/Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,8 @@
[package]
name = "new_torrents_fetcher"
version = "0.1.0"
authors = ["Dessalines <happydooby@gmail.com>"]

[dependencies]
# Pinned to a release line instead of "*": a wildcard lets a lockfile refresh
# pull in any future breaking release. The fetcher calls `reqwest::get` and
# unwraps the result synchronously, which the 0.9 line supports.
# NOTE(review): confirm these match the versions recorded in Cargo.lock.
reqwest = "0.9"
select = "0.4"

View File

@ -0,0 +1,114 @@
extern crate reqwest;
extern crate select;
use select::document::Document;
use select::predicate::{Attr, Name, Predicate};
use std::fs;
use std::fs::File;
use std::fs::OpenOptions;
use std::io::prelude::*;
use std::path::Path;
use std::time::{SystemTime, UNIX_EPOCH};
/// Path (relative to the working directory) of the scratch CSV file that
/// `main` recreates on start-up and `append_to_file` appends scraped rows to.
const TMP_ADDS: &str = "tmp_adds.csv";
/// Entry point: starts from a fresh, empty `TMP_ADDS` file and then runs the
/// skytorrents scraper.
///
/// Panics if the old file cannot be removed or the new one cannot be created.
fn main() {
    let tmp_adds = Path::new(TMP_ADDS);
    // Remove whatever a previous run left behind before recreating the file.
    if tmp_adds.exists() {
        fs::remove_file(tmp_adds).unwrap();
    }
    File::create(tmp_adds).unwrap();

    skytorrents();
}
/// Scrapes the skytorrents "top100" listing pages and appends one line per
/// torrent row to `TMP_ADDS` through `append_to_file`.
///
/// Each line is `;`-separated:
/// `infohash;name;size_bytes;created_unix;seeders;leechers;completed;scraped_date`.
/// `completed` is left empty, and the current time is written as both
/// `created_unix` and `scraped_date`.
///
/// Rows under the `results` element that lack the links or cells read below
/// are reported on stderr and skipped instead of aborting the whole run.
///
/// # Panics
/// Panics if the system clock is before the Unix epoch, or if `fetch_html` or
/// `convert_to_bytes` panic.
fn skytorrents() {
    // The range is half-open: pages 1 through 19 are requested.
    for i in 1..20 {
        let page = format!("{}{}", "https://www.skytorrents.lol/top100?page=", i);
        println!("Fetching page {}", page);
        let html = fetch_html(&page);
        let document = Document::from(&html[..]);

        for row in document.find(Attr("id", "results").descendant(Name("tr"))) {
            let links: Vec<_> = row.find(Name("td").descendant(Name("a"))).collect();
            let cells: Vec<_> = row.find(Name("td")).collect();

            // Links 0 and 2 and cells 1, 4 and 5 are read below; a row without
            // them cannot be turned into a CSV line.
            if links.len() < 3 || cells.len() < 6 {
                eprintln!("Skipping row without the expected links/cells");
                continue;
            }
            let href = match links[2].attr("href") {
                Some(href) => href,
                None => {
                    eprintln!("Skipping row whose third link has no href");
                    continue;
                }
            };

            // The hash is taken as the 40 characters following the first 20
            // characters of the href.
            let info_hash: String = href.chars().skip(20).take(40).collect();
            let name = links[0].text();
            let size_bytes = convert_to_bytes(cells[1].text());

            // simulate a start and scraped date
            let created_unix = SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .expect("Time went backwards")
                .as_secs();

            let seeders = cells[4].text();
            let leechers = cells[5].text();

            // NOTE(review): `name` is written verbatim; a `;` inside it would
            // shift the columns of this line — confirm how consumers cope.
            // infohash;name;size_bytes;created_unix;seeders;leechers;completed;scraped_date
            let add_line = [
                info_hash,
                name,
                size_bytes.to_string(),
                created_unix.to_string(),
                seeders,
                leechers,
                "".to_string(),
                created_unix.to_string(),
            ].join(";");
            append_to_file(add_line);
        }
    }
}
/// Issues a GET request for `url` and returns the response body as text.
///
/// Panics if the request fails or the body cannot be read as text.
fn fetch_html(url: &str) -> String {
    let mut response = reqwest::get(url).unwrap();
    response.text().unwrap()
}
/// Converts a human-readable size such as `"1.5 GB"` into a byte count.
///
/// The input is split on any whitespace, so leading/trailing whitespace and
/// repeated separators are tolerated. The first token is the number and the
/// optional second token the unit. Units are decimal (`KB` = 10^3 … `PB` =
/// 10^15); `B` or a missing unit means the number is already in bytes. Any
/// other unit prints `Unknown unit` and leaves the number unscaled.
///
/// The product is computed in `f64` and rounded to the nearest byte, so
/// fractional digits beyond the second are no longer discarded.
///
/// # Panics
/// Panics if the first token is not a finite, non-negative number.
fn convert_to_bytes(human: String) -> u64 {
    let mut tokens = human.split_whitespace();

    let value: f64 = match tokens.next().unwrap_or("").parse() {
        Ok(value) => value,
        Err(e) => panic!("Cannot parse size {:?}: {}", human, e),
    };
    // `parse` also accepts "inf", "NaN" and negative values; none is a size.
    if !value.is_finite() || value < 0.0 {
        panic!("Size {:?} is not a finite, non-negative number", human);
    }

    let multiplier: u64 = match tokens.next() {
        Some("KB") => 10u64.pow(3),
        Some("MB") => 10u64.pow(6),
        Some("GB") => 10u64.pow(9),
        Some("TB") => 10u64.pow(12),
        Some("PB") => 10u64.pow(15),
        Some("B") | None => 1,
        Some(_) => {
            println!("Unknown unit");
            1
        }
    };

    (value * multiplier as f64).round() as u64
}
/// Appends `line`, followed by a newline, to the `TMP_ADDS` file.
///
/// Panics if the file cannot be opened; a failed write is only reported on
/// stderr.
fn append_to_file(line: String) {
    let mut options = OpenOptions::new();
    options.write(true).append(true);
    let mut out = options.open(TMP_ADDS).unwrap();

    match writeln!(out, "{}", line) {
        Ok(()) => {}
        Err(e) => eprintln!("Couldn't write to file: {}", e),
    }
}

8
update.sh Executable file
View File

@ -0,0 +1,8 @@
#!/bin/bash
# This fetches from several torrent websites for new updates.
# bash is required: pushd/popd are not POSIX sh builtins.

# Stop if the fetcher directory is missing; otherwise the commands below
# would run in (and delete files from) the wrong directory.
pushd new_torrents_fetcher || exit 1
cargo run
# Append whatever was scraped to the main CSV, then drop the scratch file.
cat tmp_adds.csv >> ../torrents.csv
rm tmp_adds.csv
popd
. prune.sh