Adding Rust script to pull from skytorrents.
This commit is contained in:
parent
1f31cefc67
commit
ac1a54eb1a
|
@ -1,2 +1,3 @@
|
|||
run.out
|
||||
old_greps.sh
|
||||
new_torrents_fetcher/target
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,8 @@
|
|||
[package]
|
||||
name = "new_torrents_fetcher"
|
||||
version = "0.1.0"
|
||||
authors = ["Dessalines <happydooby@gmail.com>"]
|
||||
|
||||
[dependencies]
|
||||
reqwest = "*"
|
||||
select = "*"
|
|
@ -0,0 +1,114 @@
|
|||
extern crate reqwest;
|
||||
extern crate select;
|
||||
use select::document::Document;
|
||||
use select::predicate::{Attr, Name, Predicate};
|
||||
use std::fs;
|
||||
use std::fs::File;
|
||||
use std::fs::OpenOptions;
|
||||
use std::io::prelude::*;
|
||||
use std::path::Path;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
// Name of the file that scraped rows are appended to; `main` recreates it empty on every run.
const TMP_ADDS: &str = "tmp_adds.csv";
|
||||
|
||||
fn main() {
|
||||
if Path::new(TMP_ADDS).exists() {
|
||||
fs::remove_file(TMP_ADDS).unwrap();
|
||||
}
|
||||
File::create(TMP_ADDS).unwrap();
|
||||
skytorrents();
|
||||
}
|
||||
|
||||
fn skytorrents() {
|
||||
let mut pages: Vec<String> = Vec::new();
|
||||
for i in 1..20 {
|
||||
let page = format!("{}{}", "https://www.skytorrents.lol/top100?page=", i);
|
||||
pages.push(page);
|
||||
}
|
||||
|
||||
for page in pages.iter() {
|
||||
println!("Fetching page {}", page);
|
||||
let html = fetch_html(page);
|
||||
let document = Document::from(&html[..]);
|
||||
for row in document.find(Attr("id", "results").descendant(Name("tr"))) {
|
||||
let info_hash: String = row
|
||||
.find(Name("td").descendant(Name("a")))
|
||||
.nth(2)
|
||||
.unwrap()
|
||||
.attr("href")
|
||||
.unwrap()
|
||||
.chars()
|
||||
.skip(20)
|
||||
.take(40)
|
||||
.collect();
|
||||
|
||||
let name = row
|
||||
.find(Name("td").descendant(Name("a")))
|
||||
.nth(0)
|
||||
.unwrap()
|
||||
.text();
|
||||
|
||||
let mut size = row.find(Name("td")).nth(1).unwrap().text();
|
||||
let size_bytes = convert_to_bytes(size);
|
||||
|
||||
// simulate a start and scraped date
|
||||
let created_unix = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.expect("Time went backwards")
|
||||
.as_secs();
|
||||
|
||||
let seeders = row.find(Name("td")).nth(4).unwrap().text();
|
||||
let leechers = row.find(Name("td")).nth(5).unwrap().text();
|
||||
|
||||
// infohash;name;size_bytes;created_unix;seeders;leechers;completed;scraped_date
|
||||
|
||||
let add_line = [
|
||||
info_hash,
|
||||
name,
|
||||
size_bytes.to_string(),
|
||||
created_unix.to_string(),
|
||||
seeders,
|
||||
leechers,
|
||||
"".to_string(),
|
||||
created_unix.to_string(),
|
||||
].join(";");
|
||||
|
||||
append_to_file(add_line);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn fetch_html(url: &str) -> String {
|
||||
reqwest::get(url).unwrap().text().unwrap()
|
||||
}
|
||||
|
||||
/// Converts a human-readable size such as `"1.5 GB"` into a number of bytes.
///
/// The text is split on whitespace into a number and a unit. Units are decimal:
/// `KB` = 10^3, `MB` = 10^6, `GB` = 10^9, `TB` = 10^12, `PB` = 10^15. Any other
/// unit, or no unit at all, prints "Unknown unit" and the number is returned
/// as-is, rounded to a whole count.
///
/// The value is parsed as `f64` and rounded to the nearest byte. The earlier
/// `f32`-and-truncate arithmetic dropped a hundredth for inputs such as
/// `"10.23 GB"`, which came out as 10_220_000_000.
///
/// # Panics
///
/// Panics if `human` does not start with a parseable number.
fn convert_to_bytes(human: String) -> u64 {
    // split_whitespace tolerates leading, trailing and repeated spaces.
    let mut parts = human.split_whitespace();

    let value: f64 = parts
        .next()
        .and_then(|number| number.parse().ok())
        .unwrap_or_else(|| panic!("Unparseable size: {:?}", human));

    let multiplier: u64 = match parts.next() {
        Some("KB") => 10u64.pow(3),
        Some("MB") => 10u64.pow(6),
        Some("GB") => 10u64.pow(9),
        Some("TB") => 10u64.pow(12),
        Some("PB") => 10u64.pow(15),
        _ => {
            println!("Unknown unit");
            1
        }
    };

    // Round rather than truncate so binary floating-point error cannot shave off a byte.
    (value * multiplier as f64).round() as u64
}
|
||||
|
||||
fn append_to_file(line: String) {
|
||||
let mut file = OpenOptions::new()
|
||||
.write(true)
|
||||
.append(true)
|
||||
.open(TMP_ADDS)
|
||||
.unwrap();
|
||||
|
||||
if let Err(e) = writeln!(file, "{}", line) {
|
||||
eprintln!("Couldn't write to file: {}", e);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue