Adding 1337x fetching. Fixes #4

Dessalines 2018-12-20 19:03:03 -08:00
parent ad53b489aa
commit 6441210d0e
1 changed file with 106 additions and 7 deletions

@@ -1,7 +1,7 @@
 extern crate reqwest;
 extern crate select;
 use select::document::Document;
-use select::predicate::{Attr, Name, Predicate};
+use select::predicate::{Attr, Class, Name, Predicate};
 use std::fs;
 use std::fs::File;
 use std::fs::OpenOptions;
@@ -16,11 +16,11 @@ fn main() {
         fs::remove_file(TMP_ADDS).unwrap();
     }
     File::create(TMP_ADDS).unwrap();
+    leetx();
     skytorrents();
 }
 fn skytorrents() {
     let page_limit = 100;
     let base_url = "https://www.skytorrents.lol";
@@ -32,13 +32,15 @@ fn skytorrents() {
         pages.push(page);
     }
-    let types = ["video", "audio", "games", "software", "doc", "ebook", "yify", "epub", "xxx", "show", "album", "1080", ];
+    let types = [
+        "video", "audio", "games", "software", "doc", "ebook", "yify", "epub", "xxx", "show", "album",
+        "1080",
+    ];
     for c_type in types.iter() {
         for i in 1..page_limit {
             let page = format!("{}/top100?type={}&page={}", base_url, c_type, i);
             pages.push(page);
         }
     }
     for page in pages.iter() {
@@ -63,7 +65,7 @@ fn skytorrents() {
                 .unwrap()
                 .text();
-            let mut size = row.find(Name("td")).nth(1).unwrap().text();
+            let size = row.find(Name("td")).nth(1).unwrap().text();
             let size_bytes = convert_to_bytes(size);
             // simulate a start and scraped date
@@ -72,8 +74,8 @@ fn skytorrents() {
                 .expect("Time went backwards")
                 .as_secs();
-            let seeders = row.find(Name("td")).nth(4).unwrap().text().replace(",","");
-            let leechers = row.find(Name("td")).nth(5).unwrap().text().replace(",","");
+            let seeders = row.find(Name("td")).nth(4).unwrap().text().replace(",", "");
+            let leechers = row.find(Name("td")).nth(5).unwrap().text().replace(",", "");
             // infohash;name;size_bytes;created_unix;seeders;leechers;completed;scraped_date
@@ -93,6 +95,103 @@ fn skytorrents() {
     }
 }
+fn leetx() {
+    let page_limit = 50;
+    let base_url = "https://1337x.to";
+    let mut pages: Vec<String> = Vec::new();
+    let types = [
+        "Games",
+        "Anime",
+        "Apps",
+        "Documentaries",
+        "Movies",
+        "Music",
+        "Other",
+        "TV",
+        "XXX",
+    ];
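+    // One listing URL per category and page, sorted by seeders descending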
+    for i in 1..page_limit {
+        for c_type in types.iter() {
+            let page = format!("{}/sort-cat/{}/seeders/desc/{}/", base_url, c_type, i);
+            pages.push(page);
+        }
+    }
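+    // Each listing page is a "table-list" table; every <tbody> row is one torrent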
+    for page in pages.iter() {
+        println!("Fetching page {}", page);
+        let html = fetch_html(page);
+        let document = Document::from(&html[..]);
+        for row in document.find(
+            Class("table-list")
+                .descendant(Name("tbody"))
+                .descendant(Name("tr")),
+        ) {
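+            // The second <a> in the "coll-1" cell links to the torrent's detail page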
+            let detail_page_url = row
+                .find(Class("coll-1"))
+                .nth(0)
+                .unwrap()
+                .find(Name("a"))
+                .nth(1)
+                .unwrap()
+                .attr("href")
+                .unwrap();
+            let detail_full_url = format!("{}{}", base_url, detail_page_url);
+            println!("Fetching page {}", detail_full_url);
+            let detail_html = fetch_html(&detail_full_url);
+            let detail_document = Document::from(&detail_html[..]);
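+            // The info hash comes from the detail page's "infohash-box"; lowercase it for consistency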
+            let info_hash = detail_document
+                .find(Class("infohash-box").descendant(Name("span")))
+                .nth(0)
+                .unwrap()
+                .text()
+                .to_lowercase();
+            let name = row
+                .find(Class("coll-1"))
+                .nth(0)
+                .unwrap()
+                .find(Name("a"))
+                .nth(1)
+                .unwrap()
+                .text();
+            let seeders = row.find(Class("coll-2")).nth(0).unwrap().text();
+            let leechers = row.find(Class("coll-3")).nth(0).unwrap().text();
+            let size = row
+                .find(Class("coll-4"))
+                .nth(0)
+                .unwrap()
+                .children()
+                .nth(0)
+                .unwrap()
+                .text();
+            let size_bytes = convert_to_bytes(size);
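+            // simulate a start and scraped date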
+            let created_unix = SystemTime::now()
+                .duration_since(UNIX_EPOCH)
+                .expect("Time went backwards")
+                .as_secs();
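+            // infohash;name;size_bytes;created_unix;seeders;leechers;completed;scraped_date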
+            let add_line = [
+                info_hash,
+                name,
+                size_bytes.to_string(),
+                created_unix.to_string(),
+                seeders,
+                leechers,
+                "".to_string(),
+                created_unix.to_string(),
+            ].join(";");
+            append_to_file(add_line);
+        }
+    }
+}
 fn fetch_html(url: &str) -> String {
     reqwest::get(url).unwrap().text().unwrap()
 }