From 6441210d0e9a605fc5b806b4f54eb1bbcd4f3407 Mon Sep 17 00:00:00 2001
From: Dessalines
Date: Thu, 20 Dec 2018 19:03:03 -0800
Subject: [PATCH] Adding 1337x fetching. Fixes #4

---
Review notes (free text after the three-dash line; not part of the commit
message and not applied to the tree):

What the patch does
 * main() now calls the new leetx() before skytorrents().
 * leetx() builds listing URLs of the form
   {base_url}/sort-cat/{category}/seeders/desc/{page}/ for nine categories.
   The page loop is `1..page_limit`, an exclusive range, so with
   page_limit = 50 it requests pages 1 through 49.
 * For every `tr` under `.table-list tbody` it takes the second `a` inside
   the first `.coll-1` cell (href -> detail page, text -> name), fetches the
   detail page, and reads the first `span` under `.infohash-box`, lowercased,
   as the info hash.
 * Seeders and leechers are the text of the first `.coll-2` / `.coll-3`
   cells; size is the text of the first child of `.coll-4`, passed through
   convert_to_bytes().
 * Each row is appended via append_to_file() as one `;`-joined line:
   infohash;name;size_bytes;created_unix;seeders;leechers;completed;scraped_date
   where completed is the empty string and both timestamps are "now".
 * skytorrents() only receives formatting changes plus the removal of an
   unneeded `mut` on `size`.

Points to confirm before merging
 * Every lookup in leetx() ends in unwrap(). A listing row or detail page
   that lacks one of the expected elements panics, and because leetx() runs
   first, skytorrents() would then never run.
 * skytorrents() strips "," from seeders/leechers; leetx() does not.
   Confirm 1337x never renders thousands separators in those cells.
 * The `.coll-1` -> second `a` lookup is performed twice per row (once for
   the href, once for the text); it could be bound to a local once.
 * `name` is written as-is; a ";" inside a torrent name would shift the
   columns of the output line.

 new_torrents_fetcher/src/main.rs | 113 +++++++++++++++++++++++++++++--
 1 file changed, 106 insertions(+), 7 deletions(-)

diff --git a/new_torrents_fetcher/src/main.rs b/new_torrents_fetcher/src/main.rs
index cd01338..14bacb0 100644
--- a/new_torrents_fetcher/src/main.rs
+++ b/new_torrents_fetcher/src/main.rs
@@ -1,7 +1,7 @@
 extern crate reqwest;
 extern crate select;
 use select::document::Document;
-use select::predicate::{Attr, Name, Predicate};
+use select::predicate::{Attr, Class, Name, Predicate};
 use std::fs;
 use std::fs::File;
 use std::fs::OpenOptions;
@@ -16,11 +16,11 @@ fn main() {
     fs::remove_file(TMP_ADDS).unwrap();
   }
   File::create(TMP_ADDS).unwrap();
+  leetx();
   skytorrents();
 }
 
 fn skytorrents() {
-
   let page_limit = 100;
 
   let base_url = "https://www.skytorrents.lol";
@@ -32,13 +32,15 @@ fn skytorrents() {
     pages.push(page);
   }
 
-  let types = ["video", "audio", "games", "software", "doc", "ebook", "yify", "epub", "xxx", "show", "album", "1080", ];
+  let types = [
+    "video", "audio", "games", "software", "doc", "ebook", "yify", "epub", "xxx", "show", "album",
+    "1080",
+  ];
   for c_type in types.iter() {
     for i in 1..page_limit {
       let page = format!("{}/top100?type={}&page={}", base_url, c_type, i);
       pages.push(page);
     }
-
   }
 
   for page in pages.iter() {
@@ -63,7 +65,7 @@ fn skytorrents() {
         .unwrap()
         .text();
 
-      let mut size = row.find(Name("td")).nth(1).unwrap().text();
+      let size = row.find(Name("td")).nth(1).unwrap().text();
       let size_bytes = convert_to_bytes(size);
 
       // simulate a start and scraped date
@@ -72,8 +74,8 @@ fn skytorrents() {
         .expect("Time went backwards")
         .as_secs();
 
-      let seeders = row.find(Name("td")).nth(4).unwrap().text().replace(",","");
-      let leechers = row.find(Name("td")).nth(5).unwrap().text().replace(",","");
+      let seeders = row.find(Name("td")).nth(4).unwrap().text().replace(",", "");
+      let leechers = row.find(Name("td")).nth(5).unwrap().text().replace(",", "");
 
       // infohash;name;size_bytes;created_unix;seeders;leechers;completed;scraped_date
 
@@ -93,6 +95,103 @@ fn skytorrents() {
   }
 }
 
+fn leetx() {
+  let page_limit = 50;
+
+  let base_url = "https://1337x.to";
+
+  let mut pages: Vec<String> = Vec::new();
+
+  let types = [
+    "Games",
+    "Anime",
+    "Apps",
+    "Documentaries",
+    "Movies",
+    "Music",
+    "Other",
+    "TV",
+    "XXX",
+  ];
+
+  for i in 1..page_limit {
+    for c_type in types.iter() {
+      let page = format!("{}/sort-cat/{}/seeders/desc/{}/", base_url, c_type, i);
+      pages.push(page);
+    }
+  }
+
+  for page in pages.iter() {
+    println!("Fetching page {}", page);
+    let html = fetch_html(page);
+    let document = Document::from(&html[..]);
+
+    for row in document.find(
+      Class("table-list")
+        .descendant(Name("tbody"))
+        .descendant(Name("tr")),
+    ) {
+      let detail_page_url = row
+        .find(Class("coll-1"))
+        .nth(0)
+        .unwrap()
+        .find(Name("a"))
+        .nth(1)
+        .unwrap()
+        .attr("href")
+        .unwrap();
+      let detail_full_url = format!("{}{}", base_url, detail_page_url);
+      println!("Fetching page {}", detail_full_url);
+      let detail_html = fetch_html(&detail_full_url);
+      let detail_document = Document::from(&detail_html[..]);
+      let info_hash = detail_document
+        .find(Class("infohash-box").descendant(Name("span")))
+        .nth(0)
+        .unwrap()
+        .text()
+        .to_lowercase();
+
+      let name = row
+        .find(Class("coll-1"))
+        .nth(0)
+        .unwrap()
+        .find(Name("a"))
+        .nth(1)
+        .unwrap()
+        .text();
+      let seeders = row.find(Class("coll-2")).nth(0).unwrap().text();
+      let leechers = row.find(Class("coll-3")).nth(0).unwrap().text();
+      let size = row
+        .find(Class("coll-4"))
+        .nth(0)
+        .unwrap()
+        .children()
+        .nth(0)
+        .unwrap()
+        .text();
+      let size_bytes = convert_to_bytes(size);
+
+      let created_unix = SystemTime::now()
+        .duration_since(UNIX_EPOCH)
+        .expect("Time went backwards")
+        .as_secs();
+
+      let add_line = [
+        info_hash,
+        name,
+        size_bytes.to_string(),
+        created_unix.to_string(),
+        seeders,
+        leechers,
+        "".to_string(),
+        created_unix.to_string(),
+      ].join(";");
+
+      append_to_file(add_line);
+    }
+  }
+}
+
 fn fetch_html(url: &str) -> String {
   reqwest::get(url).unwrap().text().unwrap()
 }