From 83e5c61c2fd0246f9a5d6b13fd7863a7a4d9991f Mon Sep 17 00:00:00 2001 From: Dessalines Date: Tue, 1 Jan 2019 11:03:21 -0800 Subject: [PATCH] Fixing 1337x failures to continue. --- new_torrents_fetcher/src/main.rs | 122 ++++++++++++++++++------------- 1 file changed, 70 insertions(+), 52 deletions(-) diff --git a/new_torrents_fetcher/src/main.rs b/new_torrents_fetcher/src/main.rs index 98f158e..cd4ccc6 100644 --- a/new_torrents_fetcher/src/main.rs +++ b/new_torrents_fetcher/src/main.rs @@ -48,24 +48,24 @@ fn skytorrents() { let html = fetch_html(page); let document = Document::from(&html[..]); for row in document.find(Attr("id", "results").descendant(Name("tr"))) { - let info_hash: String = row - .find(Name("td").descendant(Name("a"))) - .nth(2) - .unwrap() - .attr("href") - .unwrap() - .chars() - .skip(20) - .take(40) - .collect(); + let info_hash_td = match row.find(Name("td").descendant(Name("a"))).nth(2) { + Some(t) => t, + None => continue, + }; + let info_hash = match info_hash_td.attr("href") { + Some(t) => t.chars().skip(20).take(40).collect(), + None => continue, + }; - let name = row - .find(Name("td").descendant(Name("a"))) - .nth(0) - .unwrap() - .text(); + let name = match row.find(Name("td").descendant(Name("a"))).nth(0) { + Some(t) => t.text(), + None => continue, + }; - let size = row.find(Name("td")).nth(1).unwrap().text(); + let size = match row.find(Name("td")).nth(1) { + Some(t) => t.text(), + None => continue, + }; let size_bytes = convert_to_bytes(size); // simulate a start and scraped date @@ -74,9 +74,14 @@ fn skytorrents() { .expect("Time went backwards") .as_secs(); - let seeders = row.find(Name("td")).nth(4).unwrap().text().replace(",", ""); - let leechers = row.find(Name("td")).nth(5).unwrap().text().replace(",", ""); - + let seeders = match row.find(Name("td")).nth(4) { + Some(t) => t.text().replace(",", ""), + None => continue, + }; + let leechers = match row.find(Name("td")).nth(5) { + Some(t) => t.text().replace(",", ""), + None => continue, + }; // infohash;name;size_bytes;created_unix;seeders;leechers;completed;scraped_date let add_line = [ @@ -131,45 +136,58 @@ fn leetx() { .descendant(Name("tbody")) .descendant(Name("tr")), ) { - let detail_page_url = row - .find(Class("coll-1")) - .nth(0) - .unwrap() - .find(Name("a")) - .nth(1) - .unwrap() - .attr("href") - .unwrap(); - let detail_full_url = format!("{}{}", base_url, detail_page_url); + let detail_page_url_col = match row.find(Class("coll-1")).nth(0) { + Some(t) => t, + None => continue, + }; + let detail_page_url_name = match detail_page_url_col.find(Name("a")).nth(1) { + Some(t) => t, + None => continue, + }; + let detail_page_url_href = match detail_page_url_name.attr("href") { + Some(t) => t, + None => continue, + }; + + let detail_full_url = format!("{}{}", base_url, detail_page_url_href); println!("Fetching page {}", detail_full_url); let detail_html = fetch_html(&detail_full_url); let detail_document = Document::from(&detail_html[..]); - let info_hash = detail_document + let info_hash = match detail_document .find(Class("infohash-box").descendant(Name("span"))) .nth(0) - .unwrap() - .text() - .to_lowercase(); + { + Some(t) => t.text().to_lowercase(), + None => continue, + }; + + let name_col = match row.find(Class("coll-1")).nth(0) { + Some(t) => t, + None => continue, + }; + let name = match name_col.find(Name("a")).nth(1) { + Some(t) => t.text(), + None => continue, + }; + + let seeders = match row.find(Class("coll-2")).nth(0) { + Some(t) => t.text(), + None => continue, + }; + let leechers = match row.find(Class("coll-3")).nth(0) { + Some(t) => t.text(), + None => continue, + }; + + let size_col = match row.find(Class("coll-4")).nth(0) { + Some(t) => t, + None => continue, + }; + let size = match size_col.children().nth(0) { + Some(t) => t.text().replace(",", ""), + None => continue, + }; - let name = row - .find(Class("coll-1")) - .nth(0) - .unwrap() - .find(Name("a")) - .nth(1) - .unwrap() - .text(); - let seeders = row.find(Class("coll-2")).nth(0).unwrap().text(); - let leechers = row.find(Class("coll-3")).nth(0).unwrap().text(); - let size = row - .find(Class("coll-4")) - .nth(0) - .unwrap() - .children() - .nth(0) - .unwrap() - .text() - .replace(",", "");; let size_bytes = convert_to_bytes(size); let created_unix = SystemTime::now()