Fixing 1337x failures to continue.

Dessalines 2019-01-01 11:03:21 -08:00
parent 6dcc7d5d07
commit 83e5c61c2f
1 changed file with 70 additions and 52 deletions
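The change replaces each .unwrap() on a selector lookup with a match whose None arm is continue, so a row with a missing cell or attribute is skipped instead of panicking and aborting the whole scrape. Below is a minimal sketch of that control-flow pattern only; the Row struct and its Option fields are made-up stand-ins for the select-crate node lookups the real code performs.

// Minimal sketch of the unwrap -> match/continue pattern (not the real scraper).
// Row and its Option fields stand in for the HTML nodes/attributes that the
// scraper's find()/attr() calls return.
struct Row {
    info_hash: Option<String>,
    seeders: Option<String>,
}

fn main() {
    let rows = vec![
        Row { info_hash: Some("abc123".into()), seeders: Some("1,024".into()) },
        Row { info_hash: None, seeders: Some("3".into()) }, // malformed row
    ];
    for row in rows {
        // Previously, row.info_hash.unwrap() would panic on the malformed row
        // and kill the whole scrape; with match/continue the row is skipped.
        let info_hash = match row.info_hash {
            Some(t) => t,
            None => continue,
        };
        let seeders = match row.seeders {
            Some(t) => t.replace(",", ""),
            None => continue,
        };
        println!("{};{}", info_hash, seeders);
    }
}

The explicit match arms keep each field's fallback visible at the point of use, which is the same style the commit applies to every lookup in the hunks below.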


@@ -48,24 +48,24 @@ fn skytorrents() {
         let html = fetch_html(page);
         let document = Document::from(&html[..]);
         for row in document.find(Attr("id", "results").descendant(Name("tr"))) {
-            let info_hash: String = row
-                .find(Name("td").descendant(Name("a")))
-                .nth(2)
-                .unwrap()
-                .attr("href")
-                .unwrap()
-                .chars()
-                .skip(20)
-                .take(40)
-                .collect();
+            let info_hash_td = match row.find(Name("td").descendant(Name("a"))).nth(2) {
+                Some(t) => t,
+                None => continue,
+            };
+            let info_hash = match info_hash_td.attr("href") {
+                Some(t) => t.chars().skip(20).take(40).collect(),
+                None => continue,
+            };
-            let name = row
-                .find(Name("td").descendant(Name("a")))
-                .nth(0)
-                .unwrap()
-                .text();
+            let name = match row.find(Name("td").descendant(Name("a"))).nth(0) {
+                Some(t) => t.text(),
+                None => continue,
+            };
-            let size = row.find(Name("td")).nth(1).unwrap().text();
+            let size = match row.find(Name("td")).nth(1) {
+                Some(t) => t.text(),
+                None => continue,
+            };
             let size_bytes = convert_to_bytes(size);
             // simulate a start and scraped date
@@ -74,9 +74,14 @@ fn skytorrents() {
                 .expect("Time went backwards")
                 .as_secs();
-            let seeders = row.find(Name("td")).nth(4).unwrap().text().replace(",", "");
-            let leechers = row.find(Name("td")).nth(5).unwrap().text().replace(",", "");
+            let seeders = match row.find(Name("td")).nth(4) {
+                Some(t) => t.text().replace(",", ""),
+                None => continue,
+            };
+            let leechers = match row.find(Name("td")).nth(5) {
+                Some(t) => t.text().replace(",", ""),
+                None => continue,
+            };
             // infohash;name;size_bytes;created_unix;seeders;leechers;completed;scraped_date
             let add_line = [
@@ -131,45 +136,58 @@ fn leetx() {
                 .descendant(Name("tbody"))
                 .descendant(Name("tr")),
         ) {
-            let detail_page_url = row
-                .find(Class("coll-1"))
-                .nth(0)
-                .unwrap()
-                .find(Name("a"))
-                .nth(1)
-                .unwrap()
-                .attr("href")
-                .unwrap();
-            let detail_full_url = format!("{}{}", base_url, detail_page_url);
+            let detail_page_url_col = match row.find(Class("coll-1")).nth(0) {
+                Some(t) => t,
+                None => continue,
+            };
+            let detail_page_url_name = match detail_page_url_col.find(Name("a")).nth(1) {
+                Some(t) => t,
+                None => continue,
+            };
+            let detail_page_url_href = match detail_page_url_name.attr("href") {
+                Some(t) => t,
+                None => continue,
+            };
+            let detail_full_url = format!("{}{}", base_url, detail_page_url_href);
             println!("Fetching page {}", detail_full_url);
             let detail_html = fetch_html(&detail_full_url);
             let detail_document = Document::from(&detail_html[..]);
-            let info_hash = detail_document
+            let info_hash = match detail_document
                 .find(Class("infohash-box").descendant(Name("span")))
                 .nth(0)
-                .unwrap()
-                .text()
-                .to_lowercase();
+            {
+                Some(t) => t.text().to_lowercase(),
+                None => continue,
+            };
+            let name_col = match row.find(Class("coll-1")).nth(0) {
+                Some(t) => t,
+                None => continue,
+            };
+            let name = match name_col.find(Name("a")).nth(1) {
+                Some(t) => t.text(),
+                None => continue,
+            };
+            let seeders = match row.find(Class("coll-2")).nth(0) {
+                Some(t) => t.text(),
+                None => continue,
+            };
+            let leechers = match row.find(Class("coll-3")).nth(0) {
+                Some(t) => t.text(),
+                None => continue,
+            };
+            let size_col = match row.find(Class("coll-4")).nth(0) {
+                Some(t) => t,
+                None => continue,
+            };
+            let size = match size_col.children().nth(0) {
+                Some(t) => t.text().replace(",", ""),
+                None => continue,
+            };
-            let name = row
-                .find(Class("coll-1"))
-                .nth(0)
-                .unwrap()
-                .find(Name("a"))
-                .nth(1)
-                .unwrap()
-                .text();
-            let seeders = row.find(Class("coll-2")).nth(0).unwrap().text();
-            let leechers = row.find(Class("coll-3")).nth(0).unwrap().text();
-            let size = row
-                .find(Class("coll-4"))
-                .nth(0)
-                .unwrap()
-                .children()
-                .nth(0)
-                .unwrap()
-                .text()
-                .replace(",", "");;
             let size_bytes = convert_to_bytes(size);
             let created_unix = SystemTime::now()