Fixing cloudflare, Scraping magnetdl. Fixes #38

2019-01-25 19:58:49 -08:00 · 2019-01-25 19:58:49 -08:00 · ef0094eefd
commit ef0094eefd
parent 59c041ad61
2 changed files with 106 additions and 15 deletions
--- a/new_torrents_fetcher/src/cf.py
+++ b/new_torrents_fetcher/src/cf.py
@ -0,0 +1,8 @@
 # Ask cfscrape for the cookie string and user agent that go with one
 # itorrents.org torrent URL, then print them shaped like an HTTP request:
 # line 1 is a request line, line 2 the "Cookie:" header and line 3 the
 # "User-Agent:" header.
 import cfscrape
 torrent_url = "https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent"
 cookie_value, user_agent = cfscrape.get_cookie_string(torrent_url)
 header_lines = [
     "GET / HTTP/1.1",
     "Cookie: %s" % (cookie_value,),
     "User-Agent: %s" % (user_agent,),
 ]
 # Every line, including the last one, is terminated with CRLF.
 print("\r\n".join(header_lines) + "\r\n")
--- a/new_torrents_fetcher/src/main.rs
+++ b/new_torrents_fetcher/src/main.rs
@ -5,12 +5,14 @@ extern crate select;
 use clap::{App, Arg};
 use select::document::Document;
 use select::predicate::{Attr, Class, Name, Predicate};
 use std::fs;
 use std::path::Path;
 use std::process::Command;
 use std::{thread, time};
-// curl 'https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent' -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' -H 'Accept-Language: en-US,en;q=0.5' --compressed -H 'Referer: https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent' -H 'Connection: keep-alive' -H 'Cookie: __cfduid=dbd0c40338c7e5ad0dc38fec2e3913fe11548372388; cf_clearance=6614dc889970147a3a6e64f3e5b60a09469fe9f8-1548434564-3600-150' -H 'Upgrade-Insecure-Requests: 1' -H 'Pragma: no-cache' -H 'Cache-Control: no-cache' -H 'TE: Trailers'
+static mut COOKIE: &str = "";
 static mut USER_AGENT: &str = "";
 const COOKIE: &str = "Cookie: __cfduid=dbd0c40338c7e5ad0dc38fec2e3913fe11548372388; cf_clearance=6614dc889970147a3a6e64f3e5b60a09469fe9f8-1548434564-3600-150";
 fn main() {
  let matches = App::new("New Torrents Fetcher")
@ -38,12 +40,14 @@ fn main() {
  let save_dir = Path::new(matches.value_of("TORRENT_SAVE_DIR").unwrap());
  fetch_cloudflare_cookie();
  magnetdl(save_dir);
  skytorrents(save_dir);
  leetx(save_dir);
  if let Some(t) = matches.value_of("TORRENTS_CSV_FILE") {
    torrents_csv_scan(Path::new(t), save_dir);
  }
  skytorrents(save_dir);
  leetx(save_dir);
 }
 fn torrents_csv_scan(torrents_csv_file: &Path, save_dir: &Path) {
@ -53,14 +57,47 @@ fn torrents_csv_scan(torrents_csv_file: &Path, save_dir: &Path) {
 }
 fn collect_info_hashes(torrents_csv_file: &Path) -> Vec<String> {
  println!("Scanning torrent infohashes...");
  let mut rdr = csv::ReaderBuilder::new()
    .delimiter(b';')
    .from_path(torrents_csv_file)
    .unwrap();
-  rdr
+  rdr.records().map(|x| x.unwrap()[0].to_string()).collect()
-    .records()
+}
-    .map(|x| x.unwrap()[0].to_string())
+
-    .collect()
+fn magnetdl(save_dir: &Path) {
  let page_limit = 30;
  let base_url = "https://magnetdl.com";
  let mut pages: Vec<String> = Vec::new();
  let types = ["software", "movies", "games", "e-books", "tv", "music"];
  // https://www.magnetdl.com/download/software/se/desc/1/
  for c_type in types.iter() {
    for i in 1..page_limit {
      let page = format!("{}/download/{}/se/desc/{}/", base_url, c_type, i);
      pages.push(page);
    }
  }
  for page in pages.iter() {
    println!("Fetching page {}", page);
    let html = match fetch_html(page) {
      Ok(t) => t,
      _err => continue,
    };
    let document = Document::from(&html[..]);
    for row in document.find(Class("m").descendant(Name("a"))) {
      let hash = match row.attr("href") {
        Some(t) => t.to_string().chars().skip(20).take(40).collect(),
        None => continue,
      };
      fetch_torrent(hash, save_dir);
    }
  }
 }
 fn skytorrents(save_dir: &Path) {
@ -196,11 +233,12 @@ fn fetch_torrent(hash: String, save_dir: &Path) {
    .unwrap();
  if !Path::new(&full_path).exists() {
-    Command::new("curl")
+    unsafe {
      Command::new("curl")
      .args(&[
        &url,
        "-H",
-        "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0",
+        USER_AGENT,
        "-H",
        COOKIE,
        "--compressed",
@ -208,12 +246,57 @@ fn fetch_torrent(hash: String, save_dir: &Path) {
        &full_path,
        "-s",
      ])
-      .status()
+      .output()
      .expect("curl command failed");
-    println!("{} saved.", &full_path);
+      check_cloud_flare(Path::new(&full_path));
      thread::sleep(time::Duration::from_millis(2000));
      println!("{} saved.", &full_path);
    }
  }
 }
-fn fetch_html(url: &str) -> Result<String, reqwest::Error> {
+fn check_cloud_flare(file: &Path) {
-  reqwest::get(url)?.text()
+  let data = match fs::read_to_string(file) {
    Ok(t) => t,
    _err => return,
  };
  if data == "" {
    return;
  }
  let first_line = &data[..5];
  if first_line == "<!DOC" {
    fs::remove_file(file);
    println!("Cloudflare failed, re-fetching.");
    fetch_cloudflare_cookie();
  }
 }
 /// Refreshes the global `COOKIE` and `USER_AGENT` headers by running `src/cf.py`.
 ///
 /// The script's stdout is read line by line: the first line is ignored, the
 /// second is stored in `COOKIE` and the third in `USER_AGENT`. If stdout has
 /// fewer than three lines, a diagnostic is written to stderr and both globals
 /// keep their previous values.
 ///
 /// # Panics
 ///
 /// Panics if the `python` command cannot be run.
 fn fetch_cloudflare_cookie() {
  println!("Fetching new CloudFlare Cookie...");
  let output = Command::new("python")
    .args(&["src/cf.py"])
    .output()
    .expect("python command failed");
  let stdout = String::from_utf8_lossy(&output.stdout);
  let mut headers = stdout.lines().skip(1);
  match (headers.next(), headers.next()) {
    (Some(cookie), Some(user_agent)) => {
      // The globals hold `&'static str`, so each header is copied into a
      // deliberately leaked allocation that lives for the rest of the program.
      let cookie = string_to_static_str(cookie.to_string());
      let user_agent = string_to_static_str(user_agent.to_string());
      // SAFETY: writing a `static mut` is only sound while nothing else
      // accesses it concurrently.
      // NOTE(review): assumes these globals are only used from one thread — confirm.
      unsafe {
        COOKIE = cookie;
        USER_AGENT = user_agent;
      }
    }
    _ => eprintln!(
      "src/cf.py produced no usable headers, keeping the previous ones: {}",
      String::from_utf8_lossy(&output.stderr).trim()
    ),
  }
 }
 /// Converts an owned `String` into a `&'static str` by leaking its heap
 /// allocation on purpose; the memory is never reclaimed.
 fn string_to_static_str(s: String) -> &'static str {
  let boxed: Box<str> = s.into_boxed_str();
  Box::leak(boxed)
 }
 /// Sends a GET request for `url` with an `Accept: text/html` header and
 /// returns the response body as text.
 ///
 /// # Errors
 ///
 /// Returns the `reqwest::Error` produced either by sending the request or by
 /// reading the body.
 fn fetch_html(url: &str) -> Result<String, reqwest::Error> {
  let client = reqwest::Client::new();
  let request = client.get(url).header("Accept", "text/html");
  let mut response = request.send()?;
  response.text()
 }