// torrents.csv/new_torrents_fetcher/src/main.rs

extern crate clap;
extern crate csv;
extern crate reqwest;
extern crate select;
use clap::{App, Arg};
use select::document::Document;
use select::predicate::{Attr, Class, Name, Predicate};
use std::path::Path;
use std::process::Command;

// curl 'https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent' -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' -H 'Accept-Language: en-US,en;q=0.5' --compressed -H 'Referer: https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent' -H 'Connection: keep-alive' -H 'Cookie: __cfduid=dbd0c40338c7e5ad0dc38fec2e3913fe11548372388; cf_clearance=6614dc889970147a3a6e64f3e5b60a09469fe9f8-1548434564-3600-150' -H 'Upgrade-Insecure-Requests: 1' -H 'Pragma: no-cache' -H 'Cache-Control: no-cache' -H 'TE: Trailers'

// Raw `Cookie:` header line passed to curl via `-H` when downloading torrent files.
// NOTE(review): the value is hard-coded and looks session-specific (`cf_clearance`);
// presumably it expires and has to be refreshed by hand from a browser session
// (see the sample curl command above) — confirm.
const COOKIE: &str = "Cookie: __cfduid=dbd0c40338c7e5ad0dc38fec2e3913fe11548372388; cf_clearance=6614dc889970147a3a6e64f3e5b60a09469fe9f8-1548434564-3600-150";

/// Entry point.
///
/// Parses the command line, optionally downloads every info hash listed in a
/// torrents.csv file, and then scrapes skytorrents and 1337x for new torrents.
/// All downloaded `.torrent` files go into the directory given by `--save_dir`.
fn main() {
  let save_dir_arg = Arg::with_name("TORRENT_SAVE_DIR")
    .short("s")
    .long("save_dir")
    .value_name("DIR")
    .takes_value(true)
    .help("Where to save the torrent files.")
    .required(true);

  let torrents_csv_arg = Arg::with_name("TORRENTS_CSV_FILE")
    .short("t")
    .long("torrents_csv")
    .value_name("FILE")
    .takes_value(true)
    .help("The location of a torrents.csv file. If given, it will download those infohashes.");

  let cli = App::new("New Torrents Fetcher")
    .version("0.1.0")
    .author("Dessalines")
    .about("Fetches new torrent files from various sites.")
    .arg(save_dir_arg)
    .arg(torrents_csv_arg)
    .get_matches();

  // The argument is declared `required(true)`, so clap has already rejected
  // invocations without it and the unwrap cannot fail here.
  let save_dir = Path::new(cli.value_of("TORRENT_SAVE_DIR").unwrap());

  // The CSV scan is optional and runs before the site scrapers.
  if let Some(csv_path) = cli.value_of("TORRENTS_CSV_FILE").map(Path::new) {
    torrents_csv_scan(csv_path, save_dir);
  }

  skytorrents(save_dir);
  leetx(save_dir);
}

/// Downloads the torrent file for every info hash found in `torrents_csv_file`,
/// saving each one into `save_dir`.
fn torrents_csv_scan(torrents_csv_file: &Path, save_dir: &Path) {
  collect_info_hashes(torrents_csv_file)
    .into_iter()
    .for_each(|info_hash| fetch_torrent(info_hash, save_dir));
}

/// Reads a `;`-delimited CSV file and returns the first field of every record.
///
/// The reader is built with the csv crate's defaults apart from the delimiter.
/// Records that cannot be parsed are reported on stderr and skipped, so a
/// single corrupt line no longer aborts the whole run. Reading stops at the
/// first I/O error, returning whatever was collected up to that point.
///
/// # Panics
///
/// Panics if the file cannot be opened.
fn collect_info_hashes(torrents_csv_file: &Path) -> Vec<String> {
  let mut rdr = csv::ReaderBuilder::new()
    .delimiter(b';')
    .from_path(torrents_csv_file)
    .unwrap_or_else(|e| {
      panic!(
        "could not open torrents csv {}: {}",
        torrents_csv_file.display(),
        e
      )
    });

  let mut hashes = Vec::new();
  for record in rdr.records() {
    match record {
      Ok(row) => {
        // `get` instead of indexing: a record without a first field is
        // ignored rather than causing a panic.
        if let Some(hash) = row.get(0) {
          hashes.push(hash.to_string());
        }
      }
      Err(e) => {
        if e.is_io_error() {
          // An I/O failure is unlikely to go away on the next read; stop
          // instead of risking the same error over and over.
          eprintln!(
            "Stopped reading {}: {}",
            torrents_csv_file.display(),
            e
          );
          break;
        }
        eprintln!(
          "Skipping malformed record in {}: {}",
          torrents_csv_file.display(),
          e
        );
      }
    }
  }
  hashes
}

/// Scrapes the skytorrents "top100" listings and downloads every torrent found.
///
/// Listing URLs are built for pages `1..page_limit` (the upper bound is
/// exclusive) of the general listing first, then for the same page range of
/// each content type. Pages that fail to download are skipped.
fn skytorrents(save_dir: &Path) {
  let base_url = "https://www.skytorrents.lol";
  let page_limit = 100;
  let types = [
    "video", "audio", "games", "software", "doc", "ebook", "yify", "epub", "xxx", "show", "album",
    "1080",
  ];

  // General listing first, followed by every page of each type, in that order.
  let pages: Vec<String> = (1..page_limit)
    .map(|n| format!("{}/top100?page={}", base_url, n))
    .chain(types.iter().flat_map(|kind| {
      (1..page_limit).map(move |n| format!("{}/top100?type={}&page={}", base_url, kind, n))
    }))
    .collect();

  for url in &pages {
    println!("Fetching page {}", url);
    let body = match fetch_html(url) {
      Ok(text) => text,
      Err(_) => continue,
    };

    let document = Document::from(body.as_str());
    for row in document.find(Attr("id", "results").descendant(Name("tr"))) {
      // The third `<a>` found under the row's `<td>` cells carries the hash link.
      if let Some(anchor) = row.find(Name("td").descendant(Name("a"))).nth(2) {
        if let Some(href) = anchor.attr("href") {
          // Drop the first 20 characters and keep the next 40.
          // NOTE(review): presumably the href is a magnet link whose
          // `magnet:?xt=urn:btih:` prefix is 20 characters long, followed by
          // a 40-character hex info hash — confirm against the site markup.
          let hash: String = href.chars().skip(20).take(40).collect();
          fetch_torrent(hash, save_dir);
        }
      }
    }
  }
}

/// Scrapes the 1337x category listings (sorted by seeders, descending) and
/// downloads every torrent found.
///
/// For each listing row the torrent's detail page is fetched as well, because
/// the info hash is read from that page. Listing or detail pages that fail to
/// download, and rows missing any of the expected elements, are skipped.
fn leetx(save_dir: &Path) {
  let base_url = "https://1337x.to";
  let page_limit = 50;
  let categories = [
    "Games",
    "Anime",
    "Apps",
    "Documentaries",
    "Movies",
    "Music",
    "Other",
    "TV",
    "XXX",
  ];

  // Page-major order: every category for page 1, then every category for
  // page 2, and so on up to (but not including) `page_limit`.
  let listing_urls: Vec<String> = (1..page_limit)
    .flat_map(|page_no| {
      categories.iter().map(move |category| {
        format!("{}/sort-cat/{}/seeders/desc/{}/", base_url, category, page_no)
      })
    })
    .collect();

  for listing_url in &listing_urls {
    println!("Fetching page {}", listing_url);
    let listing_html = match fetch_html(listing_url) {
      Ok(text) => text,
      Err(_) => continue,
    };
    let listing = Document::from(listing_html.as_str());

    let rows = listing.find(
      Class("table-list")
        .descendant(Name("tbody"))
        .descendant(Name("tr")),
    );
    for row in rows {
      // The second `<a>` inside the row's first `coll-1` cell links to the
      // torrent's detail page.
      let name_cell = match row.find(Class("coll-1")).next() {
        Some(cell) => cell,
        None => continue,
      };
      let detail_link = match name_cell.find(Name("a")).nth(1) {
        Some(link) => link,
        None => continue,
      };
      let detail_href = match detail_link.attr("href") {
        Some(href) => href,
        None => continue,
      };

      // The href is appended to the site root as-is.
      let detail_url = format!("{}{}", base_url, detail_href);
      println!("Fetching page {}", detail_url);
      let detail_html = match fetch_html(&detail_url) {
        Ok(text) => text,
        Err(_) => continue,
      };
      let detail = Document::from(detail_html.as_str());

      // The hash is the text of the first `<span>` under `infohash-box`,
      // lower-cased before it is handed to the downloader.
      let hash_span = detail
        .find(Class("infohash-box").descendant(Name("span")))
        .next();
      if let Some(span) = hash_span {
        fetch_torrent(span.text().to_lowercase(), save_dir);
      }
    }
  }
}

/// Downloads `<hash>.torrent` from itorrents.org into `save_dir` using curl.
///
/// `hash` must be a 40-character hexadecimal info hash (surrounding whitespace
/// is ignored). Anything else is reported on stderr and skipped: the value
/// comes from scraped pages or CSV rows and is used to build both a file path
/// and a URL, so it must not be able to contain path separators or other
/// unexpected characters.
///
/// Nothing is downloaded when the target file already exists. The file keeps
/// the hash's original letter case in its name; the URL uses the upper-cased
/// hash.
///
/// # Panics
///
/// Panics if the `curl` executable cannot be started.
fn fetch_torrent(hash: String, save_dir: &Path) {
  let hash = hash.trim();
  if hash.len() != 40 || !hash.bytes().all(|b| b.is_ascii_hexdigit()) {
    eprintln!("Skipping invalid info hash {:?}", hash);
    return;
  }

  let file_name = format!("{}.torrent", hash);
  let url = format!(
    "https://itorrents.org/torrent/{}.torrent",
    hash.to_ascii_uppercase()
  );

  // Kept as a path rather than converted to `String`, so a save directory
  // that is not valid UTF-8 no longer causes a panic.
  let full_path = save_dir.join(&file_name);
  if full_path.exists() {
    return;
  }

  // Curl is the only thing that works with itorrent
  let status = Command::new("curl")
    .arg(&url)
    .args(&[
      "-H",
      "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0",
      "-H",
      COOKIE,
      "--compressed",
      "-o",
    ])
    .arg(&full_path)
    .arg("-s")
    .status()
    .expect("curl command failed");

  // Only claim success when curl itself reported success.
  if status.success() {
    println!("{} saved.", full_path.display());
  } else {
    eprintln!("curl could not fetch {} ({})", url, status);
  }
}

/// Performs a GET request for `url` and returns the response body as text.
///
/// Any error from sending the request or reading the body is returned to the
/// caller unchanged.
fn fetch_html(url: &str) -> Result<String, reqwest::Error> {
  let mut response = reqwest::get(url)?;
  response.text()
}