torrents.csv/new_torrents_fetcher/src/main.rs

220 lines
6.1 KiB
Rust

extern crate clap;
extern crate csv;
extern crate reqwest;
extern crate select;
use clap::{App, Arg};
use select::document::Document;
use select::predicate::{Attr, Class, Name, Predicate};
use std::path::Path;
use std::process::Command;
// Reference request, written as a curl command line, that contains the same `Cookie:` header
// value as the constant below:
// curl 'https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent' -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' -H 'Accept-Language: en-US,en;q=0.5' --compressed -H 'Referer: https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent' -H 'Connection: keep-alive' -H 'Cookie: __cfduid=dbd0c40338c7e5ad0dc38fec2e3913fe11548372388; cf_clearance=6614dc889970147a3a6e64f3e5b60a09469fe9f8-1548434564-3600-150' -H 'Upgrade-Insecure-Requests: 1' -H 'Pragma: no-cache' -H 'Cache-Control: no-cache' -H 'TE: Trailers'
/// Complete `Cookie:` header line that `fetch_torrent` hands to curl (via `-H`) for every
/// itorrents.org download.
///
/// NOTE(review): `__cfduid` / `cf_clearance` look like browser-session cookies issued by the
/// site's anti-bot front end, so this value presumably expires and has to be refreshed by
/// hand — confirm, and consider loading it from configuration instead of hard-coding it.
const COOKIE: &str = "Cookie: __cfduid=dbd0c40338c7e5ad0dc38fec2e3913fe11548372388; cf_clearance=6614dc889970147a3a6e64f3e5b60a09469fe9f8-1548434564-3600-150";
/// Entry point: parses the command line and then runs every fetcher in turn.
///
/// Command-line options:
/// * `-s` / `--save_dir <DIR>` (required): directory the `.torrent` files are written to.
/// * `-t` / `--torrents_csv <FILE>` (optional): a torrents.csv file whose info hashes are
///   downloaded before the site scrapers run.
///
/// After the optional CSV pass, the skytorrents scraper runs, followed by the 1337x scraper.
fn main() {
    let save_dir_arg = Arg::with_name("TORRENT_SAVE_DIR")
        .short("s")
        .long("save_dir")
        .value_name("DIR")
        .takes_value(true)
        .help("Where to save the torrent files.")
        .required(true);

    let torrents_csv_arg = Arg::with_name("TORRENTS_CSV_FILE")
        .short("t")
        .long("torrents_csv")
        .value_name("FILE")
        .takes_value(true)
        .help("The location of a torrents.csv file. If given, it will download those infohashes.");

    let cli = App::new("New Torrents Fetcher")
        .version("0.1.0")
        .author("Dessalines")
        .about("Fetches new torrent files from various sites.")
        .arg(save_dir_arg)
        .arg(torrents_csv_arg)
        .get_matches();

    // The argument is declared `required(true)` above, so a value is expected to be present.
    let save_dir = Path::new(cli.value_of("TORRENT_SAVE_DIR").unwrap());

    // The CSV pass only happens when the user supplied a file.
    let torrents_csv = cli.value_of("TORRENTS_CSV_FILE").map(Path::new);
    if let Some(csv_path) = torrents_csv {
        torrents_csv_scan(csv_path, save_dir);
    }

    skytorrents(save_dir);
    leetx(save_dir);
}
/// Downloads the torrent file for every info hash listed in `torrents_csv_file`.
///
/// The hashes are read up front with `collect_info_hashes` and then handed, one at a time and
/// in file order, to `fetch_torrent`, which stores the results under `save_dir`.
fn torrents_csv_scan(torrents_csv_file: &Path, save_dir: &Path) {
    collect_info_hashes(torrents_csv_file)
        .into_iter()
        .for_each(|info_hash| fetch_torrent(info_hash, save_dir));
}
/// Reads a `;`-delimited CSV file and returns the first field of every record as a `String`.
///
/// NOTE(review): the reader is built without changing any option other than the delimiter, so
/// whatever header handling the csv crate applies by default is in effect — confirm that the
/// first line of the file is meant to be treated that way.
///
/// # Panics
/// Panics if the file cannot be opened, if any record fails to parse, or if a record has no
/// first field.
fn collect_info_hashes(torrents_csv_file: &Path) -> Vec<String> {
    let mut builder = csv::ReaderBuilder::new();
    builder.delimiter(b';');
    let mut reader = builder.from_path(torrents_csv_file).unwrap();

    let mut info_hashes = Vec::new();
    for record in reader.records() {
        let record = record.unwrap();
        // Only the first column is needed.
        info_hashes.push(record[0].to_string());
    }
    info_hashes
}
/// Scrapes the skytorrents "top100" listings and downloads every torrent that is found.
///
/// Listing URLs are generated for page numbers `1..page_limit` (the upper bound is exclusive):
/// first the untyped listing, then the same page range for each entry in `types`. Each page is
/// fetched in that order; a page whose fetch fails is skipped. For every `tr` below the element
/// with id `results`, the third `a` found inside a `td` supplies the `href` from which the info
/// hash is cut, and that hash is passed to `fetch_torrent`.
fn skytorrents(save_dir: &Path) {
    let page_limit = 100;
    let base_url = "https://www.skytorrents.lol";
    let types = [
        "video", "audio", "games", "software", "doc", "ebook", "yify", "epub", "xxx", "show", "album",
        "1080",
    ];

    // Untyped listing first ...
    let mut pages: Vec<String> = (1..page_limit)
        .map(|i| format!("{}/top100?page={}", base_url, i))
        .collect();
    // ... followed by every category, each over the same page range.
    pages.extend(types.iter().flat_map(|c_type| {
        (1..page_limit).map(move |i| format!("{}/top100?type={}&page={}", base_url, c_type, i))
    }));

    for page in &pages {
        println!("Fetching page {}", page);
        let html = match fetch_html(page) {
            Ok(body) => body,
            Err(_) => continue,
        };
        let document = Document::from(html.as_str());

        for row in document.find(Attr("id", "results").descendant(Name("tr"))) {
            // Rows without a third link, or whose link has no href, are ignored.
            if let Some(link) = row.find(Name("td").descendant(Name("a"))).nth(2) {
                if let Some(href) = link.attr("href") {
                    // Drop the first 20 characters and keep the next 40.
                    // NOTE(review): 20 is the length of "magnet:?xt=urn:btih:", so the href is
                    // presumably a magnet URI carrying a 40-character hex info hash — confirm.
                    let hash: String = href.chars().skip(20).take(40).collect();
                    fetch_torrent(hash, save_dir);
                }
            }
        }
    }
}
/// Scrapes the 1337x category listings and downloads every torrent that is found.
///
/// Listing URLs of the form `/sort-cat/<category>/seeders/desc/<page>/` are generated for page
/// numbers `1..page_limit` (upper bound exclusive), visiting every category in `types` for one
/// page number before moving on to the next. A listing page whose fetch fails is skipped. For
/// each `tr` in the `tbody` of the `table-list` element, the second `a` inside the first
/// `coll-1` element gives the relative URL of the torrent's detail page; the info hash is read
/// from that page and handed to `fetch_torrent`. Rows missing any of these pieces are skipped.
fn leetx(save_dir: &Path) {
    let page_limit = 50;
    let base_url = "https://1337x.to";
    let types = [
        "Games",
        "Anime",
        "Apps",
        "Documentaries",
        "Movies",
        "Music",
        "Other",
        "TV",
        "XXX",
    ];

    // Page number is the outer dimension, category the inner one.
    let pages: Vec<String> = (1..page_limit)
        .flat_map(|i| {
            types
                .iter()
                .map(move |c_type| format!("{}/sort-cat/{}/seeders/desc/{}/", base_url, c_type, i))
        })
        .collect();

    for page in &pages {
        println!("Fetching page {}", page);
        let html = match fetch_html(page) {
            Ok(body) => body,
            Err(_) => continue,
        };
        let document = Document::from(html.as_str());

        let rows = document.find(
            Class("table-list")
                .descendant(Name("tbody"))
                .descendant(Name("tr")),
        );
        for row in rows {
            if let Some(name_col) = row.find(Class("coll-1")).next() {
                if let Some(name_link) = name_col.find(Name("a")).nth(1) {
                    if let Some(href) = name_link.attr("href") {
                        let detail_full_url = format!("{}{}", base_url, href);
                        println!("Fetching page {}", detail_full_url);
                        if let Some(hash) = leetx_info_hash(&detail_full_url) {
                            fetch_torrent(hash, save_dir);
                        }
                    }
                }
            }
        }
    }
}

/// Fetches a 1337x detail page and returns the lower-cased text of the first `span` inside the
/// `infohash-box` element, or `None` when the fetch fails or no such element exists.
fn leetx_info_hash(detail_url: &str) -> Option<String> {
    let detail_html = fetch_html(detail_url).ok()?;
    let detail_document = Document::from(detail_html.as_str());
    let hash_span = detail_document
        .find(Class("infohash-box").descendant(Name("span")))
        .next()?;
    Some(hash_span.text().to_lowercase())
}
/// Downloads the `.torrent` file for `hash` from itorrents.org into `save_dir`.
///
/// The file is stored as `<hash>.torrent` (hash spelled as given, minus surrounding
/// whitespace); the request URL uses the upper-cased hash. Nothing is downloaded when the
/// target file already exists.
///
/// `hash` comes from scraped HTML or a user-supplied CSV, so it is validated first: anything
/// that is not exactly 40 hexadecimal digits is reported on stderr and skipped. This keeps
/// values such as `../x` or an absolute path from escaping `save_dir`.
///
/// curl runs with `--fail`, and its exit status is checked. A failed download is reported on
/// stderr and any partial file is removed, so a later run can retry.
///
/// # Panics
/// Panics if the `curl` executable cannot be started.
fn fetch_torrent(hash: String, save_dir: &Path) {
    let hash = hash.trim();
    if !is_valid_info_hash(hash) {
        eprintln!("Skipping invalid info hash {:?}", hash);
        return;
    }

    let file_name = format!("{}.torrent", hash);
    let url = format!(
        "https://itorrents.org/torrent/{}.torrent",
        hash.to_ascii_uppercase()
    );
    // Kept as a path (not converted to `String`) so non-UTF-8 save directories work too.
    let full_path = save_dir.join(&file_name);
    if full_path.exists() {
        return;
    }

    // Curl is the only thing that works with itorrent
    let status = Command::new("curl")
        .arg(&url)
        .arg("-H")
        .arg("User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0")
        .arg("-H")
        .arg(COOKIE)
        .arg("--compressed")
        // Make HTTP errors a non-zero exit instead of saving the error page as a torrent.
        .arg("--fail")
        .arg("-o")
        .arg(&full_path)
        .arg("-s")
        .status()
        .expect("curl command failed");

    if status.success() {
        println!("{} saved.", full_path.display());
    } else {
        // Best effort: a leftover partial file would make the exists-check skip the retry.
        let _ = std::fs::remove_file(&full_path);
        eprintln!("curl exited with {} while fetching {}", status, url);
    }
}

/// Returns `true` when `hash` is exactly 40 ASCII hexadecimal digits.
fn is_valid_info_hash(hash: &str) -> bool {
    hash.len() == 40 && hash.bytes().all(|b| b.is_ascii_hexdigit())
}
/// Performs an HTTP GET on `url` and returns the response body as text.
///
/// The HTTP status code is not inspected here; whatever body the server sends is returned.
///
/// # Errors
/// Returns the `reqwest::Error` raised while sending the request or while reading the body.
fn fetch_html(url: &str) -> Result<String, reqwest::Error> {
    let mut response = reqwest::get(url)?;
    response.text()
}