Fixing cloudflare, Scraping magnetdl. Fixes #38

This commit is contained in:
Dessalines 2019-01-25 19:58:49 -08:00
parent 59c041ad61
commit ef0094eefd
2 changed files with 106 additions and 15 deletions

View File

@ -0,0 +1,8 @@
# Prints a request line followed by "Cookie:" and "User-Agent:" header lines,
# each terminated by CRLF, using values obtained through cfscrape.
import cfscrape

TARGET_URL = "https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent"

# get_cookie_string yields the cookie header value together with the
# User-Agent string that goes with it.
cookie_header, agent = cfscrape.get_cookie_string(TARGET_URL)

header_lines = [
    "GET / HTTP/1.1",
    "Cookie: %s" % cookie_header,
    "User-Agent: %s" % agent,
]

# Line order matters: the request line comes first, then Cookie, then User-Agent.
print("".join(line + "\r\n" for line in header_lines))

View File

@ -5,12 +5,14 @@ extern crate select;
use clap::{App, Arg}; use clap::{App, Arg};
use select::document::Document; use select::document::Document;
use select::predicate::{Attr, Class, Name, Predicate}; use select::predicate::{Attr, Class, Name, Predicate};
use std::fs;
use std::path::Path; use std::path::Path;
use std::process::Command; use std::process::Command;
use std::{thread, time};
// curl 'https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent' -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' -H 'Accept-Language: en-US,en;q=0.5' --compressed -H 'Referer: https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent' -H 'Connection: keep-alive' -H 'Cookie: __cfduid=dbd0c40338c7e5ad0dc38fec2e3913fe11548372388; cf_clearance=6614dc889970147a3a6e64f3e5b60a09469fe9f8-1548434564-3600-150' -H 'Upgrade-Insecure-Requests: 1' -H 'Pragma: no-cache' -H 'Cache-Control: no-cache' -H 'TE: Trailers' static mut COOKIE: &str = "";
static mut USER_AGENT: &str = "";
const COOKIE: &str = "Cookie: __cfduid=dbd0c40338c7e5ad0dc38fec2e3913fe11548372388; cf_clearance=6614dc889970147a3a6e64f3e5b60a09469fe9f8-1548434564-3600-150";
fn main() { fn main() {
let matches = App::new("New Torrents Fetcher") let matches = App::new("New Torrents Fetcher")
@ -38,12 +40,14 @@ fn main() {
let save_dir = Path::new(matches.value_of("TORRENT_SAVE_DIR").unwrap()); let save_dir = Path::new(matches.value_of("TORRENT_SAVE_DIR").unwrap());
fetch_cloudflare_cookie();
magnetdl(save_dir);
skytorrents(save_dir);
leetx(save_dir);
if let Some(t) = matches.value_of("TORRENTS_CSV_FILE") { if let Some(t) = matches.value_of("TORRENTS_CSV_FILE") {
torrents_csv_scan(Path::new(t), save_dir); torrents_csv_scan(Path::new(t), save_dir);
} }
skytorrents(save_dir);
leetx(save_dir);
} }
fn torrents_csv_scan(torrents_csv_file: &Path, save_dir: &Path) { fn torrents_csv_scan(torrents_csv_file: &Path, save_dir: &Path) {
@ -53,14 +57,47 @@ fn torrents_csv_scan(torrents_csv_file: &Path, save_dir: &Path) {
} }
fn collect_info_hashes(torrents_csv_file: &Path) -> Vec<String> { fn collect_info_hashes(torrents_csv_file: &Path) -> Vec<String> {
println!("Scanning torrent infohashes...");
let mut rdr = csv::ReaderBuilder::new() let mut rdr = csv::ReaderBuilder::new()
.delimiter(b';') .delimiter(b';')
.from_path(torrents_csv_file) .from_path(torrents_csv_file)
.unwrap(); .unwrap();
rdr rdr.records().map(|x| x.unwrap()[0].to_string()).collect()
.records() }
.map(|x| x.unwrap()[0].to_string())
.collect() fn magnetdl(save_dir: &Path) {
let page_limit = 30;
let base_url = "https://magnetdl.com";
let mut pages: Vec<String> = Vec::new();
let types = ["software", "movies", "games", "e-books", "tv", "music"];
// https://www.magnetdl.com/download/software/se/desc/1/
for c_type in types.iter() {
for i in 1..page_limit {
let page = format!("{}/download/{}/se/desc/{}/", base_url, c_type, i);
pages.push(page);
}
}
for page in pages.iter() {
println!("Fetching page {}", page);
let html = match fetch_html(page) {
Ok(t) => t,
_err => continue,
};
let document = Document::from(&html[..]);
for row in document.find(Class("m").descendant(Name("a"))) {
let hash = match row.attr("href") {
Some(t) => t.to_string().chars().skip(20).take(40).collect(),
None => continue,
};
fetch_torrent(hash, save_dir);
}
}
} }
fn skytorrents(save_dir: &Path) { fn skytorrents(save_dir: &Path) {
@ -196,11 +233,12 @@ fn fetch_torrent(hash: String, save_dir: &Path) {
.unwrap(); .unwrap();
if !Path::new(&full_path).exists() { if !Path::new(&full_path).exists() {
unsafe {
Command::new("curl") Command::new("curl")
.args(&[ .args(&[
&url, &url,
"-H", "-H",
"User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0", USER_AGENT,
"-H", "-H",
COOKIE, COOKIE,
"--compressed", "--compressed",
@ -208,12 +246,57 @@ fn fetch_torrent(hash: String, save_dir: &Path) {
&full_path, &full_path,
"-s", "-s",
]) ])
.status() .output()
.expect("curl command failed"); .expect("curl command failed");
check_cloud_flare(Path::new(&full_path));
thread::sleep(time::Duration::from_millis(2000));
println!("{} saved.", &full_path); println!("{} saved.", &full_path);
} }
}
} }
fn fetch_html(url: &str) -> Result<String, reqwest::Error> { fn check_cloud_flare(file: &Path) {
reqwest::get(url)?.text() let data = match fs::read_to_string(file) {
Ok(t) => t,
_err => return,
};
if data == "" {
return;
}
let first_line = &data[..5];
if first_line == "<!DOC" {
fs::remove_file(file);
println!("Cloudflare failed, re-fetching.");
fetch_cloudflare_cookie();
}
}
/// Runs `python src/cf.py` and stores the second and third lines of its
/// standard output in the `COOKIE` and `USER_AGENT` statics.
///
/// The stored strings are leaked via `string_to_static_str` so they can live
/// in `&'static str` statics; only those two lines are leaked, not the whole
/// output.
///
/// # Panics
/// Panics if the `python` process cannot be started, or if its output has
/// fewer than three lines. In the latter case the script's standard error is
/// included in the panic message.
fn fetch_cloudflare_cookie() {
    println!("Fetching new CloudFlare Cookie...");
    let output = Command::new("python")
        .args(&["src/cf.py"])
        .output()
        .expect("python command failed");

    let stdout = String::from_utf8_lossy(&output.stdout);
    // Line 0 is not used; the next two lines are the cookie and user agent.
    let mut header_lines = stdout.lines().skip(1);
    let (cookie, user_agent) = match (header_lines.next(), header_lines.next()) {
        (Some(cookie), Some(user_agent)) => (cookie, user_agent),
        _ => panic!(
            "src/cf.py did not print Cookie and User-Agent lines; stderr: {}",
            String::from_utf8_lossy(&output.stderr)
        ),
    };

    let cookie = string_to_static_str(cookie.to_string());
    let user_agent = string_to_static_str(user_agent.to_string());

    // SAFETY: writing a `static mut` is only sound while nothing else reads or
    // writes it concurrently. NOTE(review): no thread spawning is visible in
    // this part of the file — confirm these statics stay single-threaded.
    unsafe {
        COOKIE = cookie;
        USER_AGENT = user_agent;
    }
}
/// Turns an owned `String` into a `&'static str` by leaking its allocation.
///
/// The leaked memory is never reclaimed, so every call permanently keeps the
/// string's bytes alive for the rest of the process.
fn string_to_static_str(s: String) -> &'static str {
    let boxed: Box<str> = s.into_boxed_str();
    let leaked: &'static mut str = Box::leak(boxed);
    leaked
}
fn fetch_html(url: &str) -> Result<String, reqwest::Error> {
reqwest::Client::new()
.get(url)
.header("Accept", "text/html")
.send()?
.text()
} }