Fixing cloudflare, Scraping magnetdl. Fixes #38
parent 59c041ad61
commit ef0094eefd

New Python helper script (the Rust code below runs it as "src/cf.py"):

@@ -0,0 +1,8 @@
+import cfscrape
+request = "GET / HTTP/1.1\r\n"
+
+cookie_value, user_agent = cfscrape.get_cookie_string("https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent")
+request += "Cookie: %s\r\nUser-Agent: %s\r\n" % (cookie_value, user_agent)
+# cookie = "Cookie: %s" % (cookie_value)
+
+print (request)
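
Note (not part of the commit): cfscrape.get_cookie_string() returns a (cookie_string, user_agent) pair, so the script prints a three-line pseudo-request whose second and third lines are the Cookie and User-Agent headers. The Rust code below relies on exactly that line layout; a rough sketch with invented values:

    // Illustration only; the cookie and user-agent values are made up.
    let out = "GET / HTTP/1.1\r\nCookie: cf_clearance=abc123\r\nUser-Agent: Mozilla/5.0\r\n";
    let split: Vec<&str> = out.lines().collect();
    assert_eq!(split[1], "Cookie: cf_clearance=abc123"); // stored in COOKIE
    assert_eq!(split[2], "User-Agent: Mozilla/5.0");     // stored in USER_AGENT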

Rust fetcher changes:

@@ -5,12 +5,14 @@ extern crate select;
 use clap::{App, Arg};
 use select::document::Document;
 use select::predicate::{Attr, Class, Name, Predicate};
+use std::fs;
 use std::path::Path;
 use std::process::Command;
+use std::{thread, time};
 
-// curl 'https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent' -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' -H 'Accept-Language: en-US,en;q=0.5' --compressed -H 'Referer: https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent' -H 'Connection: keep-alive' -H 'Cookie: __cfduid=dbd0c40338c7e5ad0dc38fec2e3913fe11548372388; cf_clearance=6614dc889970147a3a6e64f3e5b60a09469fe9f8-1548434564-3600-150' -H 'Upgrade-Insecure-Requests: 1' -H 'Pragma: no-cache' -H 'Cache-Control: no-cache' -H 'TE: Trailers'
-const COOKIE: &str = "Cookie: __cfduid=dbd0c40338c7e5ad0dc38fec2e3913fe11548372388; cf_clearance=6614dc889970147a3a6e64f3e5b60a09469fe9f8-1548434564-3600-150";
+static mut COOKIE: &str = "";
+static mut USER_AGENT: &str = "";
 
 fn main() {
     let matches = App::new("New Torrents Fetcher")

@@ -38,12 +40,14 @@ fn main() {
 
     let save_dir = Path::new(matches.value_of("TORRENT_SAVE_DIR").unwrap());
 
+    fetch_cloudflare_cookie();
+    magnetdl(save_dir);
+    skytorrents(save_dir);
+    leetx(save_dir);
+
     if let Some(t) = matches.value_of("TORRENTS_CSV_FILE") {
         torrents_csv_scan(Path::new(t), save_dir);
     }
-
-    skytorrents(save_dir);
-    leetx(save_dir);
 }
 
 fn torrents_csv_scan(torrents_csv_file: &Path, save_dir: &Path) {

@@ -53,14 +57,47 @@ fn torrents_csv_scan(torrents_csv_file: &Path, save_dir: &Path) {
 }
 
 fn collect_info_hashes(torrents_csv_file: &Path) -> Vec<String> {
+    println!("Scanning torrent infohashes...");
     let mut rdr = csv::ReaderBuilder::new()
         .delimiter(b';')
         .from_path(torrents_csv_file)
         .unwrap();
-    rdr
-        .records()
-        .map(|x| x.unwrap()[0].to_string())
-        .collect()
+    rdr.records().map(|x| x.unwrap()[0].to_string()).collect()
+}
+
+fn magnetdl(save_dir: &Path) {
+    let page_limit = 30;
+
+    let base_url = "https://magnetdl.com";
+
+    let mut pages: Vec<String> = Vec::new();
+
+    let types = ["software", "movies", "games", "e-books", "tv", "music"];
+    // https://www.magnetdl.com/download/software/se/desc/1/
+    for c_type in types.iter() {
+        for i in 1..page_limit {
+            let page = format!("{}/download/{}/se/desc/{}/", base_url, c_type, i);
+            pages.push(page);
+        }
+    }
+
+    for page in pages.iter() {
+        println!("Fetching page {}", page);
+        let html = match fetch_html(page) {
+            Ok(t) => t,
+            _err => continue,
+        };
+
+        let document = Document::from(&html[..]);
+
+        for row in document.find(Class("m").descendant(Name("a"))) {
+            let hash = match row.attr("href") {
+                Some(t) => t.to_string().chars().skip(20).take(40).collect(),
+                None => continue,
+            };
+            fetch_torrent(hash, save_dir);
+        }
+    }
 }
 
 fn skytorrents(save_dir: &Path) {
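
Aside (not part of the diff): the skip(20).take(40) in magnetdl() reads as stripping a "magnet:?xt=urn:btih:" prefix, which is exactly 20 characters long, leaving the 40-character hex infohash. A standalone sketch:

    // Sketch assuming the matched href is a magnet URI, as the offsets suggest.
    let href = "magnet:?xt=urn:btih:B415C913643E5FF49FE37D304BBB5E6E11AD5101&dn=example";
    let hash: String = href.chars().skip(20).take(40).collect();
    assert_eq!(hash, "B415C913643E5FF49FE37D304BBB5E6E11AD5101");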

@@ -196,11 +233,12 @@ fn fetch_torrent(hash: String, save_dir: &Path) {
         .unwrap();
 
     if !Path::new(&full_path).exists() {
-        Command::new("curl")
+        unsafe {
+            Command::new("curl")
             .args(&[
                 &url,
                 "-H",
-                "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0",
+                USER_AGENT,
                 "-H",
                 COOKIE,
                 "--compressed",

@@ -208,12 +246,57 @@ fn fetch_torrent(hash: String, save_dir: &Path) {
                 &full_path,
                 "-s",
             ])
-            .status()
+            .output()
             .expect("curl command failed");
-        println!("{} saved.", &full_path);
+            check_cloud_flare(Path::new(&full_path));
+            thread::sleep(time::Duration::from_millis(2000));
+            println!("{} saved.", &full_path);
+        }
     }
 }
 
-fn fetch_html(url: &str) -> Result<String, reqwest::Error> {
-    reqwest::get(url)?.text()
-}
+fn check_cloud_flare(file: &Path) {
+    let data = match fs::read_to_string(file) {
+        Ok(t) => t,
+        _err => return,
+    };
+
+    if data == "" {
+        return;
+    }
+
+    let first_line = &data[..5];
+
+    if first_line == "<!DOC" {
+        fs::remove_file(file);
+        println!("Cloudflare failed, re-fetching.");
+        fetch_cloudflare_cookie();
+    }
+}
+
+fn fetch_cloudflare_cookie() {
+    unsafe {
+        println!("Fetching new CloudFlare Cookie...");
+        let output = Command::new("python")
+            .args(&["src/cf.py"])
+            .output()
+            .expect("python command failed");
+
+        let out = string_to_static_str(format!("{}", String::from_utf8_lossy(&output.stdout)));
+        let split: Vec<&str> = out.lines().collect();
+        COOKIE = split[1];
+        USER_AGENT = split[2];
+    }
+}
+
+fn string_to_static_str(s: String) -> &'static str {
+    Box::leak(s.into_boxed_str())
+}
+
+fn fetch_html(url: &str) -> Result<String, reqwest::Error> {
+    reqwest::Client::new()
+        .get(url)
+        .header("Accept", "text/html")
+        .send()?
+        .text()
+}
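
Side note (not part of the commit): the new flow shells out to "python src/cf.py", so python and the cfscrape package must be available at runtime. A minimal, hypothetical startup guard could look like this (the function name and messages are invented):

    use std::process::Command;

    // Hypothetical guard, not in the repository: confirm the cfscrape helper
    // can run before the scrape loops start, instead of panicking mid-run.
    fn check_cf_helper() {
        let probe = Command::new("python")
            .args(&["-c", "import cfscrape"])
            .output()
            .expect("python is not installed or not on PATH");
        if !probe.status.success() {
            eprintln!("cfscrape is missing; install it with: pip install cfscrape");
        }
    }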