Fixing cloudflare, Scraping magnetdl. Fixes #38
This commit is contained in:
parent
59c041ad61
commit
ef0094eefd
8
new_torrents_fetcher/src/cf.py
Normal file
8
new_torrents_fetcher/src/cf.py
Normal file
@ -0,0 +1,8 @@
|
||||
import cfscrape
|
||||
# Obtain Cloudflare clearance data for itorrents.org via cfscrape and print it
# as a small request-style text block:
#
#   GET / HTTP/1.1
#   Cookie: <cookie string>
#   User-Agent: <user agent>
#
# Every line is terminated by CRLF. The exact line order matters to whoever
# parses this output, so it must not be changed.
TORRENT_URL = "https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent"

cookie_value, user_agent = cfscrape.get_cookie_string(TORRENT_URL)

header_lines = [
    "GET / HTTP/1.1",
    "Cookie: %s" % cookie_value,
    "User-Agent: %s" % user_agent,
]
request = "\r\n".join(header_lines) + "\r\n"

print(request)
|
@ -5,12 +5,14 @@ extern crate select;
|
||||
use clap::{App, Arg};
|
||||
use select::document::Document;
|
||||
use select::predicate::{Attr, Class, Name, Predicate};
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
use std::{thread, time};
|
||||
|
||||
// curl 'https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent' -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' -H 'Accept-Language: en-US,en;q=0.5' --compressed -H 'Referer: https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent' -H 'Connection: keep-alive' -H 'Cookie: __cfduid=dbd0c40338c7e5ad0dc38fec2e3913fe11548372388; cf_clearance=6614dc889970147a3a6e64f3e5b60a09469fe9f8-1548434564-3600-150' -H 'Upgrade-Insecure-Requests: 1' -H 'Pragma: no-cache' -H 'Cache-Control: no-cache' -H 'TE: Trailers'
|
||||
static mut COOKIE: &str = "";
|
||||
static mut USER_AGENT: &str = "";
|
||||
|
||||
const COOKIE: &str = "Cookie: __cfduid=dbd0c40338c7e5ad0dc38fec2e3913fe11548372388; cf_clearance=6614dc889970147a3a6e64f3e5b60a09469fe9f8-1548434564-3600-150";
|
||||
|
||||
fn main() {
|
||||
let matches = App::new("New Torrents Fetcher")
|
||||
@ -38,12 +40,14 @@ fn main() {
|
||||
|
||||
let save_dir = Path::new(matches.value_of("TORRENT_SAVE_DIR").unwrap());
|
||||
|
||||
fetch_cloudflare_cookie();
|
||||
magnetdl(save_dir);
|
||||
skytorrents(save_dir);
|
||||
leetx(save_dir);
|
||||
|
||||
if let Some(t) = matches.value_of("TORRENTS_CSV_FILE") {
|
||||
torrents_csv_scan(Path::new(t), save_dir);
|
||||
}
|
||||
|
||||
skytorrents(save_dir);
|
||||
leetx(save_dir);
|
||||
}
|
||||
|
||||
fn torrents_csv_scan(torrents_csv_file: &Path, save_dir: &Path) {
|
||||
@ -53,14 +57,47 @@ fn torrents_csv_scan(torrents_csv_file: &Path, save_dir: &Path) {
|
||||
}
|
||||
|
||||
fn collect_info_hashes(torrents_csv_file: &Path) -> Vec<String> {
|
||||
println!("Scanning torrent infohashes...");
|
||||
let mut rdr = csv::ReaderBuilder::new()
|
||||
.delimiter(b';')
|
||||
.from_path(torrents_csv_file)
|
||||
.unwrap();
|
||||
rdr
|
||||
.records()
|
||||
.map(|x| x.unwrap()[0].to_string())
|
||||
.collect()
|
||||
rdr.records().map(|x| x.unwrap()[0].to_string()).collect()
|
||||
}
|
||||
|
||||
fn magnetdl(save_dir: &Path) {
|
||||
let page_limit = 30;
|
||||
|
||||
let base_url = "https://magnetdl.com";
|
||||
|
||||
let mut pages: Vec<String> = Vec::new();
|
||||
|
||||
let types = ["software", "movies", "games", "e-books", "tv", "music"];
|
||||
// https://www.magnetdl.com/download/software/se/desc/1/
|
||||
for c_type in types.iter() {
|
||||
for i in 1..page_limit {
|
||||
let page = format!("{}/download/{}/se/desc/{}/", base_url, c_type, i);
|
||||
pages.push(page);
|
||||
}
|
||||
}
|
||||
|
||||
for page in pages.iter() {
|
||||
println!("Fetching page {}", page);
|
||||
let html = match fetch_html(page) {
|
||||
Ok(t) => t,
|
||||
_err => continue,
|
||||
};
|
||||
|
||||
let document = Document::from(&html[..]);
|
||||
|
||||
for row in document.find(Class("m").descendant(Name("a"))) {
|
||||
let hash = match row.attr("href") {
|
||||
Some(t) => t.to_string().chars().skip(20).take(40).collect(),
|
||||
None => continue,
|
||||
};
|
||||
fetch_torrent(hash, save_dir);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn skytorrents(save_dir: &Path) {
|
||||
@ -196,11 +233,12 @@ fn fetch_torrent(hash: String, save_dir: &Path) {
|
||||
.unwrap();
|
||||
|
||||
if !Path::new(&full_path).exists() {
|
||||
Command::new("curl")
|
||||
unsafe {
|
||||
Command::new("curl")
|
||||
.args(&[
|
||||
&url,
|
||||
"-H",
|
||||
"User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0",
|
||||
USER_AGENT,
|
||||
"-H",
|
||||
COOKIE,
|
||||
"--compressed",
|
||||
@ -208,12 +246,57 @@ fn fetch_torrent(hash: String, save_dir: &Path) {
|
||||
&full_path,
|
||||
"-s",
|
||||
])
|
||||
.status()
|
||||
.output()
|
||||
.expect("curl command failed");
|
||||
println!("{} saved.", &full_path);
|
||||
check_cloud_flare(Path::new(&full_path));
|
||||
thread::sleep(time::Duration::from_millis(2000));
|
||||
println!("{} saved.", &full_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn fetch_html(url: &str) -> Result<String, reqwest::Error> {
|
||||
reqwest::get(url)?.text()
|
||||
fn check_cloud_flare(file: &Path) {
|
||||
let data = match fs::read_to_string(file) {
|
||||
Ok(t) => t,
|
||||
_err => return,
|
||||
};
|
||||
|
||||
if data == "" {
|
||||
return;
|
||||
}
|
||||
|
||||
let first_line = &data[..5];
|
||||
|
||||
if first_line == "<!DOC" {
|
||||
fs::remove_file(file);
|
||||
println!("Cloudflare failed, re-fetching.");
|
||||
fetch_cloudflare_cookie();
|
||||
}
|
||||
}
|
||||
|
||||
fn fetch_cloudflare_cookie() {
|
||||
unsafe {
|
||||
println!("Fetching new CloudFlare Cookie...");
|
||||
let output = Command::new("python")
|
||||
.args(&["src/cf.py"])
|
||||
.output()
|
||||
.expect("python command failed");
|
||||
|
||||
let out = string_to_static_str(format!("{}", String::from_utf8_lossy(&output.stdout)));
|
||||
let split: Vec<&str> = out.lines().collect();
|
||||
COOKIE = split[1];
|
||||
USER_AGENT = split[2];
|
||||
}
|
||||
}
|
||||
|
||||
/// Turns an owned `String` into a `&'static str` by leaking its allocation.
///
/// The memory backing the returned slice is never freed, so this should only
/// be used for values that are meant to live for the rest of the program.
fn string_to_static_str(s: String) -> &'static str {
    let boxed: Box<str> = s.into_boxed_str();
    let leaked: &'static mut str = Box::leak(boxed);
    leaked
}
|
||||
|
||||
/// Downloads `url` and returns the response body as text.
///
/// The request goes through a newly created `reqwest::Client` and carries an
/// `Accept: text/html` header. Any error from sending the request or reading
/// the body is returned to the caller.
fn fetch_html(url: &str) -> Result<String, reqwest::Error> {
    let client = reqwest::Client::new();
    let request = client.get(url).header("Accept", "text/html");
    let mut response = request.send()?;
    response.text()
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user