From 4b2952e3a61c710be4d033b4aceffdfe464344f1 Mon Sep 17 00:00:00 2001 From: Dessalines Date: Fri, 25 Jan 2019 11:31:42 -0800 Subject: [PATCH] Adding Clap parser. Newtorrents fetcher downloads from itorrents. Fixes #35 Fetcher can scan current torrents.csv file. Fixes #40 --- new_torrents_fetcher/Cargo.lock | 113 +++++++++++++++ new_torrents_fetcher/Cargo.toml | 4 +- new_torrents_fetcher/src/main.rs | 238 ++++++++++++++----------------- 3 files changed, 221 insertions(+), 134 deletions(-) diff --git a/new_torrents_fetcher/Cargo.lock b/new_torrents_fetcher/Cargo.lock index 277d714..ad04884 100644 --- a/new_torrents_fetcher/Cargo.lock +++ b/new_torrents_fetcher/Cargo.lock @@ -3,6 +3,14 @@ name = "adler32" version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "ansi_term" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "arrayvec" version = "0.4.7" @@ -11,6 +19,16 @@ dependencies = [ "nodrop 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "atty" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "base64" version = "0.9.3" @@ -67,6 +85,20 @@ name = "cfg-if" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "clap" +version = "2.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", + "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "cloudabi" version = "0.0.3" @@ -127,6 +159,23 @@ name = "crossbeam-utils" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "csv" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "csv-core 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.79 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "csv-core" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "debug_unreachable" version = "0.1.1" @@ -403,6 +452,15 @@ name = "matches" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "memchr" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "memoffset" version = "0.2.1" @@ -504,6 +562,8 @@ dependencies = [ name = "new_torrents_fetcher" version = "0.1.0" dependencies = [ + "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", + "csv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)", "select 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -680,6 +740,14 @@ name = "redox_syscall" version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "redox_termios" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "remove_dir_all" version = "0.5.1" @@ -878,6 +946,11 @@ name = "string_cache_shared" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "strsim" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "syn" version = "0.11.11" @@ -919,6 +992,24 @@ dependencies = [ "utf-8 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "termion" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "textwrap" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "time" version = "0.1.40" @@ -1116,6 +1207,11 @@ name = "unicode-normalization" version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "unicode-width" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "unicode-xid" version = "0.0.4" @@ -1170,6 +1266,11 @@ name = "vcpkg" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "vec_map" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "version_check" version = "0.1.5" @@ -1230,7 +1331,9 @@ dependencies = [ [metadata] "checksum adler32 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "7e522997b529f05601e05166c07ed17789691f562762c7f3b987263d2dedee5c" +"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" "checksum arrayvec 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)" = "a1e964f9e24d588183fcb43503abda40d288c8657dfc27311516ce2f05675aef" +"checksum atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652" "checksum base64 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)" = "489d6c0ed21b11d038c31b6ceccca973e65d73ba3bd8ecb9a2babf5546164643" "checksum bit-set 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d9bf6104718e80d7b26a68fdbacff3481cfc05df670821affc7e9cbc1884400c" "checksum bit-vec 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "02b4ff8b16e6076c3e14220b39fbc1fabb6737522281a388998046859400895f" @@ -1240,6 +1343,7 @@ dependencies = [ "checksum bytes 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)" = "0ce55bd354b095246fc34caf4e9e242f5297a7fd938b090cadfea6eee614aa62" "checksum cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16" "checksum cfg-if 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0c4e7bb64a8ebb0d856483e1e682ea3422f883c5f5615a90d51a2c82fe87fdd3" +"checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e" "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" "checksum core-foundation 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "286e0b41c3a20da26536c6000a280585d519fd07b3956b43aed8a79e9edce980" "checksum core-foundation-sys 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "716c271e8613ace48344f723b60b900a93150271e5be206212d052bbc0883efa" @@ -1247,6 +1351,8 @@ dependencies = [ "checksum crossbeam-deque 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3486aefc4c0487b9cb52372c97df0a48b8c249514af1ee99703bf70d2f2ceda1" "checksum crossbeam-epoch 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "30fecfcac6abfef8771151f8be4abc9e4edc112c2bcb233314cafde2680536e9" "checksum crossbeam-utils 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "677d453a17e8bd2b913fa38e8b9cf04bcdbb5be790aa294f2389661d72036015" +"checksum csv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "9fd1c44c58078cfbeaf11fbb3eac9ae5534c23004ed770cc4bfb48e658ae4f04" +"checksum csv-core 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fa5cdef62f37e6ffe7d1f07a381bc0db32b7a3ff1cac0de56cb0d81e71f53d65" "checksum debug_unreachable 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9a032eac705ca39214d169f83e3d3da290af06d8d1d344d1baad2fd002dca4b3" "checksum dtoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6d301140eb411af13d3115f9a562c85cc6b541ade9dfa314132244aaee7489dd" "checksum encoding_rs 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)" = "065f4d0c826fdaef059ac45487169d918558e3cf86c9d89f6e81cf52369126e5" @@ -1280,6 +1386,7 @@ dependencies = [ "checksum mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" "checksum markup5ever 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "ff834ac7123c6a37826747e5ca09db41fd7a83126792021c2e636ad174bb77d3" "checksum matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" +"checksum memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "e1dd4eaac298c32ce07eb6ed9242eda7d82955b9170b7d6db59b2e02cc63fcb8" "checksum memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0f9dc261e2b62d7a622bf416ea3c5245cdd5d9a7fcc428c0d06804dfce1775b3" "checksum mime 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "4b082692d3f6cf41b453af73839ce3dfc212c4411cbb2441dff80a716e38bd79" "checksum mime_guess 2.0.0-alpha.6 (registry+https://github.com/rust-lang/crates.io-index)" = "30de2e4613efcba1ec63d8133f344076952090c122992a903359be5a4f99c3ed" @@ -1311,6 +1418,7 @@ dependencies = [ "checksum rand_core 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1961a422c4d189dfb50ffa9320bf1f2a9bd54ecb92792fb9477f99a1045f3372" "checksum rand_core 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0905b6b7079ec73b314d4c748701f6931eb79fd97c668caa3f1899b22b32c6db" "checksum redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "c214e91d3ecf43e9a4e41e578973adeb14b474f2bee858742d127af75a0112b1" +"checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" "checksum remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3488ba1b9a2084d38645c4c08276a1752dcbf2c7130d74f1569681ad5d2799c5" "checksum reqwest 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1d68c7bf0b1dc3860b80c6d31d05808bf54cdc1bfc70a4680893791becd083ae" "checksum rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)" = "dcf128d1287d2ea9d80910b5f1120d0b8eede3fbf1abe91c40d39ea7d51e6fda" @@ -1335,10 +1443,13 @@ dependencies = [ "checksum string_cache 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "413fc7852aeeb5472f1986ef755f561ddf0c789d3d796e65f0b6fe293ecd4ef8" "checksum string_cache_codegen 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "35293b05cf1494e8ddd042a7df6756bf18d07f42d234f32e71dce8a7aabb0191" "checksum string_cache_shared 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b1884d1bc09741d466d9b14e6d37ac89d6909cbcac41dd9ae982d4d063bbedfc" +"checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" "checksum syn 0.11.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d3b891b9015c88c576343b9b3e41c2c11a51c219ef067b264bd9c8aa9b441dad" "checksum synom 0.11.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a393066ed9010ebaed60b9eafa373d4b1baac186dd7e008555b0f702b51945b6" "checksum tempfile 3.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "55c1195ef8513f3273d55ff59fe5da6940287a0d7a98331254397f464833675b" "checksum tendril 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8c1b72f8e2f5b73b65c315b1a70c730f24b9d7a25f39e98de8acbe2bb795caea" +"checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" +"checksum textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "307686869c93e71f94da64286f9a9524c0f308a9e1c87a583de8e9c9039ad3f6" "checksum time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "d825be0eb33fda1a7e68012d51e9c7f451dc1a69391e7fdc197060bb8c56667b" "checksum tokio 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "6e93c78d23cc61aa245a8acd2c4a79c4d7fa7fb5c3ca90d5737029f043a84895" "checksum tokio-codec 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5c501eceaf96f0e1793cf26beb63da3d11c738c4a943fdf3746d81d64684c39f" @@ -1357,6 +1468,7 @@ dependencies = [ "checksum unicase 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9d3218ea14b4edcaccfa0df0a64a3792a2c32cc706f1b336e48867f9d3147f90" "checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" "checksum unicode-normalization 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "6a0180bc61fc5a987082bfa111f4cc95c4caff7f9799f3e46df09163a937aa25" +"checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" "checksum unicode-xid 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8c1f860d7d29cf02cb2f3f359fd35991af3d30bac52c57d265a3c461074cb4dc" "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" "checksum unreachable 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1f2ae5ddb18e1c92664717616dd9549dde73f539f01bd7b77c2edb2446bdff91" @@ -1365,6 +1477,7 @@ dependencies = [ "checksum utf-8 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)" = "bab35f71693630bb1953dce0f2bcd780e7cde025027124a202ac08a45ba25141" "checksum uuid 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dab5c5526c5caa3d106653401a267fed923e7046f35895ffcb5ca42db64942e6" "checksum vcpkg 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "def296d3eb3b12371b2c7d0e83bfe1403e4db2d7a0bba324a12b21c4ee13143d" +"checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" "checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" "checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" "checksum want 0.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "797464475f30ddb8830cc529aaaae648d581f99e2036a928877dfde027ddf6b3" diff --git a/new_torrents_fetcher/Cargo.toml b/new_torrents_fetcher/Cargo.toml index b24a4af..1de1d53 100644 --- a/new_torrents_fetcher/Cargo.toml +++ b/new_torrents_fetcher/Cargo.toml @@ -5,4 +5,6 @@ authors = ["Dessalines "] [dependencies] reqwest = "*" -select = "*" \ No newline at end of file +select = "*" +clap = "*" +csv = "*" \ No newline at end of file diff --git a/new_torrents_fetcher/src/main.rs b/new_torrents_fetcher/src/main.rs index cd4ccc6..bc19961 100644 --- a/new_torrents_fetcher/src/main.rs +++ b/new_torrents_fetcher/src/main.rs @@ -1,26 +1,69 @@ +extern crate clap; +extern crate csv; extern crate reqwest; extern crate select; +use clap::{App, Arg}; use select::document::Document; use select::predicate::{Attr, Class, Name, Predicate}; -use std::fs; -use std::fs::File; -use std::fs::OpenOptions; -use std::io::prelude::*; use std::path::Path; -use std::time::{SystemTime, UNIX_EPOCH}; +use std::process::Command; -const TMP_ADDS: &str = "tmp_adds.csv"; +// curl 'https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent' -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' -H 'Accept-Language: en-US,en;q=0.5' --compressed -H 'Referer: https://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent' -H 'Connection: keep-alive' -H 'Cookie: __cfduid=dbd0c40338c7e5ad0dc38fec2e3913fe11548372388; cf_clearance=6614dc889970147a3a6e64f3e5b60a09469fe9f8-1548434564-3600-150' -H 'Upgrade-Insecure-Requests: 1' -H 'Pragma: no-cache' -H 'Cache-Control: no-cache' -H 'TE: Trailers' + +const COOKIE: &str = "Cookie: __cfduid=dbd0c40338c7e5ad0dc38fec2e3913fe11548372388; cf_clearance=6614dc889970147a3a6e64f3e5b60a09469fe9f8-1548434564-3600-150"; fn main() { - if Path::new(TMP_ADDS).exists() { - fs::remove_file(TMP_ADDS).unwrap(); + let matches = App::new("New Torrents Fetcher") + .version("0.1.0") + .author("Dessalines") + .about("Fetches new torrent files from various sites.") + .arg( + Arg::with_name("TORRENT_SAVE_DIR") + .short("s") + .long("save_dir") + .value_name("DIR") + .takes_value(true) + .help("Where to save the torrent files.") + .required(true), + ) + .arg( + Arg::with_name("TORRENTS_CSV_FILE") + .short("t") + .long("torrents_csv") + .value_name("FILE") + .takes_value(true) + .help("The location of a torrents.csv file. If given, it will download those infohashes."), + ) + .get_matches(); + + let save_dir = Path::new(matches.value_of("TORRENT_SAVE_DIR").unwrap()); + + if let Some(t) = matches.value_of("TORRENTS_CSV_FILE") { + torrents_csv_scan(Path::new(t), save_dir); } - File::create(TMP_ADDS).unwrap(); - leetx(); - skytorrents(); + + skytorrents(save_dir); + leetx(save_dir); } -fn skytorrents() { +fn torrents_csv_scan(torrents_csv_file: &Path, save_dir: &Path) { + for hash in collect_info_hashes(torrents_csv_file) { + fetch_torrent(hash, save_dir); + } +} + +fn collect_info_hashes(torrents_csv_file: &Path) -> Vec { + let mut rdr = csv::ReaderBuilder::new() + .delimiter(b';') + .from_path(torrents_csv_file) + .unwrap(); + rdr + .records() + .map(|x| x.unwrap()[0].to_string()) + .collect() +} + +fn skytorrents(save_dir: &Path) { let page_limit = 100; let base_url = "https://www.skytorrents.lol"; @@ -45,62 +88,27 @@ fn skytorrents() { for page in pages.iter() { println!("Fetching page {}", page); - let html = fetch_html(page); + let html = match fetch_html(page) { + Ok(t) => t, + _err => continue, + }; let document = Document::from(&html[..]); for row in document.find(Attr("id", "results").descendant(Name("tr"))) { - let info_hash_td = match row.find(Name("td").descendant(Name("a"))).nth(2) { + let hash_td = match row.find(Name("td").descendant(Name("a"))).nth(2) { Some(t) => t, None => continue, }; - let info_hash = match info_hash_td.attr("href") { + let hash = match hash_td.attr("href") { Some(t) => t.chars().skip(20).take(40).collect(), None => continue, }; - let name = match row.find(Name("td").descendant(Name("a"))).nth(0) { - Some(t) => t.text(), - None => continue, - }; - - let size = match row.find(Name("td")).nth(1) { - Some(t) => t.text(), - None => continue, - }; - let size_bytes = convert_to_bytes(size); - - // simulate a start and scraped date - let created_unix = SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("Time went backwards") - .as_secs(); - - let seeders = match row.find(Name("td")).nth(4) { - Some(t) => t.text().replace(",", ""), - None => continue, - }; - let leechers = match row.find(Name("td")).nth(5) { - Some(t) => t.text().replace(",", ""), - None => continue, - }; - // infohash;name;size_bytes;created_unix;seeders;leechers;completed;scraped_date - - let add_line = [ - info_hash, - name, - size_bytes.to_string(), - created_unix.to_string(), - seeders, - leechers, - "".to_string(), - created_unix.to_string(), - ].join(";"); - - append_to_file(add_line); + fetch_torrent(hash, save_dir); } } } -fn leetx() { +fn leetx(save_dir: &Path) { let page_limit = 50; let base_url = "https://1337x.to"; @@ -128,7 +136,10 @@ fn leetx() { for page in pages.iter() { println!("Fetching page {}", page); - let html = fetch_html(page); + let html = match fetch_html(page) { + Ok(t) => t, + _err => continue, + }; let document = Document::from(&html[..]); for row in document.find( @@ -151,9 +162,13 @@ fn leetx() { let detail_full_url = format!("{}{}", base_url, detail_page_url_href); println!("Fetching page {}", detail_full_url); - let detail_html = fetch_html(&detail_full_url); + let detail_html = match fetch_html(&detail_full_url) { + Ok(t) => t, + _err => continue, + }; + let detail_document = Document::from(&detail_html[..]); - let info_hash = match detail_document + let hash = match detail_document .find(Class("infohash-box").descendant(Name("span"))) .nth(0) { @@ -161,87 +176,44 @@ fn leetx() { None => continue, }; - let name_col = match row.find(Class("coll-1")).nth(0) { - Some(t) => t, - None => continue, - }; - let name = match name_col.find(Name("a")).nth(1) { - Some(t) => t.text(), - None => continue, - }; - - let seeders = match row.find(Class("coll-2")).nth(0) { - Some(t) => t.text(), - None => continue, - }; - let leechers = match row.find(Class("coll-3")).nth(0) { - Some(t) => t.text(), - None => continue, - }; - - let size_col = match row.find(Class("coll-4")).nth(0) { - Some(t) => t, - None => continue, - }; - let size = match size_col.children().nth(0) { - Some(t) => t.text().replace(",", ""), - None => continue, - }; - - let size_bytes = convert_to_bytes(size); - - let created_unix = SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("Time went backwards") - .as_secs(); - - let add_line = [ - info_hash, - name, - size_bytes.to_string(), - created_unix.to_string(), - seeders, - leechers, - "".to_string(), - created_unix.to_string(), - ].join(";"); - - append_to_file(add_line); + fetch_torrent(hash, save_dir); } } } -fn fetch_html(url: &str) -> String { - reqwest::get(url).unwrap().text().unwrap() -} +fn fetch_torrent(hash: String, save_dir: &Path) { + // Curl is the only thing that works with itorrent + let file_name = format!("{}.torrent", hash); + let url = format!( + "https://itorrents.org/torrent/{}.torrent", + &hash.to_ascii_uppercase() + ); -fn convert_to_bytes(human: String) -> u64 { - let split = human.split(" ").collect::>(); - let float: f32 = split[0].parse().unwrap(); - let unit = split[1]; - - let mut num = (float * 100 as f32) as u64; - - match unit { - "KB" => num *= 10u64.pow(3), - "MB" => num *= 10u64.pow(6), - "GB" => num *= 10u64.pow(9), - "TB" => num *= 10u64.pow(12), - "PB" => num *= 10u64.pow(15), - _ => println!("Unknown unit"), - } - num /= 100; - num -} - -fn append_to_file(line: String) { - let mut file = OpenOptions::new() - .write(true) - .append(true) - .open(TMP_ADDS) + let full_path = save_dir + .join(&file_name) + .into_os_string() + .into_string() .unwrap(); - if let Err(e) = writeln!(file, "{}", line) { - eprintln!("Couldn't write to file: {}", e); + if !Path::new(&full_path).exists() { + Command::new("curl") + .args(&[ + &url, + "-H", + "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0", + "-H", + COOKIE, + "--compressed", + "-o", + &full_path, + "-s", + ]) + .status() + .expect("curl command failed"); + println!("{} saved.", &full_path); } } + +fn fetch_html(url: &str) -> Result { + reqwest::get(url)?.text() +}