Adding rust script to pull from skytorrents.

2018-10-09 12:54:47 -07:00 · 2018-10-09 12:54:47 -07:00 · ac1a54eb1a
commit ac1a54eb1a
parent 1f31cefc67
5 changed files with 1508 additions and 1 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,2 +1,3 @@
 run.out
 old_greps.sh
+new_torrents_fetcher/target
--- a/new_torrents_fetcher/Cargo.lock
+++ b/new_torrents_fetcher/Cargo.lock
--- a/new_torrents_fetcher/Cargo.toml
+++ b/new_torrents_fetcher/Cargo.toml
@ -0,0 +1,8 @@
+[package]
+name = "new_torrents_fetcher"
+version = "0.1.0"
+authors = ["Dessalines <happydooby@gmail.com>"]
+
+[dependencies]
+reqwest = "*"
+select = "*"
--- a/new_torrents_fetcher/src/main.rs
+++ b/new_torrents_fetcher/src/main.rs
@ -0,0 +1,114 @@
+extern crate reqwest;
+extern crate select;
+use select::document::Document;
+use select::predicate::{Attr, Name, Predicate};
+use std::fs;
+use std::fs::File;
+use std::fs::OpenOptions;
+use std::io::prelude::*;
+use std::path::Path;
+use std::time::{SystemTime, UNIX_EPOCH};
+
+const TMP_ADDS: &str = "tmp_adds.csv";
+
+fn main() {
+  if Path::new(TMP_ADDS).exists() {
+    fs::remove_file(TMP_ADDS).unwrap();
+  }
+  File::create(TMP_ADDS).unwrap();
+  skytorrents();
+}
+
+fn skytorrents() {
+  let mut pages: Vec<String> = Vec::new();
+  for i in 1..20 {
+    let page = format!("{}{}", "https://www.skytorrents.lol/top100?page=", i);
+    pages.push(page);
+  }
+
+  for page in pages.iter() {
+    println!("Fetching page {}", page);
+    let html = fetch_html(page);
+    let document = Document::from(&html[..]);
+    for row in document.find(Attr("id", "results").descendant(Name("tr"))) {
+      let info_hash: String = row
+        .find(Name("td").descendant(Name("a")))
+        .nth(2)
+        .unwrap()
+        .attr("href")
+        .unwrap()
+        .chars()
+        .skip(20)
+        .take(40)
+        .collect();
+
+      let name = row
+        .find(Name("td").descendant(Name("a")))
+        .nth(0)
+        .unwrap()
+        .text();
+
+      let mut size = row.find(Name("td")).nth(1).unwrap().text();
+      let size_bytes = convert_to_bytes(size);
+
+      // simulate a start and scraped date
+      let created_unix = SystemTime::now()
+        .duration_since(UNIX_EPOCH)
+        .expect("Time went backwards")
+        .as_secs();
+
+      let seeders = row.find(Name("td")).nth(4).unwrap().text();
+      let leechers = row.find(Name("td")).nth(5).unwrap().text();
+
+      // infohash;name;size_bytes;created_unix;seeders;leechers;completed;scraped_date
+
+      let add_line = [
+        info_hash,
+        name,
+        size_bytes.to_string(),
+        created_unix.to_string(),
+        seeders,
+        leechers,
+        "".to_string(),
+        created_unix.to_string(),
+      ].join(";");
+
+      append_to_file(add_line);
+    }
+  }
+}
+
+fn fetch_html(url: &str) -> String {
+  reqwest::get(url).unwrap().text().unwrap()
+}
+
+fn convert_to_bytes(human: String) -> u64 {
+  let split = human.split(" ").collect::<Vec<&str>>();
+  let float: f32 = split[0].parse().unwrap();
+  let unit = split[1];
+
+  let mut num = (float * 100 as f32) as u64;
+
+  match unit {
+    "KB" => num *= 10u64.pow(3),
+    "MB" => num *= 10u64.pow(6),
+    "GB" => num *= 10u64.pow(9),
+    "TB" => num *= 10u64.pow(12),
+    "PB" => num *= 10u64.pow(15),
+    _ => println!("Unknown unit"),
+  }
+  num /= 100;
+  num
+}
+
+fn append_to_file(line: String) {
+  let mut file = OpenOptions::new()
+    .write(true)
+    .append(true)
+    .open(TMP_ADDS)
+    .unwrap();
+
+  if let Err(e) = writeln!(file, "{}", line) {
+    eprintln!("Couldn't write to file: {}", e);
+  }
+}
--- a/update.sh
+++ b/update.sh
@ -0,0 +1,8 @@
+# This fetches from several torrent websites for new updates
+
+pushd new_torrents_fetcher
+cargo run
+cat tmp_adds.csv >> ../torrents.csv
+rm tmp_adds.csv
+popd
+. prune.sh