Browse Source

cant.even.roll

master
root 5 months ago
parent
commit
f107ed0cdc
  1. 16
      Cargo.toml
  2. 108
      src/main.rs
  3. 36
      src/request.rs

16
Cargo.toml

@ -0,0 +1,16 @@
[package]
name = "crawl"
version = "0.1.0"
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
url = "2.2.2"
# No dependency on the crates.io `request` crate: it is unused, and in edition
# 2018 its name makes `use request::Request;` ambiguous with the local
# `mod request;` in src/main.rs.
# reqwest must be on the 0.11 line: 0.10.x is built on tokio 0.2 and cannot run
# on the tokio 1.x runtime this binary creates.
reqwest = { version = "0.11", features = ["json"] }
tokio = { version = "1.8.1", features = ["full"] }
tokio-stream = { version = "0.1.7", features = ["fs","io-util"] }
structopt = "0.3.22"
futures = "0.3.15"
indicatif = "0.16.2"

108
src/main.rs

@ -0,0 +1,108 @@
// gonna walk around for a minute, been sitting down for too long.. was nodding out for a sec
use indicatif;
use request::Request;
mod request;
#[allow(unused)]
#[allow(unused)]
use reqwest::header;
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
use tokio::runtime::Builder;
use crate::request::Response;
#[allow(unused)]
use futures::{future::join_all, stream, StreamExt};
use reqwest::ClientBuilder;
use std::collections::HashMap;
use tokio::fs::File;
use tokio::time::Duration;
use tokio_stream::wrappers::LinesStream;
use indicatif::{ProgressBar, ProgressStyle};
use std::borrow::{Borrow, BorrowMut};
use std::cell::RefCell;
use std::fs::File as OtherFile;
use std::io::{self, prelude::*, BufReader as OtherBufReader};
use structopt::StructOpt;
/// Command-line options for the crawler.
#[derive(Debug, StructOpt)]
struct Opts {
    /// Path of the file to read, one URL per line.
    #[structopt(short = "i", long = "infile")]
    infile: String,
    /// Path of the file that receives the matching URLs, one per line.
    #[structopt(short = "o", long = "outfile")]
    outfile: String,
    /// Maximum number of requests kept in flight at the same time.
    // A count, not a measurement: parsing it as an integer rejects values
    // such as `2.5` or `-1` at the command line instead of truncating them.
    #[structopt(short = "c", long = "concurrency", default_value = "50")]
    concurrency: usize,
}
#[allow(unused)]
fn main(){
let opt = Opts::from_args();
let mut infile = opt.infile;
let file = OtherFile::open(infile).unwrap();
let reader = OtherBufReader::new(file);
let mut count = reader.lines().count();
let runtime = Builder::new_multi_thread().enable_all().build().unwrap();
runtime.block_on(execute2(count));
}
#[allow(unused)]
async fn execute2(count: usize) -> Result<(), Box<dyn std::error::Error>> {
let opt = Opts::from_args();
let filename = opt.infile;
let concurrency = opt.concurrency;
let outfile = opt.outfile;
let file = File::open(filename).await.expect("not open");
let mut lines = BufReader::new(file).lines();
let mut outfile = File::create(outfile)
.await
.expect("Failed to open outfile.");
let mut outfile_ref = RefCell::new(outfile);
let bar = ProgressBar::new(count as u64);
// let mut handlevec = vec![];
bar.set_style(
ProgressStyle::default_bar()
.template("[{elapsed}] {bar:40.cyan/blue} {pos:>7}/{len:7} {per_sec} {eta}")
.progress_chars("##-"),
);
let client = reqwest::Client::builder()
.timeout(Duration::from_secs(5))
.connect_timeout(Duration::from_secs(5))
.build()?;
LinesStream::new(lines)
.filter_map(|line| async { line.ok() })
.map(|line| {
let inner_file = &outfile_ref;
let bar = bar.borrow();
let client = client.borrow() ;
async move {
let text = Request::new(line.clone(), client.clone()).get().await.unwrap_or(Response {
url: "nourl".to_string(),
text: "NOT_FOUND".to_string(),
});
if text.text.to_lowercase().contains("games") && ( text.text.to_lowercase().contains("login") || text.text.to_lowercase().contains("signup")) {
// println!("{}",text.url) ;
bar.inc(1);
let url_bytes = format!("{}\n", text.url).into_bytes();
inner_file.borrow_mut().write_all(&url_bytes).await;
} else {
bar.inc(1);
}
}
})
.buffer_unordered(concurrency as usize)
.collect::<Vec<()>>()
.await;
Ok(())
}

36
src/request.rs

@ -0,0 +1,36 @@
// #![allow(dead_code,unused_variables)]
// #![allow(unused_must_use)]
use reqwest::{self, Client, Response as HttpResponse};

/// The parts of a fetched page that the crawler inspects.
#[derive(Debug, Clone, PartialEq)]
pub struct Response {
    /// URL reported by the HTTP response.
    pub url: String,
    /// Response body decoded as text.
    pub text: String,
}

/// A GET request for a single URL, sent through a caller-supplied client.
#[derive(Debug, Clone)]
pub struct Request {
    /// URL to fetch.
    pub url: String,
    /// Client used to send the request, so that settings chosen by the caller
    /// (such as timeouts) apply.
    pub client: Client,
}

impl Request {
    /// Creates a request for `url` that will be sent with `client`.
    pub fn new(url: String, client: Client) -> Self {
        Self { url, client }
    }

    /// Sends the request and reads the whole body.
    ///
    /// Returns `None` when the request cannot be sent or the body cannot be
    /// read as text.
    pub async fn get(&self) -> Option<Response> {
        let response = self.client.get(self.url.as_str()).send().await.ok()?;
        // Capture the URL first: `text()` consumes the response.
        let url = response.url().to_string();
        let text = response.text().await.ok()?;
        Some(Response { url, text })
    }

    /// Reads the body of a raw `reqwest` response as text, or `None` on failure.
    pub async fn text(response: HttpResponse) -> Option<String> {
        response.text().await.ok()
    }

    /// Returns the status of a raw `reqwest` response formatted as a string.
    pub async fn status_code(response: HttpResponse) -> String {
        response.status().to_string()
    }
}
Loading…
Cancel
Save