diff --git a/src/bin/websurfx.rs b/src/bin/websurfx.rs
index 47ba367..18ccf6f 100644
--- a/src/bin/websurfx.rs
+++ b/src/bin/websurfx.rs
@@ -19,6 +19,8 @@ struct CliArgs {
 
 const PORT_RANGE: RangeInclusive<usize> = 1024..=65535;
 
+// A function to check whether the port is a valid number within the range
+// [1024-65535]; otherwise it returns an appropriate error message.
 fn is_port_in_range(s: &str) -> Result<u16, String> {
     let port: usize = s
         .parse()
@@ -39,6 +41,7 @@ fn is_port_in_range(s: &str) -> Result<u16, String> {
 async fn main() -> std::io::Result<()> {
     let args = CliArgs::parse();
 
+    // Initialize the logger, with the log level defaulting to `info`.
     env_logger::Builder::from_env(Env::default().default_filter_or("info")).init();
 
     log::info!("started server on port {}", args.port);
@@ -54,7 +57,7 @@ async fn main() -> std::io::Result<()> {
     HttpServer::new(move || {
         App::new()
             .app_data(handlebars_ref.clone())
-            .wrap(Logger::default())
+            .wrap(Logger::default()) // Enable request logging middleware.
             // Serve images and static files (css and js files).
             .service(fs::Files::new("/static", "./public/static").show_files_listing())
             .service(fs::Files::new("/images", "./public/images").show_files_listing())
diff --git a/src/engines/duckduckgo.rs b/src/engines/duckduckgo.rs
index de4e360..d382eca 100644
--- a/src/engines/duckduckgo.rs
+++ b/src/engines/duckduckgo.rs
@@ -48,49 +48,37 @@ pub async fn results(
     let result_url: Selector = Selector::parse(".result__url")?;
     let result_desc: Selector = Selector::parse(".result__snippet")?;
 
-    let mut search_results: HashMap<String, RawSearchResult> = HashMap::new();
-
     // scrape all the results from the html
-    for result in document.select(&results) {
-        let search_result: RawSearchResult = RawSearchResult {
-            title: result
-                .select(&result_title)
-                .next()
-                .unwrap()
-                .inner_html()
-                .trim()
-                .to_string(),
-            visiting_url: format!(
-                "https://{}",
+    Ok(document
+        .select(&results)
+        .map(|result| {
+            RawSearchResult::new(
                 result
-                    .select(&result_url)
+                    .select(&result_title)
                     .next()
                     .unwrap()
                     .inner_html()
                     .trim()
-            ),
-            description: result
-                .select(&result_desc)
-                .next()
-                .unwrap()
-                .inner_html()
-                .trim()
-                .to_string(),
-            engine: vec!["duckduckgo".to_string()],
-        };
-        search_results.insert(
-            format!(
-                "https://{}",
+                    .to_string(),
+                format!(
+                    "https://{}",
+                    result
+                        .select(&result_url)
+                        .next()
+                        .unwrap()
+                        .inner_html()
+                        .trim()
+                ),
                 result
-                    .select(&result_url)
+                    .select(&result_desc)
                     .next()
                     .unwrap()
                     .inner_html()
                     .trim()
-            ),
-            search_result,
-        );
-    }
-
-    Ok(search_results)
+                    .to_string(),
+                vec!["duckduckgo".to_string()],
+            )
+        })
+        .map(|search_result| (search_result.visiting_url.clone(), search_result))
+        .collect())
 }
diff --git a/src/engines/searx.rs b/src/engines/searx.rs
index f35c36d..94482c2 100644
--- a/src/engines/searx.rs
+++ b/src/engines/searx.rs
@@ -43,47 +43,36 @@ pub async fn results(
     let result_url: Selector = Selector::parse("h3>a")?;
     let result_desc: Selector = Selector::parse(".content")?;
 
-    let mut search_results: HashMap<String, RawSearchResult> = HashMap::new();
-
     // scrape all the results from the html
-    for result in document.select(&results) {
-        let search_result: RawSearchResult = RawSearchResult {
-            title: result
-                .select(&result_title)
-                .next()
-                .unwrap()
-                .inner_html()
-                .trim()
-                .to_string(),
-            visiting_url: result
-                .select(&result_url)
-                .next()
-                .unwrap()
-                .value()
-                .attr("href")
-                .unwrap()
-                .to_string(),
-            description: result
-                .select(&result_desc)
-                .next()
-                .unwrap()
-                .inner_html()
-                .trim()
-                .to_string(),
-            engine: vec!["searx".to_string()],
-        };
-        search_results.insert(
-            result
-                .select(&result_url)
-                .next()
-                .unwrap()
-                .value()
-                .attr("href")
-                .unwrap()
-                .to_string(),
-            search_result,
-        );
-    }
-
-    Ok(search_results)
+    Ok(document
+        .select(&results)
+        .map(|result| {
+            RawSearchResult::new(
+                result
+                    .select(&result_title)
+                    .next()
+                    .unwrap()
+                    .inner_html()
+                    .trim()
+                    .to_string(),
+                result
+                    .select(&result_url)
+                    .next()
+                    .unwrap()
+                    .value()
+                    .attr("href")
+                    .unwrap()
+                    .to_string(),
+                result
+                    .select(&result_desc)
+                    .next()
+                    .unwrap()
+                    .inner_html()
+                    .trim()
+                    .to_string(),
+                vec!["searx".to_string()],
+            )
+        })
+        .map(|search_result| (search_result.visiting_url.clone(), search_result))
+        .collect())
 }
diff --git a/src/search_results_handler/aggregation_models.rs b/src/search_results_handler/aggregation_models.rs
index 4268447..07980b8 100644
--- a/src/search_results_handler/aggregation_models.rs
+++ b/src/search_results_handler/aggregation_models.rs
@@ -10,6 +10,24 @@ pub struct SearchResult {
     pub engine: Vec<String>,
 }
 
+impl SearchResult {
+    pub fn new(
+        title: String,
+        visiting_url: String,
+        url: String,
+        description: String,
+        engine: Vec<String>,
+    ) -> Self {
+        SearchResult {
+            title,
+            visiting_url,
+            url,
+            description,
+            engine,
+        }
+    }
+}
+
 pub struct RawSearchResult {
     pub title: String,
     pub visiting_url: String,
@@ -17,9 +35,37 @@ pub struct RawSearchResult {
     pub engine: Vec<String>,
 }
 
+impl RawSearchResult {
+    pub fn new(
+        title: String,
+        visiting_url: String,
+        description: String,
+        engine: Vec<String>,
+    ) -> Self {
+        RawSearchResult {
+            title,
+            visiting_url,
+            description,
+            engine,
+        }
+    }
+    pub fn add_engines(&mut self, engine: String) {
+        self.engine.push(engine)
+    }
+}
+
 #[derive(Debug, Serialize)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResults {
     pub results: Vec<SearchResult>,
     pub page_query: String,
 }
+
+impl SearchResults {
+    pub fn new(results: Vec<SearchResult>, page_query: String) -> Self {
+        SearchResults {
+            results,
+            page_query,
+        }
+    }
+}
diff --git a/src/search_results_handler/aggregator.rs b/src/search_results_handler/aggregator.rs
index ed689c8..9752216 100644
--- a/src/search_results_handler/aggregator.rs
+++ b/src/search_results_handler/aggregator.rs
@@ -1,8 +1,10 @@
 use std::collections::HashMap;
 
-use fake_useragent::{Browsers, UserAgentsBuilder};
+use super::{
+    aggregation_models::{RawSearchResult, SearchResult, SearchResults},
+    user_agent::random_user_agent,
+};
 
-use super::aggregation_models::{RawSearchResult, SearchResult, SearchResults};
 use crate::engines::{duckduckgo, searx};
 
 // A function that aggregates all the scraped results from the above upstream engines and
@@ -20,23 +22,7 @@ pub async fn aggregate(
     query: &str,
     page: Option<u32>,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
-    // Generate random user agent to improve privacy of the user.
-    let user_agent: String = UserAgentsBuilder::new()
-        .cache(false)
-        .dir("/tmp")
-        .thread(1)
-        .set_browsers(
-            Browsers::new()
-                .set_chrome()
-                .set_safari()
-                .set_edge()
-                .set_firefox()
-                .set_mozilla(),
-        )
-        .build()
-        .random()
-        .to_string();
-
+    let user_agent: String = random_user_agent();
     let mut result_map: HashMap<String, RawSearchResult> = HashMap::new();
 
     let ddg_map_results: HashMap<String, RawSearchResult> =
@@ -46,32 +32,35 @@ pub async fn aggregate(
 
     result_map.extend(ddg_map_results);
 
-    for (key, value) in searx_map_results.into_iter() {
-        if result_map.contains_key(&key) {
-            result_map
-                .get_mut(&key)
-                .unwrap()
-                .engine
-                .push(value.engine.get(0).unwrap().to_string())
-        } else {
-            result_map.insert(key, value);
-        }
-    }
+    searx_map_results.into_iter().for_each(|(key, value)| {
+        result_map
+            .entry(key)
+            .and_modify(|result| {
+                result.add_engines(value.engine[0].clone());
+            })
+            .or_insert_with(|| -> RawSearchResult {
+                RawSearchResult::new(
+                    value.title.clone(),
+                    value.visiting_url.clone(),
+                    value.description.clone(),
+                    value.engine.clone(),
+                )
+            });
+    });
 
-    let mut search_results: Vec<SearchResult> = Vec::new();
-
-    for (key, value) in result_map.into_iter() {
-        search_results.push(SearchResult {
-            title: value.title,
-            visiting_url: value.visiting_url,
-            url: key,
-            description: value.description,
-            engine: value.engine,
-        })
-    }
-
-    Ok(SearchResults {
-        results: search_results,
-        page_query: query.to_string(),
-    })
+    Ok(SearchResults::new(
+        result_map
+            .into_iter()
+            .map(|(key, value)| {
+                SearchResult::new(
+                    value.title,
+                    value.visiting_url,
+                    key,
+                    value.description,
+                    value.engine,
+                )
+            })
+            .collect(),
+        query.to_string(),
+    ))
 }
diff --git a/src/search_results_handler/mod.rs b/src/search_results_handler/mod.rs
index 416b222..0c13442 100644
--- a/src/search_results_handler/mod.rs
+++ b/src/search_results_handler/mod.rs
@@ -1,2 +1,3 @@
 pub mod aggregation_models;
 pub mod aggregator;
+pub mod user_agent;
diff --git a/src/search_results_handler/user_agent.rs b/src/search_results_handler/user_agent.rs
new file mode 100644
index 0000000..1b147aa
--- /dev/null
+++ b/src/search_results_handler/user_agent.rs
@@ -0,0 +1,20 @@
+use fake_useragent::{Browsers, UserAgentsBuilder};
+
+// A function to generate a random user agent to improve the privacy of the user.
+pub fn random_user_agent() -> String {
+    UserAgentsBuilder::new()
+        .cache(false)
+        .dir("/tmp")
+        .thread(1)
+        .set_browsers(
+            Browsers::new()
+                .set_chrome()
+                .set_safari()
+                .set_edge()
+                .set_firefox()
+                .set_mozilla(),
+        )
+        .build()
+        .random()
+        .to_string()
+}
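Note on the port validation in `src/bin/websurfx.rs`: the diff's context ends at `.parse()`, so neither the rest of `is_port_in_range` nor the `CliArgs` field it attaches to is visible. Below is a minimal sketch of how such a validator plugs into clap's derive API, following the clap cookbook's port-in-range recipe; the error branch, the `#[arg]` attribute, and the default port are assumptions for illustration, not websurfx's actual code:

```rust
use std::ops::RangeInclusive;

use clap::Parser;

const PORT_RANGE: RangeInclusive<usize> = 1024..=65535;

// Validator as in the diff; everything past `.parse()` follows the
// clap cookbook, since the diff's context cuts off there.
fn is_port_in_range(s: &str) -> Result<u16, String> {
    let port: usize = s
        .parse()
        .map_err(|_| format!("`{s}` isn't a port number"))?;
    if PORT_RANGE.contains(&port) {
        Ok(port as u16)
    } else {
        Err(format!(
            "port not in range {}-{}",
            PORT_RANGE.start(),
            PORT_RANGE.end()
        ))
    }
}

#[derive(Parser)]
struct CliArgs {
    // Out-of-range ports are rejected at parse time; 8080 is an assumed default.
    #[arg(long, default_value_t = 8080, value_parser = is_port_in_range)]
    port: u16,
}

fn main() {
    let args = CliArgs::parse();
    println!("would start the server on port {}", args.port);
}
```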
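Note on the rewrites in `src/engines/duckduckgo.rs` and `src/engines/searx.rs`: the loop-plus-`insert` collapses into a single expression because an iterator of `(key, value)` tuples collects directly into a `HashMap`. A tiny sketch of that pattern with made-up data, no scraping involved:

```rust
use std::collections::HashMap;

fn main() {
    let urls = ["https://example.org", "https://example.net"];

    // Each tuple becomes one map entry, mirroring the diff's final
    // .map(|search_result| (search_result.visiting_url.clone(), search_result))
    // .collect() steps.
    let results: HashMap<String, String> = urls
        .iter()
        .map(|url| (url.to_string(), format!("result for {url}")))
        .collect();

    assert_eq!(results.len(), 2);
}
```

As with repeated `insert` calls, a duplicate key keeps only the last value, so the collected map behaves like the old loop did.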
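Note on the merge step in `src/search_results_handler/aggregator.rs`: `entry` fetches or creates a slot in one lookup, `and_modify` runs only when the key already exists, and `or_insert_with` only when it does not, replacing the old `contains_key`/`get_mut` double lookup. A self-contained sketch, with a hypothetical `Hit` struct standing in for `RawSearchResult`:

```rust
use std::collections::HashMap;

// Hypothetical stand-in for RawSearchResult, reduced to the one
// field the merge step actually touches.
struct Hit {
    engine: Vec<String>,
}

fn main() {
    // Results already in the map, keyed by URL (e.g. from duckduckgo).
    let mut result_map: HashMap<String, Hit> = HashMap::from([(
        "https://example.org".to_string(),
        Hit { engine: vec!["duckduckgo".to_string()] },
    )]);

    // Results from a second engine; one key overlaps, one is new.
    let searx_map_results: HashMap<String, Hit> = HashMap::from([
        ("https://example.org".to_string(), Hit { engine: vec!["searx".to_string()] }),
        ("https://example.net".to_string(), Hit { engine: vec!["searx".to_string()] }),
    ]);

    searx_map_results.into_iter().for_each(|(key, value)| {
        result_map
            .entry(key)
            // Key already present: only record the extra engine name.
            .and_modify(|hit| hit.engine.push(value.engine[0].clone()))
            // Key absent: insert the new result as-is.
            .or_insert(value);
    });

    // "https://example.org" now lists both engines.
    for (url, hit) in &result_map {
        println!("{url} -> {:?}", hit.engine);
    }
}
```

The net effect is that each URL appears once, tagged with every engine that returned it, which is exactly what the `add_engines` helper introduced in `aggregation_models.rs` is for.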