diff --git a/.cspell.json b/.cspell.json
index b798d29..9d5ec5b 100644
--- a/.cspell.json
+++ b/.cspell.json
@@ -11,6 +11,10 @@
   ],
   "language": "en",
   "noConfigSearch": true,
-  "words": ["megalinter", "oxsecurity"],
+  "words": [
+    "megalinter",
+    "oxsecurity",
+    "websurfx"
+  ],
   "version": "0.2"
 }
diff --git a/src/config/parser.rs b/src/config/parser.rs
index 201e579..c6ca37a 100644
--- a/src/config/parser.rs
+++ b/src/config/parser.rs
@@ -57,7 +57,7 @@ impl Config {
     /// # Arguments
     ///
     /// * `logging_initialized` - It takes a boolean which ensures that the logging doesn't get
-    ///   initialized twice.
+    ///   initialized twice. Pass `false` if the logger has not yet been initialized.
     ///
     /// # Error
     ///
@@ -76,23 +76,9 @@ impl Config {
 
         let debug: bool = globals.get::<_, bool>("debug")?;
         let logging: bool = globals.get::<_, bool>("logging")?;
-
-        // Check whether logging has not been initialized before.
-        if logging_initialized {
-            if let Ok(pkg_env_var) = std::env::var("PKG_ENV"){
-                if pkg_env_var.to_lowercase() == "dev" {
-                    env_logger::Builder::new().filter(None, LevelFilter::Trace).init();
-                }
-            } else {
-                // Initializing logging middleware with level set to default or info.
-                let mut log_level: LevelFilter = LevelFilter::Error;
-                if logging && debug == false {
-                    log_level = LevelFilter::Info;
-                } else if debug {
-                    log_level = LevelFilter::Debug;
-                };
-                env_logger::Builder::new().filter(None, log_level).init();
-            }
+
+        if !logging_initialized {
+            set_logging_level(debug, logging);
         }
 
         let threads: u8 = if parsed_threads == 0 {
@@ -127,6 +113,7 @@ impl Config {
             })
         })
     }
+
     /// A helper function which returns an appropriate config file path checking if the config
     /// file exists on that path.
     ///
@@ -173,3 +160,26 @@ impl Config {
         Err("Config file not found!!".to_string().into())
     }
 }
+
+/// A helper function that sets the proper logging level, giving `debug`
+/// precedence over `logging` and letting `PKG_ENV=dev` override both.
+fn set_logging_level(debug: bool, logging: bool) {
+    if let Ok(pkg_env_var) = std::env::var("PKG_ENV") {
+        if pkg_env_var.to_lowercase() == "dev" {
+            env_logger::Builder::new()
+                .filter(None, LevelFilter::Trace)
+                .init();
+            return;
+        }
+    }
+
+    // Initialize the logging middleware with the level set to default or info.
+    let log_level = match (debug, logging) {
+        (true, true) => LevelFilter::Debug,
+        (true, false) => LevelFilter::Debug,
+        (false, true) => LevelFilter::Info,
+        (false, false) => LevelFilter::Error,
+    };
+
+    env_logger::Builder::new().filter(None, log_level).init();
+}
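// A minimal sketch, not part of the patch, that pins down the truth table
// `set_logging_level` implements: `debug` takes precedence over `logging`,
// matching the if/else chain the match expression replaces.
#[cfg(test)]
mod logging_level_tests {
    use log::LevelFilter;

    // Mirrors the match inside `set_logging_level`, ignoring the PKG_ENV override.
    fn level_for(debug: bool, logging: bool) -> LevelFilter {
        match (debug, logging) {
            (true, _) => LevelFilter::Debug,
            (false, true) => LevelFilter::Info,
            (false, false) => LevelFilter::Error,
        }
    }

    #[test]
    fn debug_flag_takes_precedence() {
        assert_eq!(level_for(true, true), LevelFilter::Debug);
        assert_eq!(level_for(true, false), LevelFilter::Debug);
        assert_eq!(level_for(false, true), LevelFilter::Info);
        assert_eq!(level_for(false, false), LevelFilter::Error);
    }
}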
diff --git a/src/engines/duckduckgo.rs b/src/engines/duckduckgo.rs
index 42f4994..8958319 100644
--- a/src/engines/duckduckgo.rs
+++ b/src/engines/duckduckgo.rs
@@ -147,7 +147,7 @@ impl SearchEngine for DuckDuckGo {
                     vec!["duckduckgo".to_string()],
                 )
             })
-            .map(|search_result| (search_result.visiting_url.clone(), search_result))
+            .map(|search_result| (search_result.url.clone(), search_result))
             .collect())
     }
 }
diff --git a/src/engines/engine_models.rs b/src/engines/engine_models.rs
index b5051be..c6aa030 100644
--- a/src/engines/engine_models.rs
+++ b/src/engines/engine_models.rs
@@ -43,7 +43,7 @@ impl fmt::Display for EngineError {
 
 impl error_stack::Context for EngineError {}
 
-/// A trait to define common behaviour for all search engines.
+/// A trait to define common behavior for all search engines.
 #[async_trait::async_trait]
 pub trait SearchEngine {
     async fn fetch_html_from_upstream(
@@ -56,7 +56,7 @@ pub trait SearchEngine {
         Ok(reqwest::Client::new()
             .get(url)
             .timeout(Duration::from_secs(request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
-            .headers(header_map) // add spoofed headers to emulate human behaviour
+            .headers(header_map) // add spoofed headers to emulate human behavior
             .send()
             .await
             .into_report()
diff --git a/src/engines/searx.rs b/src/engines/searx.rs
index ab73ee0..1caca17 100644
--- a/src/engines/searx.rs
+++ b/src/engines/searx.rs
@@ -137,7 +137,7 @@ impl SearchEngine for Searx {
                     vec!["searx".to_string()],
                 )
             })
-            .map(|search_result| (search_result.visiting_url.clone(), search_result))
+            .map(|search_result| (search_result.url.clone(), search_result))
             .collect())
     }
 }
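// For orientation, a sketch of what a third engine would look like under the
// `SearchEngine` trait. The `results` signature is inferred from the DuckDuckGo
// and Searx implementations this diff touches; the hypothetical `Example`
// engine, its URL, and the elided scraping step are placeholders:
use std::collections::HashMap;

use error_stack::Report;
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};

pub struct Example;

#[async_trait::async_trait]
impl SearchEngine for Example {
    async fn results(
        &self,
        query: String,
        page: u32,
        user_agent: String,
        request_timeout: u8,
    ) -> Result<HashMap<String, RawSearchResult>, Report<EngineError>> {
        let url = format!("https://search.example/?q={query}&page={page}");

        // Spoof the user agent, then fetch through the shared trait helper.
        let mut headers = HeaderMap::new();
        headers.insert(USER_AGENT, HeaderValue::from_str(&user_agent).unwrap());
        let _html = self
            .fetch_html_from_upstream(url, headers, request_timeout)
            .await?;

        // Scraping elided: parse `_html` into `RawSearchResult`s and key each
        // entry by its `url` field, as the other engines now do.
        Ok(HashMap::new())
    }
}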
diff --git a/src/results/aggregation_models.rs b/src/results/aggregation_models.rs
index 6766fae..2f1b7b4 100644
--- a/src/results/aggregation_models.rs
+++ b/src/results/aggregation_models.rs
@@ -11,8 +11,6 @@ use crate::{config::parser_models::Style, engines::engine_models::EngineError};
 /// # Fields
 ///
 /// * `title` - The title of the search result.
-/// * `visiting_url` - The url which is accessed when clicked on it (href url in html in simple
-///   words).
 /// * `url` - The url to be displayed below the search result title in html.
 /// * `description` - The description of the search result.
 /// * `engine` - The names of the upstream engines from which this results were provided.
@@ -20,7 +18,6 @@ use crate::{config::parser_models::Style, engines::engine_models::EngineError};
 #[serde(rename_all = "camelCase")]
 pub struct SearchResult {
     pub title: String,
-    pub visiting_url: String,
     pub url: String,
     pub description: String,
     pub engine: Vec<String>,
@@ -37,21 +34,24 @@ impl SearchResult {
     /// * `url` - The url to be displayed below the search result title in html.
     /// * `description` - The description of the search result.
     /// * `engine` - The names of the upstream engines from which this results were provided.
-    pub fn new(
-        title: String,
-        visiting_url: String,
-        url: String,
-        description: String,
-        engine: Vec<String>,
-    ) -> Self {
+    pub fn new(title: String, url: String, description: String, engine: Vec<String>) -> Self {
         SearchResult {
             title,
-            visiting_url,
             url,
             description,
             engine,
         }
     }
+
+    /// Constructs a `SearchResult` from a `RawSearchResult` by moving its fields over.
+    pub fn from_raw(raw: RawSearchResult) -> Self {
+        SearchResult {
+            title: raw.title,
+            url: raw.url,
+            description: raw.description,
+            engine: raw.engine,
+        }
+    }
 }
 
 /// A named struct to store the raw scraped search results scraped search results from the
@@ -61,14 +60,14 @@ impl SearchResult {
 /// # Fields
 ///
 /// * `title` - The title of the search result.
-/// * `visiting_url` - The url which is accessed when clicked on it
+/// * `url` - The url which is accessed when clicked on it
 ///   (href url in html in simple words).
 /// * `description` - The description of the search result.
 /// * `engine` - The names of the upstream engines from which this results were provided.
 #[derive(Clone)]
 pub struct RawSearchResult {
     pub title: String,
-    pub visiting_url: String,
+    pub url: String,
     pub description: String,
     pub engine: Vec<String>,
 }
@@ -79,19 +78,14 @@ impl RawSearchResult {
     /// # Arguments
     ///
     /// * `title` - The title of the search result.
-    /// * `visiting_url` - The url which is accessed when clicked on it
+    /// * `url` - The url which is accessed when clicked on it
     ///   (href url in html in simple words).
     /// * `description` - The description of the search result.
     /// * `engine` - The names of the upstream engines from which this results were provided.
-    pub fn new(
-        title: String,
-        visiting_url: String,
-        description: String,
-        engine: Vec<String>,
-    ) -> Self {
+    pub fn new(title: String, url: String, description: String, engine: Vec<String>) -> Self {
         RawSearchResult {
             title,
-            visiting_url,
+            url,
             description,
             engine,
         }
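// The constructor changes above at a glance: `RawSearchResult` now carries a
// single `url` field, and `SearchResult::from_raw` replaces the old
// five-argument `SearchResult::new`. A sketch with placeholder values,
// assuming both types are in scope:
fn demo_from_raw() {
    let raw = RawSearchResult::new(
        "Websurfx".to_string(),
        "https://github.com/neon-mmd/websurfx".to_string(),
        "A meta search engine written in Rust.".to_string(),
        vec!["duckduckgo".to_string(), "searx".to_string()],
    );
    let result = SearchResult::from_raw(raw);
    assert_eq!(result.url, "https://github.com/neon-mmd/websurfx");
}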
diff --git a/src/results/aggregator.rs b/src/results/aggregator.rs
index 77c2f71..4d0c708 100644
--- a/src/results/aggregator.rs
+++ b/src/results/aggregator.rs
@@ -64,11 +64,10 @@ pub async fn aggregate(
     page: u32,
     random_delay: bool,
     debug: bool,
-    upstream_search_engines: Vec<String>,
+    mut upstream_search_engines: Vec<String>,
     request_timeout: u8,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
     let user_agent: String = random_user_agent();
-    let mut result_map: HashMap<String, RawSearchResult> = HashMap::new();
 
     // Add a random delay before making the request.
     if random_delay || !debug {
@@ -77,20 +76,14 @@ pub async fn aggregate(
         tokio::time::sleep(Duration::from_secs(delay_secs)).await;
     }
 
-    // fetch results from upstream search engines simultaneously/concurrently.
-    let search_engines: Vec<Box<dyn SearchEngine + Send + Sync>> = upstream_search_engines
+    // Create a task for each upstream engine so the results are fetched concurrently.
+    let tasks: FutureVec = upstream_search_engines
         .iter()
         .map(|engine| match engine.to_lowercase().as_str() {
             "duckduckgo" => Box::new(duckduckgo::DuckDuckGo) as Box<dyn SearchEngine + Send + Sync>,
             "searx" => Box::new(searx::Searx) as Box<dyn SearchEngine + Send + Sync>,
             &_ => panic!("Config Error: Incorrect config file option provided"),
         })
-        .collect();
-
-    let task_capacity: usize = search_engines.len();
-
-    let tasks: FutureVec = search_engines
-        .into_iter()
         .map(|search_engine| {
             let query: String = query.clone();
             let user_agent: String = user_agent.clone();
@@ -102,101 +95,69 @@ pub async fn aggregate(
         })
         .collect();
 
-    let mut outputs = Vec::with_capacity(task_capacity);
+    // Collect the upstream responses, skipping any task that failed to join.
+    let mut responses = Vec::with_capacity(tasks.len());
 
     for task in tasks {
         if let Ok(result) = task.await {
-            outputs.push(result)
+            responses.push(result)
         }
     }
 
+    // Aggregate the results, deduplicating them and recording upstream engine errors.
+    let mut result_map: HashMap<String, RawSearchResult> = HashMap::new();
     let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
 
-    // The code block `outputs.iter()` determines whether it is the first time the code is being run.
-    // It does this by checking the initial flag. If it is the first time, the code selects the first
-    // engine from which results are fetched and adds or extends them into the `result_map`. If the
-    // initially selected engine fails, the code automatically selects another engine to map or extend
-    // into the `result_map`. On the other hand, if an engine selected for the first time successfully
-    // fetches results and maps them into the `result_map`, the initial flag is set to false. Subsequently,
-    // the code iterates through the remaining engines one by one. It compares the fetched results from each
-    // engine with the results already present in the `result_map` to identify any duplicates. If duplicate
-    // results are found, the code groups them together with the name of the engine from which they were
-    // fetched, and automatically removes the duplicate results from the newly fetched data.
-    //
-    // Additionally, the code handles errors returned by the engines. It keeps track of which engines
-    // encountered errors and stores this information in a vector of structures called `EngineErrorInfo`.
-    // Each structure in this vector contains the name of the engine and the type of error it returned.
-    // These structures will later be added to the final `SearchResults` structure. The `SearchResults`
-    // structure is used to display an error box in the UI containing the relevant information from
-    // the `EngineErrorInfo` structure.
-    //
-    // In summary, this code block manages the selection of engines, handling of duplicate results, and tracking
-    // of errors in order to populate the `result_map` and provide informative feedback to the user through the
-    // `SearchResults` structure.
-    let mut initial: bool = true;
-    let mut counter: usize = 0;
-    outputs.iter().for_each(|results| {
-        if initial {
-            match results {
-                Ok(result) => {
-                    result_map.extend(result.clone());
-                    counter += 1;
-                    initial = false
-                }
-                Err(error_type) => {
-                    log::error!("Engine Error: {:?}", error_type);
-                    engine_errors_info.push(EngineErrorInfo::new(
-                        error_type.downcast_ref::<EngineError>().unwrap(),
-                        upstream_search_engines[counter].clone(),
-                    ));
-                    counter += 1
-                }
-            }
-        } else {
-            match results {
-                Ok(result) => {
-                    result.clone().into_iter().for_each(|(key, value)| {
-                        result_map
-                            .entry(key)
-                            .and_modify(|result| {
-                                result.add_engines(value.clone().engine());
-                            })
-                            .or_insert_with(|| -> RawSearchResult {
-                                RawSearchResult::new(
-                                    value.title.clone(),
-                                    value.visiting_url.clone(),
-                                    value.description.clone(),
-                                    value.engine.clone(),
-                                )
-                            });
-                    });
-                    counter += 1
-                }
-                Err(error_type) => {
-                    log::error!("Engine Error: {:?}", error_type);
-                    engine_errors_info.push(EngineErrorInfo::new(
-                        error_type.downcast_ref::<EngineError>().unwrap(),
-                        upstream_search_engines[counter].clone(),
-                    ));
-                    counter += 1
-                }
-            }
-        }
-    });
+    let mut handle_error = |error: Report<EngineError>, engine_name: String| {
+        log::error!("Engine Error: {:?}", error);
+        engine_errors_info.push(EngineErrorInfo::new(
+            error.downcast_ref::<EngineError>().unwrap(),
+            engine_name,
+        ));
+    };
+
+    // `responses` and `upstream_search_engines` are index-aligned, so popping
+    // both from the back keeps each response paired with its engine name.
+    for _ in 0..responses.len() {
+        let response = responses.pop().unwrap();
+        let engine_name = upstream_search_engines.pop().unwrap();
+
+        if result_map.is_empty() {
+            match response {
+                Ok(results) => {
+                    result_map = results;
+                }
+                Err(error) => {
+                    handle_error(error, engine_name.clone());
+                }
+            }
+            continue;
+        }
+
+        match response {
+            Ok(result) => {
+                result.into_iter().for_each(|(key, value)| {
+                    result_map
+                        .entry(key)
+                        .and_modify(|result| {
+                            result.add_engines(engine_name.clone());
+                        })
+                        .or_insert_with(|| -> RawSearchResult { value });
+                });
+            }
+            Err(error) => {
+                handle_error(error, engine_name.clone());
+            }
+        }
+    }
+
+    let mut results = Vec::with_capacity(result_map.len());
+    for (_, result) in result_map {
+        results.push(SearchResult::from_raw(result))
+    }
 
     Ok(SearchResults::new(
-        result_map
-            .into_iter()
-            .map(|(key, value)| {
-                SearchResult::new(
-                    value.title,
-                    value.visiting_url,
-                    key,
-                    value.description,
-                    value.engine,
-                )
-            })
-            .collect(),
+        results,
         query.to_string(),
         engine_errors_info,
     ))
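// The deduplication at the heart of the new loop, in miniature: the first
// engine to report a url claims the map entry, and later engines only append
// their name to it. A standalone sketch, not patch code:
use std::collections::HashMap;

fn main() {
    let mut result_map: HashMap<String, Vec<String>> = HashMap::new();
    for (url, engine) in [
        ("https://example.org/", "duckduckgo"),
        ("https://example.org/", "searx"), // duplicate url from another engine
        ("https://example.net/", "searx"),
    ] {
        result_map
            .entry(url.to_owned())
            .and_modify(|engines| engines.push(engine.to_owned()))
            .or_insert_with(|| vec![engine.to_owned()]);
    }
    assert_eq!(result_map["https://example.org/"], ["duckduckgo", "searx"]);
}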
diff --git a/tests/index.rs b/tests/index.rs
index d886e13..080ad27 100644
--- a/tests/index.rs
+++ b/tests/index.rs
@@ -8,7 +8,7 @@ fn spawn_app() -> String {
     // Binding to port 0 will trigger the OS to assign a port for us.
     let listener = TcpListener::bind("127.0.0.1:0").expect("Failed to bind random port");
     let port = listener.local_addr().unwrap().port();
-    let config = Config::parse(true).unwrap();
+    let config = Config::parse(false).unwrap();
     let server = run(listener, config).expect("Failed to bind address");
     tokio::spawn(server);
 
@@ -36,7 +36,7 @@ async fn test_index() {
     assert_eq!(res.status(), 200);
 
     let handlebars = handlebars();
-    let config = Config::parse(false).unwrap();
+    let config = Config::parse(true).unwrap();
     let template = handlebars.render("index", &config.style).unwrap();
     assert_eq!(res.text().await.unwrap(), template);
 }