Merge branch 'rolling' into change-document-style-with-linter-warnings

2023-09-12 17:49:46 +03:00 · 2023-09-12 17:49:46 +03:00 · fb231de416
commit fb231de416
parent 453dbdc47d 61eaa4710c
26 changed files with 1116 additions and 486 deletions
--- a/src/results/aggregation_models.rs
+++ b/src/results/aggregation_models.rs
@ -2,6 +2,7 @@
 //! data scraped from the upstream search engines.

 use serde::{Deserialize, Serialize};
+use smallvec::SmallVec;

 use crate::{config::parser_models::Style, engines::engine_models::EngineError};

@ -19,7 +20,7 @@ pub struct SearchResult {
    /// The description of the search result.
    pub description: String,
    /// The names of the upstream engines from which this results were provided.
-    pub engine: Vec<String>,
+    pub engine: SmallVec<[String; 0]>,
 }

 impl SearchResult {
@ -32,12 +33,12 @@ impl SearchResult {
    /// (href url in html in simple words).
    /// * `description` - The description of the search result.
    /// * `engine` - The names of the upstream engines from which this results were provided.
-    pub fn new(title: String, url: String, description: String, engine: Vec<String>) -> Self {
+    pub fn new(title: &str, url: &str, description: &str, engine: &[&str]) -> Self {
        SearchResult {
-            title,
-            url,
-            description,
-            engine,
+            title: title.to_owned(),
+            url: url.to_owned(),
+            description: description.to_owned(),
+            engine: engine.iter().map(|name| name.to_string()).collect(),
        }
    }

@ -46,8 +47,8 @@ impl SearchResult {
    /// # Arguments
    ///
    /// * `engine` - Takes an engine name provided as a String.
-    pub fn add_engines(&mut self, engine: String) {
-        self.engine.push(engine)
+    pub fn add_engines(&mut self, engine: &str) {
+        self.engine.push(engine.to_owned())
    }

    /// A function which returns the engine name stored from the struct as a string.
@ -55,13 +56,13 @@ impl SearchResult {
    /// # Returns
    ///
    /// An engine name stored as a string from the struct.
-    pub fn engine(self) -> String {
-        self.engine.get(0).unwrap().to_string()
+    pub fn engine(&mut self) -> String {
+        std::mem::take(&mut self.engine[0])
    }
 }

 /// A named struct that stores the error info related to the upstream search engines.
-#[derive(Serialize, Deserialize)]
+#[derive(Serialize, Deserialize, Clone)]
 pub struct EngineErrorInfo {
    /// It stores the error type which occured while fetching the result from a particular search
    /// engine.
@ -81,18 +82,18 @@ impl EngineErrorInfo {
    /// * `error` - It takes the error type which occured while fetching the result from a particular
    /// search engine.
    /// * `engine` - It takes the name of the engine that failed to provide the requested search results.
-    pub fn new(error: &EngineError, engine: String) -> Self {
+    pub fn new(error: &EngineError, engine: &str) -> Self {
        Self {
            error: match error {
-                EngineError::RequestError => String::from("RequestError"),
-                EngineError::EmptyResultSet => String::from("EmptyResultSet"),
-                EngineError::UnexpectedError => String::from("UnexpectedError"),
+                EngineError::RequestError => "RequestError".to_owned(),
+                EngineError::EmptyResultSet => "EmptyResultSet".to_owned(),
+                EngineError::UnexpectedError => "UnexpectedError".to_owned(),
            },
-            engine,
+            engine: engine.to_owned(),
            severity_color: match error {
-                EngineError::RequestError => String::from("green"),
-                EngineError::EmptyResultSet => String::from("blue"),
-                EngineError::UnexpectedError => String::from("red"),
+                EngineError::RequestError => "green".to_owned(),
+                EngineError::EmptyResultSet => "blue".to_owned(),
+                EngineError::UnexpectedError => "red".to_owned(),
            },
        }
    }
@ -101,7 +102,7 @@ impl EngineErrorInfo {
 /// A named struct to store, serialize, deserialize the all the search results scraped and
 /// aggregated from the upstream search engines.
 /// `SearchResult` structs.
-#[derive(Serialize, Deserialize)]
+#[derive(Serialize, Deserialize, Default)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResults {
    /// Stores the individual serializable `SearchResult` struct into a vector of
@ -113,6 +114,14 @@ pub struct SearchResults {
    /// Stores the information on which engines failed with their engine name
    /// and the type of error that caused it.
    pub engine_errors_info: Vec<EngineErrorInfo>,
+    /// Stores a flag indicating that the search query was disallowed because
+    /// the safe search level was set to 4 and the query was present in the
+    /// `Blocklist` file.
+    pub disallowed: bool,
+    /// Stores a flag indicating that the search query was filtered because
+    /// the safe search level was set to 3 and the query was present in the
+    /// `Blocklist` file.
+    pub filtered: bool,
 }

 impl SearchResults {
@ -126,21 +135,48 @@ impl SearchResults {
    /// the search url.
    /// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the
    /// given search query.
+    /// * `engine_errors_info` - Takes the info on which engines failed and the type of error that caused it.
    pub fn new(
        results: Vec<SearchResult>,
-        page_query: String,
-        engine_errors_info: Vec<EngineErrorInfo>,
+        page_query: &str,
+        engine_errors_info: &[EngineErrorInfo],
    ) -> Self {
-        SearchResults {
+        Self {
            results,
-            page_query,
-            style: Style::new("".to_string(), "".to_string()),
-            engine_errors_info,
+            page_query: page_query.to_owned(),
+            style: Style::default(),
+            engine_errors_info: engine_errors_info.to_owned(),
+            disallowed: Default::default(),
+            filtered: Default::default(),
        }
    }

    /// A setter function to add website style to the return search results.
-    pub fn add_style(&mut self, style: Style) {
-        self.style = style;
+    pub fn add_style(&mut self, style: &Style) {
+        self.style = style.clone();
+    }
+
+    /// A setter function that sets disallowed to true.
+    pub fn set_disallowed(&mut self) {
+        self.disallowed = true;
+    }
+
+    /// A setter function to set the current page search query.
+    pub fn set_page_query(&mut self, page: &str) {
+        self.page_query = page.to_owned();
+    }
+
+    /// A setter function that sets `filtered` to true.
+    pub fn set_filtered(&mut self) {
+        self.filtered = true;
+    }
+
+    /// A getter function that takes the value of `engine_errors_info`, leaving an empty vector in its place.
+    pub fn engine_errors_info(&mut self) -> Vec<EngineErrorInfo> {
+        std::mem::take(&mut self.engine_errors_info)
+    }
+    /// A getter function that returns a clone of `results`.
+    pub fn results(&mut self) -> Vec<SearchResult> {
+        self.results.clone()
    }
 }
--- a/src/results/aggregator.rs
+++ b/src/results/aggregator.rs
@ -64,14 +64,15 @@ type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<Eng
 /// function in either `searx` or `duckduckgo` or both otherwise returns a `SearchResults struct`
 /// containing appropriate values.
 pub async fn aggregate(
-    query: String,
+    query: &str,
    page: u32,
    random_delay: bool,
    debug: bool,
-    upstream_search_engines: Vec<EngineHandler>,
+    upstream_search_engines: &[EngineHandler],
    request_timeout: u8,
+    safe_search: u8,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
-    let user_agent: String = random_user_agent();
+    let user_agent: &str = random_user_agent();

    // Add a random delay before making the request.
    if random_delay || !debug {
@ -80,19 +81,24 @@ pub async fn aggregate(
        tokio::time::sleep(Duration::from_secs(delay_secs)).await;
    }

-    let mut names: Vec<&str> = vec![];
+    let mut names: Vec<&str> = Vec::with_capacity(0);

    // create tasks for upstream result fetching
    let mut tasks: FutureVec = FutureVec::new();

    for engine_handler in upstream_search_engines {
-        let (name, search_engine) = engine_handler.into_name_engine();
+        let (name, search_engine) = engine_handler.to_owned().into_name_engine();
        names.push(name);
-        let query: String = query.clone();
-        let user_agent: String = user_agent.clone();
+        let query: String = query.to_owned();
        tasks.push(tokio::spawn(async move {
            search_engine
-                .results(query, page, user_agent.clone(), request_timeout)
+                .results(
+                    &query,
+                    page,
+                    user_agent.clone(),
+                    request_timeout,
+                    safe_search,
+                )
                .await
        }));
    }
@ -110,7 +116,7 @@ pub async fn aggregate(
    let mut result_map: HashMap<String, SearchResult> = HashMap::new();
    let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();

-    let mut handle_error = |error: Report<EngineError>, engine_name: String| {
+    let mut handle_error = |error: &Report<EngineError>, engine_name: &'static str| {
        log::error!("Engine Error: {:?}", error);
        engine_errors_info.push(EngineErrorInfo::new(
            error.downcast_ref::<EngineError>().unwrap(),
@ -120,7 +126,7 @@ pub async fn aggregate(

    for _ in 0..responses.len() {
        let response = responses.pop().unwrap();
-        let engine = names.pop().unwrap().to_string();
+        let engine = names.pop().unwrap();

        if result_map.is_empty() {
            match response {
@ -128,7 +134,7 @@ pub async fn aggregate(
                    result_map = results.clone();
                }
                Err(error) => {
-                    handle_error(error, engine);
+                    handle_error(&error, engine);
                }
            }
            continue;
@ -140,39 +146,37 @@ pub async fn aggregate(
                    result_map
                        .entry(key)
                        .and_modify(|result| {
-                            result.add_engines(engine.clone());
+                            result.add_engines(engine);
                        })
                        .or_insert_with(|| -> SearchResult { value });
                });
            }
            Err(error) => {
-                handle_error(error, engine);
+                handle_error(&error, engine);
            }
        }
    }

-    let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
-    filter_with_lists(
-        &mut result_map,
-        &mut blacklist_map,
-        &file_path(FileType::BlockList)?,
-    )?;
+    if safe_search >= 3 {
+        let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
+        filter_with_lists(
+            &mut result_map,
+            &mut blacklist_map,
+            file_path(FileType::BlockList)?,
+        )?;

-    filter_with_lists(
-        &mut blacklist_map,
-        &mut result_map,
-        &file_path(FileType::AllowList)?,
-    )?;
+        filter_with_lists(
+            &mut blacklist_map,
+            &mut result_map,
+            file_path(FileType::AllowList)?,
+        )?;

-    drop(blacklist_map);
+        drop(blacklist_map);
+    }

    let results: Vec<SearchResult> = result_map.into_values().collect();

-    Ok(SearchResults::new(
-        results,
-        query.to_string(),
-        engine_errors_info,
-    ))
+    Ok(SearchResults::new(results, query, &engine_errors_info))
 }

 /// Filters a map of search results using a list of regex patterns.
@ -194,7 +198,7 @@ pub fn filter_with_lists(
    let mut reader = BufReader::new(File::open(file_path)?);

    for line in reader.by_ref().lines() {
-        let re = Regex::new(&line?)?;
+        let re = Regex::new(line?.trim())?;

        // Iterate over each search result in the map and check if it matches the regex pattern
        for (url, search_result) in map_to_be_filtered.clone().into_iter() {
@ -203,7 +207,10 @@ pub fn filter_with_lists(
                || re.is_match(&search_result.description.to_lowercase())
            {
                // If the search result matches the regex pattern, move it from the original map to the resultant map
-                resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
+                resultant_map.insert(
+                    url.to_owned(),
+                    map_to_be_filtered.remove(&url.to_owned()).unwrap(),
+                );
            }
        }
    }
@ -214,6 +221,7 @@ pub fn filter_with_lists(
 #[cfg(test)]
 mod tests {
    use super::*;
+    use smallvec::smallvec;
    use std::collections::HashMap;
    use std::io::Write;
    use tempfile::NamedTempFile;
@ -223,22 +231,22 @@ mod tests {
        // Create a map of search results to filter
        let mut map_to_be_filtered = HashMap::new();
        map_to_be_filtered.insert(
-            "https://www.example.com".to_string(),
+            "https://www.example.com".to_owned(),
            SearchResult {
-                title: "Example Domain".to_string(),
-                url: "https://www.example.com".to_string(),
+                title: "Example Domain".to_owned(),
+                url: "https://www.example.com".to_owned(),
                description: "This domain is for use in illustrative examples in documents."
-                    .to_string(),
-                engine: vec!["Google".to_string(), "Bing".to_string()],
+                    .to_owned(),
+                engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
            },
        );
        map_to_be_filtered.insert(
-            "https://www.rust-lang.org/".to_string(),
+            "https://www.rust-lang.org/".to_owned(),
            SearchResult {
-                title: "Rust Programming Language".to_string(),
-                url: "https://www.rust-lang.org/".to_string(),
-                description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
-                engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
+                title: "Rust Programming Language".to_owned(),
+                url: "https://www.rust-lang.org/".to_owned(),
+                description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
+                engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
            },
        );

@ -267,22 +275,22 @@ mod tests {
    fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
        let mut map_to_be_filtered = HashMap::new();
        map_to_be_filtered.insert(
-            "https://www.example.com".to_string(),
+            "https://www.example.com".to_owned(),
            SearchResult {
-                title: "Example Domain".to_string(),
-                url: "https://www.example.com".to_string(),
+                title: "Example Domain".to_owned(),
+                url: "https://www.example.com".to_owned(),
                description: "This domain is for use in illustrative examples in documents."
-                    .to_string(),
-                engine: vec!["Google".to_string(), "Bing".to_string()],
+                    .to_owned(),
+                engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
            },
        );
        map_to_be_filtered.insert(
-            "https://www.rust-lang.org/".to_string(),
+            "https://www.rust-lang.org/".to_owned(),
            SearchResult {
-                title: "Rust Programming Language".to_string(),
-                url: "https://www.rust-lang.org/".to_string(),
-                description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
-                engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
+                title: "Rust Programming Language".to_owned(),
+                url: "https://www.rust-lang.org/".to_owned(),
+                description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
+                engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
            },
        );

@ -327,13 +335,13 @@ mod tests {
    fn test_filter_with_lists_invalid_regex() {
        let mut map_to_be_filtered = HashMap::new();
        map_to_be_filtered.insert(
-            "https://www.example.com".to_string(),
+            "https://www.example.com".to_owned(),
            SearchResult {
-                title: "Example Domain".to_string(),
-                url: "https://www.example.com".to_string(),
+                title: "Example Domain".to_owned(),
+                url: "https://www.example.com".to_owned(),
                description: "This domain is for use in illustrative examples in documents."
-                    .to_string(),
-                engine: vec!["Google".to_string(), "Bing".to_string()],
+                    .to_owned(),
+                engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
            },
        );

--- a/src/results/user_agent.rs
+++ b/src/results/user_agent.rs
@ -1,30 +1,34 @@
 //! This module provides the functionality to generate random user agent string.

+use std::sync::OnceLock;
+
 use fake_useragent::{Browsers, UserAgents, UserAgentsBuilder};

 /// A static variable which stores the initially build `UserAgents` struct. So as it can be resused
 /// again and again without the need of reinitializing the `UserAgents` struct.
-static USER_AGENTS: once_cell::sync::Lazy<UserAgents> = once_cell::sync::Lazy::new(|| {
-    UserAgentsBuilder::new()
-        .cache(false)
-        .dir("/tmp")
-        .thread(1)
-        .set_browsers(
-            Browsers::new()
-                .set_chrome()
-                .set_safari()
-                .set_edge()
-                .set_firefox()
-                .set_mozilla(),
-        )
-        .build()
-});
+static USER_AGENTS: OnceLock<UserAgents> = OnceLock::new();

 /// A function to generate random user agent to improve privacy of the user.
 ///
 /// # Returns
 ///
 /// A randomly generated user agent string.
-pub fn random_user_agent() -> String {
-    USER_AGENTS.random().to_string()
+pub fn random_user_agent() -> &'static str {
+    USER_AGENTS
+        .get_or_init(|| {
+            UserAgentsBuilder::new()
+                .cache(false)
+                .dir("/tmp")
+                .thread(1)
+                .set_browsers(
+                    Browsers::new()
+                        .set_chrome()
+                        .set_safari()
+                        .set_edge()
+                        .set_firefox()
+                        .set_mozilla(),
+                )
+                .build()
+        })
+        .random()
 }