Rename Things, refactor some code

BREAKING: renames `binding_ip_addr` to `binding_ip` and `redis_connection_url` to `redis_url`. Renames a lot of internals as well, but they are too many to mention.
2023-07-03 19:30:25 +02:00 · 2023-07-03 19:30:25 +02:00 · 440216871d
commit 440216871d
parent b18db5414a
17 changed files with 80 additions and 74 deletions
--- a/src/results/aggregation_models.rs
+++ b/src/results/aggregation_models.rs
@ -0,0 +1,155 @@
+//! This module provides public models for handling, storing and serializing the search result
+//! data scraped from the upstream search engines.
+
+use serde::{Deserialize, Serialize};
+
+use crate::config::parser_models::Style;
+
+/// A single aggregated search result that serde can serialize and deserialize; its field
+/// names are converted to camelCase by `rename_all` (e.g. `visiting_url` -> `visitingUrl`).
+///
+/// # Fields
+///
+/// * `title` - The title of the search result.
+/// * `visiting_url` - The url which is accessed when the result is clicked (the href url in
+/// html, in simple words).
+/// * `url` - The url to be displayed below the search result title in html.
+/// * `description` - The description of the search result.
+/// * `engine` - The names of the upstream engines which provided this result.
+#[derive(Debug, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct SearchResult {
+    pub title: String,
+    pub visiting_url: String,
+    pub url: String,
+    pub description: String,
+    pub engine: Vec<String>,
+}
+
+impl SearchResult {
+    /// Constructs a new `SearchResult` from the given values, stored unchanged in the struct.
+    ///
+    /// # Arguments
+    ///
+    /// * `title` - The title of the search result.
+    /// * `visiting_url` - The url which is accessed when the result is clicked
+    /// (the href url in html, in simple words).
+    /// * `url` - The url to be displayed below the search result title in html.
+    /// * `description` - The description of the search result.
+    /// * `engine` - The names of the upstream engines which provided this result.
+    pub fn new(
+        title: String,
+        visiting_url: String,
+        url: String,
+        description: String,
+        engine: Vec<String>,
+    ) -> Self {
+        SearchResult {
+            title,
+            visiting_url,
+            url,
+            description,
+            engine,
+        }
+    }
+}
+
+/// A named struct to store the raw search results scraped from the upstream search engines
+/// before they are aggregated. It derives the `Clone` trait, which is needed to write
+/// idiomatic rust using `Iterators`.
+///
+/// # Fields
+///
+/// * `title` - The title of the search result.
+/// * `visiting_url` - The url which is accessed when the result is clicked
+/// (the href url in html, in simple words).
+/// * `description` - The description of the search result.
+/// * `engine` - The names of the upstream engines which provided this result.
+#[derive(Clone)]
+pub struct RawSearchResult {
+    pub title: String,
+    pub visiting_url: String,
+    pub description: String,
+    pub engine: Vec<String>,
+}
+
+impl RawSearchResult {
+    /// Constructs a new `RawSearchResult` from the given values, stored unchanged.
+    ///
+    /// # Arguments
+    ///
+    /// * `title` - The title of the search result.
+    /// * `visiting_url` - The url which is accessed when the result is clicked
+    /// (the href url in html, in simple words).
+    /// * `description` - The description of the search result.
+    /// * `engine` - The names of the upstream engines which provided this result.
+    pub fn new(
+        title: String,
+        visiting_url: String,
+        description: String,
+        engine: Vec<String>,
+    ) -> Self {
+        RawSearchResult {
+            title,
+            visiting_url,
+            description,
+            engine,
+        }
+    }
+
+    /// Appends the given engine name to the end of this result's `engine` vector.
+    ///
+    /// # Arguments
+    ///
+    /// * `engine` - The engine name to append, provided as a `String`.
+    pub fn add_engines(&mut self, engine: String) {
+        self.engine.push(engine)
+    }
+
+    /// Consumes the struct and returns a copy of the first engine name stored in it.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the `engine` vector is empty, because the first element is unwrapped.
+    pub fn engine(self) -> String {
+        self.engine.get(0).unwrap().to_string()
+    }
+}
+
+/// A named struct to store, serialize and deserialize all the search results scraped and
+/// aggregated from the upstream search engines; fields are renamed to camelCase by serde.
+///
+/// # Fields
+///
+/// * `results` - The individual serializable `SearchResult` structs, stored in a vector.
+/// * `page_query` - The current page's search query `q` provided in the search url.
+/// * `style` - The `Style` from `crate::config::parser_models` attached to these results.
+#[derive(Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct SearchResults {
+    pub results: Vec<SearchResult>,
+    pub page_query: String,
+    pub style: Style,
+}
+
+impl SearchResults {
+    /// Constructs a new `SearchResults` with the given arguments; `style` starts out as
+    /// `Style::new` called with two empty strings until `add_style` replaces it.
+    ///
+    /// # Arguments
+    ///
+    /// * `results` - A vector of the individual serializable `SearchResult` structs.
+    /// * `page_query` - The current page's search query `q` provided in the search url,
+    /// stored as given.
+    pub fn new(results: Vec<SearchResult>, page_query: String) -> Self {
+        SearchResults {
+            results,
+            page_query,
+            style: Style::new("".to_string(), "".to_string()),
+        }
+    }
+
+    pub fn add_style(&mut self, style: Style) {
+        self.style = style;
+    }
+}
--- a/src/results/aggregator.rs
+++ b/src/results/aggregator.rs
@ -0,0 +1,108 @@
+//! This module provides the functionality to scrape and gather all the results from the upstream
+//! search engines and then remove duplicate results.
+
+use std::{collections::HashMap, time::Duration};
+
+use rand::Rng;
+use tokio::join;
+
+use super::{
+    aggregation_models::{RawSearchResult, SearchResult, SearchResults},
+    user_agent::random_user_agent,
+};
+
+use crate::engines::{duckduckgo, searx};
+
+/// Queries the upstream engines concurrently and merges their results into a `SearchResults`.
+///
+/// Results are keyed by the map key the engine modules return. When Searx reports a key that
+/// DuckDuckGo already returned, the first Searx engine name is appended to the existing entry
+/// instead of storing a duplicate. The map key is then passed as the `url` of each
+/// `SearchResult`, and the query is stored alongside the results so the search page can show
+/// it again in the search bar.
+///
+/// # Example:
+///
+/// If you search from the url like `https://127.0.0.1/search?q=huston` then the search bar should
+/// contain the word huston and not remain empty.
+///
+/// # Arguments
+///
+/// * `query` - The query string sent to the upstream search engines.
+/// * `page` - The page number as a `u32`.
+/// * `random_delay` - When `true`, waits a random 1-9 seconds before making the requests.
+/// * `debug` - When `true`, logs engine errors; when `false`, the delay is always applied.
+///
+/// # Errors
+///
+/// A failing engine is treated as having returned no results, so this body only returns `Ok`.
+pub async fn aggregate(
+    query: &str,
+    page: u32,
+    random_delay: bool,
+    debug: bool,
+) -> Result<SearchResults, Box<dyn std::error::Error>> {
+    let user_agent: String = random_user_agent();
+    let mut result_map: HashMap<String, RawSearchResult> = HashMap::new();
+
+    // Add a random delay before making the request, without blocking the executor thread.
+    if random_delay || !debug {
+        // The RNG is a temporary here, so it is not held across the `.await` below.
+        let delay_secs = rand::thread_rng().gen_range(1..10);
+        tokio::time::sleep(Duration::from_secs(delay_secs)).await;
+    }
+
+    // fetch results from upstream search engines simultaneously/concurrently.
+    let (ddg_map_results, searx_map_results) = join!(
+        duckduckgo::results(query, page, &user_agent),
+        searx::results(query, page, &user_agent)
+    );
+
+    let ddg_map_results = ddg_map_results.unwrap_or_else(|e| {
+        if debug {
+            log::error!("Error fetching results from DuckDuckGo: {:?}", e);
+        }
+        HashMap::new()
+    });
+
+    let searx_map_results = searx_map_results.unwrap_or_else(|e| {
+        if debug {
+            log::error!("Error fetching results from Searx: {:?}", e);
+        }
+        HashMap::new()
+    });
+
+    result_map.extend(ddg_map_results);
+
+    searx_map_results.into_iter().for_each(|(key, value)| {
+        result_map
+            .entry(key)
+            .and_modify(|result| {
+                result.add_engines(value.clone().engine());
+            })
+            .or_insert_with(|| -> RawSearchResult {
+                RawSearchResult::new(
+                    value.title.clone(),
+                    value.visiting_url.clone(),
+                    value.description.clone(),
+                    value.engine.clone(),
+                )
+            });
+    });
+
+    Ok(SearchResults::new(
+        result_map
+            .into_iter()
+            .map(|(key, value)| {
+                SearchResult::new(
+                    value.title,
+                    value.visiting_url,
+                    key,
+                    value.description,
+                    value.engine,
+                )
+            })
+            .collect(),
+        query.to_string(),
+    ))
+}
--- a/src/results/mod.rs
+++ b/src/results/mod.rs
@ -0,0 +1,3 @@
+pub mod aggregation_models;
+pub mod aggregator;
+pub mod user_agent;
--- a/src/results/user_agent.rs
+++ b/src/results/user_agent.rs
@ -0,0 +1,28 @@
+//! This module provides the functionality to generate a random user agent string.
+
+use fake_useragent::{Browsers, UserAgents, UserAgentsBuilder};
+
+static USER_AGENTS: once_cell::sync::Lazy<UserAgents> = once_cell::sync::Lazy::new(|| {
+    UserAgentsBuilder::new()
+        .cache(false)
+        .dir("/tmp")
+        .thread(1)
+        .set_browsers(
+            Browsers::new()
+                .set_chrome()
+                .set_safari()
+                .set_edge()
+                .set_firefox()
+                .set_mozilla(),
+        )
+        .build()
+});
+
+/// Returns a random user agent string, used to improve privacy of the user.
+///
+/// # Returns
+///
+/// A `String` picked via `random()` from the lazily initialised `USER_AGENTS` static.
+pub fn random_user_agent() -> String {
+    USER_AGENTS.random().to_string()
+}