Merge branch 'rolling' into improve-async-multithreading

neon_arch 2023-07-15 19:50:31 +03:00
commit 74e4fc6169
51 changed files with 548 additions and 339 deletions


@@ -5,7 +5,7 @@
 use std::net::TcpListener;
-use websurfx::{config_parser::parser::Config, run};
+use websurfx::{config::parser::Config, run};
 /// The function that launches the main server and registers all the routes of the website.
 ///
@@ -26,7 +26,7 @@ async fn main() -> std::io::Result<()> {
     log::info!("started server on port {}", config.port);
-    let listener = TcpListener::bind((config.binding_ip_addr.clone(), config.port))?;
+    let listener = TcpListener::bind((config.binding_ip.clone(), config.port))?;
     run(listener, config)?.await
 }
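
An editorial aside, not part of the commit: the bind call accepts the renamed field unchanged because `(String, u16)` tuples implement `ToSocketAddrs`. A minimal, self-contained sketch:

```rust
use std::net::TcpListener;

fn main() -> std::io::Result<()> {
    // (ip, port) tuples implement ToSocketAddrs, which is why the renamed
    // `binding_ip` String can be handed to bind() together with the port.
    // Port 0 here just asks the OS for any free port.
    let listener = TcpListener::bind(("127.0.0.1".to_string(), 0))?;
    println!("bound on {}", listener.local_addr()?);
    Ok(())
}
```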

src/cache/cacher.rs

@@ -32,7 +32,7 @@ impl RedisCache {
     /// # Arguments
     ///
     /// * `url` - It takes an url as string.
-    fn compute_url_hash(url: &str) -> String {
+    fn hash_url(url: &str) -> String {
         format!("{:?}", compute(url))
     }
@@ -41,8 +41,8 @@ impl RedisCache {
     /// # Arguments
     ///
     /// * `url` - It takes an url as a string.
-    pub fn cached_results_json(&mut self, url: &str) -> Result<String, Box<dyn std::error::Error>> {
-        let hashed_url_string = Self::compute_url_hash(url);
+    pub fn cached_json(&mut self, url: &str) -> Result<String, Box<dyn std::error::Error>> {
+        let hashed_url_string = Self::hash_url(url);
         Ok(self.connection.get(hashed_url_string)?)
     }
@@ -59,7 +59,7 @@ impl RedisCache {
         json_results: String,
         url: &str,
     ) -> Result<(), Box<dyn std::error::Error>> {
-        let hashed_url_string = Self::compute_url_hash(url);
+        let hashed_url_string = Self::hash_url(url);
         // put results_json into cache
         self.connection.set(&hashed_url_string, json_results)?;
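
For readers outside the repo, a minimal editorial sketch (not from the commit) of the cache keying these renames preserve, assuming the `md5` crate that `compute` is imported from: the URL's hex digest is the Redis key on both the read and write paths, so `cached_json` and `cache_results` always agree on the key.

```rust
use md5::compute;

// Mirrors the renamed helper above: `md5::Digest` debug-formats as the
// 32-character lowercase hex digest of its input.
fn hash_url(url: &str) -> String {
    format!("{:?}", compute(url))
}

fn main() {
    let url = "http://127.0.0.1:8080/search?q=rust&page=1";
    // The same URL always maps to the same key, so a page stored by
    // `cache_results` is found again by `cached_json`.
    assert_eq!(hash_url(url), hash_url(url));
    println!("redis key: {}", hash_url(url));
}
```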


@@ -14,9 +14,9 @@ static CONFIG_FILE_NAME: &str = "config.lua";
 /// # Fields
 //
 /// * `port` - It stores the parsed port number option on which the server should launch.
-/// * `binding_ip_addr` - It stores the parsed ip address option on which the server should launch
+/// * `binding_ip` - It stores the parsed ip address option on which the server should launch
 /// * `style` - It stores the theming options for the website.
-/// * `redis_connection_url` - It stores the redis connection url address on which the redis
+/// * `redis_url` - It stores the redis connection url address on which the redis
 /// client should connect.
 /// * `aggregator` - It stores the option to whether enable or disable production use.
 /// * `logging` - It stores the option to whether enable or disable logs.
@@ -25,10 +25,10 @@ static CONFIG_FILE_NAME: &str = "config.lua";
 #[derive(Clone)]
 pub struct Config {
     pub port: u16,
-    pub binding_ip_addr: String,
+    pub binding_ip: String,
     pub style: Style,
-    pub redis_connection_url: String,
-    pub aggregator: AggreatorConfig,
+    pub redis_url: String,
+    pub aggregator: AggregatorConfig,
     pub logging: bool,
     pub debug: bool,
     pub upstream_search_engines: Vec<String>,
@@ -41,47 +41,38 @@ pub struct Config {
 /// * `random_delay` - It stores the option to whether enable or disable random delays between
 /// requests.
 #[derive(Clone)]
-pub struct AggreatorConfig {
+pub struct AggregatorConfig {
     pub random_delay: bool,
 }
 impl Config {
     /// A function which parses the config.lua file and puts all the parsed options in the newly
-    /// contructed Config struct and returns it.
+    /// constructed Config struct and returns it.
     ///
     /// # Error
     ///
     /// Returns a lua parse error if parsing of the config.lua file fails or has a syntax error
-    /// or io error if the config.lua file doesn't exists otherwise it returns a newly contructed
+    /// or io error if the config.lua file doesn't exists otherwise it returns a newly constructed
     /// Config struct with all the parsed config options from the parsed config file.
     pub fn parse() -> Result<Self, Box<dyn std::error::Error>> {
         Lua::new().context(|context| -> Result<Self, Box<dyn std::error::Error>> {
             let globals = context.globals();
             context
-                .load(&fs::read_to_string(
-                    Config::handle_different_config_file_path()?,
-                )?)
+                .load(&fs::read_to_string(Config::config_path()?)?)
                 .exec()?;
-            let production_use = globals.get::<_, bool>("production_use")?;
-            let aggregator_config = if production_use {
-                AggreatorConfig { random_delay: true }
-            } else {
-                AggreatorConfig {
-                    random_delay: false,
-                }
-            };
             Ok(Config {
                 port: globals.get::<_, u16>("port")?,
-                binding_ip_addr: globals.get::<_, String>("binding_ip_addr")?,
+                binding_ip: globals.get::<_, String>("binding_ip")?,
                 style: Style::new(
                     globals.get::<_, String>("theme")?,
                     globals.get::<_, String>("colorscheme")?,
                 ),
-                redis_connection_url: globals.get::<_, String>("redis_connection_url")?,
-                aggregator: aggregator_config,
+                redis_url: globals.get::<_, String>("redis_url")?,
+                aggregator: AggregatorConfig {
+                    random_delay: globals.get::<_, bool>("production_use")?,
+                },
                 logging: globals.get::<_, bool>("logging")?,
                 debug: globals.get::<_, bool>("debug")?,
                 upstream_search_engines: globals
@@ -104,35 +95,37 @@ impl Config {
     /// one (3).
     /// 3. `websurfx/` (under project folder ( or codebase in other words)) if it is not present
     /// here then it returns an error as mentioned above.
-    fn handle_different_config_file_path() -> Result<String, Box<dyn std::error::Error>> {
-        if Path::new(
-            format!(
-                "{}/.config/{}/config.lua",
-                std::env::var("HOME").unwrap(),
-                COMMON_DIRECTORY_NAME
-            )
-            .as_str(),
-        )
-        .exists()
-        {
-            Ok(format!(
+    fn config_path() -> Result<String, Box<dyn std::error::Error>> {
+        // check user config
+        let path = format!(
+            "{}/.config/{}/config.lua",
+            std::env::var("HOME").unwrap(),
+            COMMON_DIRECTORY_NAME
+        );
+        if Path::new(path.as_str()).exists() {
+            return Ok(format!(
                 "{}/.config/{}/{}",
                 std::env::var("HOME").unwrap(),
                 COMMON_DIRECTORY_NAME,
                 CONFIG_FILE_NAME
-            ))
-        } else if Path::new(
-            format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME).as_str(),
-        )
-        .exists()
-        {
-            Ok("/etc/xdg/websurfx/config.lua".to_string())
-        } else if Path::new(format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME).as_str())
+            ));
+        }
+        // look for config in /etc/xdg
+        if Path::new(format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME).as_str())
             .exists()
         {
-            Ok("./websurfx/config.lua".to_string())
-        } else {
-            Err("Config file not found!!".to_string().into())
+            return Ok("/etc/xdg/websurfx/config.lua".to_string());
         }
+        // use dev config
+        if Path::new(format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME).as_str()).exists()
+        {
+            return Ok("./websurfx/config.lua".to_string());
+        }
+        // if none of the configs above exist, return an error
+        Err("Config file not found!!".to_string().into())
    }
 }
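
Since the renamed options are plain Lua globals read back with `globals.get`, an existing config.lua must rename its keys to match. A minimal editorial sketch (not from the repo) using the same `rlua`-style `Lua::new().context(...)` API as `Config::parse` above; the inline config string and its values are illustrative:

```rust
use rlua::Lua;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Hypothetical config.lua contents using the *new* key names.
    let config_lua = r#"
        port = 8080
        binding_ip = "127.0.0.1"             -- was `binding_ip_addr`
        redis_url = "redis://127.0.0.1:6379" -- was `redis_connection_url`
        production_use = false               -- now read straight into `random_delay`
    "#;
    Lua::new().context(|context| -> Result<(), Box<dyn std::error::Error>> {
        context.load(config_lua).exec()?;
        let globals = context.globals();
        println!(
            "binding {}:{}, redis at {}",
            globals.get::<_, String>("binding_ip")?,
            globals.get::<_, u16>("port")?,
            globals.get::<_, String>("redis_url")?,
        );
        Ok(())
    })
}
```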


@@ -1,5 +1,5 @@
 //! This module provides public models for handling, storing and serializing parsed config file
-//! options from config.lua by grouping them togather.
+//! options from config.lua by grouping them together.
 use serde::{Deserialize, Serialize};


@@ -7,7 +7,7 @@ use std::collections::HashMap;
 use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
 use scraper::{Html, Selector};
-use crate::search_results_handler::aggregation_models::RawSearchResult;
+use crate::results::aggregation_models::RawSearchResult;
 use super::engine_models::{EngineError, SearchEngine};


@@ -1,7 +1,7 @@
 //! This module provides the error enum to handle different errors associated while requesting data from
 //! the upstream search engines with the search query provided by the user.
-use crate::search_results_handler::aggregation_models::RawSearchResult;
+use crate::results::aggregation_models::RawSearchResult;
 use error_stack::{IntoReport, Result, ResultExt};
 use std::{collections::HashMap, fmt, time::Duration};
@@ -14,7 +14,7 @@ use std::{collections::HashMap, fmt, time::Duration};
 /// search engines.
 /// * `UnexpectedError` - This variant handles all the errors which are unexpected or occur rarely
 /// and are errors mostly related to failure in initialization of HeaderMap, Selector errors and
-/// all other errors occuring within the code handling the `upstream search engines`.
+/// all other errors occurring within the code handling the `upstream search engines`.
 #[derive(Debug)]
 pub enum EngineError {
     EmptyResultSet,


@@ -6,7 +6,7 @@ use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
 use scraper::{Html, Selector};
 use std::collections::HashMap;
-use crate::search_results_handler::aggregation_models::RawSearchResult;
+use crate::results::aggregation_models::RawSearchResult;
 use super::engine_models::{EngineError, SearchEngine};
 use error_stack::{IntoReport, Report, Result, ResultExt};


@@ -1 +1 @@
-pub mod public_path_handler;
+pub mod public_paths;


@@ -17,15 +17,17 @@ static PUBLIC_DIRECTORY_NAME: &str = "public";
 /// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2)
 /// 2. Under project folder ( or codebase in other words) if it is not present
 /// here then it returns an error as mentioned above.
-pub fn handle_different_public_path() -> Result<String, Error> {
+pub fn public_path() -> Result<String, Error> {
     if Path::new(format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() {
-        Ok(format!("/opt/websurfx/{}", PUBLIC_DIRECTORY_NAME))
-    } else if Path::new(format!("./{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() {
-        Ok(format!("./{}", PUBLIC_DIRECTORY_NAME))
-    } else {
-        Err(Error::new(
-            std::io::ErrorKind::NotFound,
-            "Themes (public) folder not found!!",
-        ))
+        return Ok(format!("/opt/websurfx/{}", PUBLIC_DIRECTORY_NAME));
     }
+    if Path::new(format!("./{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() {
+        return Ok(format!("./{}", PUBLIC_DIRECTORY_NAME));
+    }
+    Err(Error::new(
+        std::io::ErrorKind::NotFound,
+        "Themes (public) folder not found!!",
+    ))
 }
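
An editorial restatement (not in the commit) of the same two-step lookup, with the precedence as data; `None` corresponds to the `NotFound` error the real function returns:

```rust
use std::path::Path;

// Check the system-wide install location first, then the checked-out repo.
fn public_path() -> Option<String> {
    ["/opt/websurfx/public", "./public"]
        .into_iter()
        .find(|dir| Path::new(dir).exists())
        .map(String::from)
}

fn main() {
    match public_path() {
        Some(dir) => println!("serving theme files from {dir}"),
        None => eprintln!("Themes (public) folder not found!!"),
    }
}
```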


@@ -2,10 +2,10 @@
 //! and register all the routes for the `websurfx` meta search engine website.
 pub mod cache;
-pub mod config_parser;
+pub mod config;
 pub mod engines;
 pub mod handler;
-pub mod search_results_handler;
+pub mod results;
 pub mod server;
 use std::net::TcpListener;
@@ -14,9 +14,9 @@ use crate::server::routes;
 use actix_files as fs;
 use actix_web::{dev::Server, middleware::Logger, web, App, HttpServer};
-use config_parser::parser::Config;
+use config::parser::Config;
 use handlebars::Handlebars;
-use handler::public_path_handler::handle_different_public_path;
+use handler::public_paths::public_path;
 /// Runs the web server on the provided TCP listener and returns a `Server` instance.
 ///
@@ -32,7 +32,7 @@ use handler::public_path_handler::handle_different_public_path;
 ///
 /// ```rust
 /// use std::net::TcpListener;
-/// use websurfx::{config_parser::parser::Config, run};
+/// use websurfx::{config::parser::Config, run};
 ///
 /// let config = Config::parse().unwrap();
 /// let listener = TcpListener::bind("127.0.0.1:8080").expect("Failed to bind address");
@@ -41,7 +41,7 @@ use handler::public_path_handler::handle_different_public_path;
 pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
     let mut handlebars: Handlebars = Handlebars::new();
-    let public_folder_path: String = handle_different_public_path()?;
+    let public_folder_path: String = public_path()?;
     handlebars
         .register_templates_directory(".html", format!("{}/templates", public_folder_path))


@@ -3,7 +3,7 @@
 use serde::{Deserialize, Serialize};
-use crate::{config_parser::parser_models::Style, engines::engine_models::EngineError};
+use crate::{config::parser_models::Style, engines::engine_models::EngineError};
 /// A named struct to store, serialize and deserializes the individual search result from all the
 /// scraped and aggregated search results from the upstream search engines.


@@ -1,14 +1,14 @@
 //! This module provides the functionality to handle different routes of the `websurfx`
-//! meta search engine website and provide approriate response to each route/page
+//! meta search engine website and provide appropriate response to each route/page
 //! when requested.
 use std::fs::read_to_string;
 use crate::{
     cache::cacher::RedisCache,
-    config_parser::parser::Config,
-    handler::public_path_handler::handle_different_public_path,
-    search_results_handler::{aggregation_models::SearchResults, aggregator::aggregate},
+    config::parser::Config,
+    handler::public_paths::public_path,
+    results::{aggregation_models::SearchResults, aggregator::aggregate},
 };
 use actix_web::{get, web, HttpRequest, HttpResponse};
 use handlebars::Handlebars;
@@ -87,86 +87,25 @@ pub async fn search(
     config: web::Data<Config>,
 ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
     let params = web::Query::<SearchParams>::from_query(req.query_string())?;
-    //Initialize redis cache connection struct
-    let mut redis_cache = RedisCache::new(config.redis_connection_url.clone())?;
     match &params.q {
         Some(query) => {
             if query.trim().is_empty() {
-                Ok(HttpResponse::Found()
+                return Ok(HttpResponse::Found()
                     .insert_header(("location", "/"))
-                    .finish())
-            } else {
-                let page_url: String; // Declare the page_url variable without initializing it
-                // ...
-                let page = match params.page {
-                    Some(page_number) => {
-                        if page_number <= 1 {
-                            page_url = format!(
-                                "http://{}:{}/search?q={}&page={}",
-                                config.binding_ip_addr, config.port, query, 1
-                            );
-                            1
-                        } else {
-                            page_url = format!(
-                                "http://{}:{}/search?q={}&page={}",
-                                config.binding_ip_addr, config.port, query, page_number
-                            );
-                            page_number
-                        }
-                    }
-                    None => {
-                        page_url = format!(
-                            "http://{}:{}{}&page={}",
-                            config.binding_ip_addr,
-                            config.port,
-                            req.uri(),
-                            1
-                        );
-                        1
-                    }
-                };
-                // fetch the cached results json.
-                let cached_results_json = redis_cache.cached_results_json(&page_url);
-                // check if fetched catch results was indeed fetched or it was an error and if so
-                // handle the data accordingly.
-                match cached_results_json {
-                    Ok(results_json) => {
-                        let new_results_json: SearchResults = serde_json::from_str(&results_json)?;
-                        let page_content: String = hbs.render("search", &new_results_json)?;
-                        Ok(HttpResponse::Ok().body(page_content))
-                    }
-                    Err(_) => {
-                        // check if the cookie value is empty or not if it is empty then use the
-                        // default selected upstream search engines from the config file otherwise
-                        // parse the non-empty cookie and grab the user selected engines from the
-                        // UI and use that.
-                        let mut results_json: crate::search_results_handler::aggregation_models::SearchResults = match req.cookie("appCookie") {
-                            Some(cookie_value) => {
-                                let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
-                                aggregate(query.clone(), page, config.aggregator.random_delay, config.debug, cookie_value.engines).await?
-                            },
-                            None => aggregate(query.clone(), page, config.aggregator.random_delay, config.debug, config.upstream_search_engines.clone()).await?,
-                        };
-                        results_json.add_style(config.style.clone());
-                        // check whether the results grabbed from the upstream engines are empty or
-                        // not if they are empty then set the empty_result_set option to true in
-                        // the result json.
-                        if results_json.is_empty_result_set() {
-                            results_json.set_empty_result_set();
-                        }
-                        redis_cache
-                            .cache_results(serde_json::to_string(&results_json)?, &page_url)?;
-                        let page_content: String = hbs.render("search", &results_json)?;
-                        Ok(HttpResponse::Ok().body(page_content))
-                    }
-                }
+                    .finish());
+            }
+            let page = match &params.page {
+                Some(page) => *page,
+                None => 0,
+            };
+            let url = format!(
+                "http://{}:{}/search?q={}&page={}",
+                config.binding_ip, config.port, query, page
+            );
+            let results_json = results(url, &config, query.to_string(), page, req).await?;
+            let page_content: String = hbs.render("search", &results_json)?;
+            Ok(HttpResponse::Ok().body(page_content))
         }
         None => Ok(HttpResponse::Found()
            .insert_header(("location", "/"))
@@ -174,11 +113,70 @@ pub async fn search(
     }
 }
+/// Fetches the results for a query and page.
+/// First checks the redis cache; if that fails, it gets proper results.
+async fn results(
+    url: String,
+    config: &Config,
+    query: String,
+    page: u32,
+    req: HttpRequest,
+) -> Result<SearchResults, Box<dyn std::error::Error>> {
+    // Initialize redis cache connection struct
+    let mut redis_cache = RedisCache::new(config.redis_url.clone())?;
+    // fetch the cached results json.
+    let cached_results_json = redis_cache.cached_json(&url);
+    // check if the fetched cache results were indeed fetched or were an error, and if so
+    // handle the data accordingly.
+    match cached_results_json {
+        Ok(results_json) => Ok(serde_json::from_str::<SearchResults>(&results_json).unwrap()),
+        Err(_) => {
+            // check if the cookie value is empty or not; if it is empty then use the
+            // default selected upstream search engines from the config file, otherwise
+            // parse the non-empty cookie and grab the user selected engines from the
+            // UI and use that.
+            let mut results_json: crate::results::aggregation_models::SearchResults = match req
+                .cookie("appCookie")
+            {
+                Some(cookie_value) => {
+                    let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
+                    aggregate(
+                        query,
+                        page,
+                        config.aggregator.random_delay,
+                        config.debug,
+                        cookie_value.engines,
+                    )
+                    .await?
+                }
+                None => {
+                    aggregate(
+                        query,
+                        page,
+                        config.aggregator.random_delay,
+                        config.debug,
+                        config.upstream_search_engines.clone(),
+                    )
+                    .await?
+                }
+            };
+            results_json.add_style(config.style.clone());
+            // check whether the results grabbed from the upstream engines are empty or
+            // not; if they are empty then set the empty_result_set option to true in
+            // the result json.
+            if results_json.is_empty_result_set() {
+                results_json.set_empty_result_set();
+            }
+            redis_cache.cache_results(serde_json::to_string(&results_json)?, &url)?;
+            Ok(results_json)
+        }
+    }
+}
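
The cookie branch above deserializes the `appCookie` value into a `Cookie` struct defined elsewhere in this file; only its `engines` field is visible in this diff. A hypothetical sketch of a payload `serde_json::from_str` would accept under that assumption (field name from the diff, values illustrative):

```rust
use serde::Deserialize;

// Hypothetical mirror of the `Cookie` struct used above; the real definition
// is outside this diff and may carry more fields than `engines`.
#[derive(Deserialize, Debug)]
struct Cookie {
    engines: Vec<String>,
}

fn main() -> Result<(), serde_json::Error> {
    // Illustrative cookie value: the engine names are assumptions.
    let raw = r#"{ "engines": ["duckduckgo", "searx"] }"#;
    let cookie: Cookie = serde_json::from_str(raw)?;
    println!("user-selected engines: {:?}", cookie.engines);
    Ok(())
}
```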
 /// Handles the route of robots.txt page of the `websurfx` meta search engine website.
 #[get("/robots.txt")]
 pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
-    let page_content: String =
-        read_to_string(format!("{}/robots.txt", handle_different_public_path()?))?;
+    let page_content: String = read_to_string(format!("{}/robots.txt", public_path()?))?;
     Ok(HttpResponse::Ok()
         .content_type("text/plain; charset=ascii")
         .body(page_content))