diff --git a/.gitignore b/.gitignore
index c8d5b9e..5889518 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@ package-lock.json
dump.rdb
.vscode
megalinter-reports/
+dhat-heap.json
diff --git a/Cargo.lock b/Cargo.lock
index 2ba60bf..aff845e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -300,12 +300,24 @@ version = "1.0.75"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6"
+[[package]]
+name = "arc-swap"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6"
+
[[package]]
name = "askama_escape"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "619743e34b5ba4e9703bba34deac3427c72507c7159f5fd030aea8cac0cfe341"
+[[package]]
+name = "async-once-cell"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9338790e78aa95a416786ec8389546c4b6a1dfc3dc36071ed9518a9413a542eb"
+
[[package]]
name = "async-trait"
version = "0.1.73"
@@ -571,7 +583,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4"
dependencies = [
"bytes 1.4.0",
+ "futures-core",
"memchr",
+ "pin-project-lite",
+ "tokio 1.32.0",
+ "tokio-util",
]
[[package]]
@@ -845,6 +861,22 @@ dependencies = [
"syn 1.0.109",
]
+[[package]]
+name = "dhat"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4f2aaf837aaf456f6706cb46386ba8dffd4013a757e36f4ea05c20dd46b209a3"
+dependencies = [
+ "backtrace",
+ "lazy_static",
+ "mintex",
+ "parking_lot 0.12.1",
+ "rustc-hash",
+ "serde",
+ "serde_json",
+ "thousands",
+]
+
[[package]]
name = "digest"
version = "0.10.7"
@@ -1630,6 +1662,16 @@ version = "0.2.147"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
+[[package]]
+name = "libmimalloc-sys"
+version = "0.1.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "25d058a81af0d1c22d7a1c948576bee6d673f7af3c0f35564abd6c81122f513d"
+dependencies = [
+ "cc",
+ "libc",
+]
+
[[package]]
name = "linux-raw-sys"
version = "0.4.5"
@@ -1767,6 +1809,15 @@ dependencies = [
"autocfg 1.1.0",
]
+[[package]]
+name = "mimalloc"
+version = "0.1.38"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "972e5f23f6716f62665760b0f4cbf592576a80c7b879ba9beaafc0e558894127"
+dependencies = [
+ "libmimalloc-sys",
+]
+
[[package]]
name = "mime"
version = "0.3.17"
@@ -1792,6 +1843,16 @@ dependencies = [
"adler",
]
+[[package]]
+name = "mintex"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fd7c5ba1c3b5a23418d7bbf98c71c3d4946a0125002129231da8d6b723d559cb"
+dependencies = [
+ "once_cell",
+ "sys-info",
+]
+
[[package]]
name = "mio"
version = "0.6.23"
@@ -1835,6 +1896,20 @@ dependencies = [
"ws2_32-sys",
]
+[[package]]
+name = "mlua"
+version = "0.8.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0bb37b0ba91f017aa7ca2b98ef99496827770cd635b4a932a6047c5b4bbe678e"
+dependencies = [
+ "bstr",
+ "cc",
+ "num-traits",
+ "once_cell",
+ "pkg-config",
+ "rustc-hash",
+]
+
[[package]]
name = "native-tls"
version = "0.2.11"
@@ -2202,6 +2277,26 @@ dependencies = [
"siphasher 0.3.11",
]
+[[package]]
+name = "pin-project"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fda4ed1c6c173e3fc7a83629421152e01d7b1f9b7f65fb301e490e8cfc656422"
+dependencies = [
+ "pin-project-internal",
+]
+
+[[package]]
+name = "pin-project-internal"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405"
+dependencies = [
+ "proc-macro2 1.0.66",
+ "quote 1.0.33",
+ "syn 2.0.29",
+]
+
[[package]]
name = "pin-project-lite"
version = "0.2.13"
@@ -2504,12 +2599,21 @@ version = "0.23.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f49cdc0bb3f412bf8e7d1bd90fe1d9eb10bc5c399ba90973c14662a27b3f8ba"
dependencies = [
+ "arc-swap",
+ "async-trait",
+ "bytes 1.4.0",
"combine",
+ "futures 0.3.28",
+ "futures-util",
"itoa 1.0.9",
"percent-encoding 2.3.0",
+ "pin-project-lite",
"ryu",
"sha1_smol",
"socket2 0.4.9",
+ "tokio 1.32.0",
+ "tokio-retry",
+ "tokio-util",
"url 2.4.1",
]
@@ -2628,36 +2732,18 @@ dependencies = [
"winreg 0.50.0",
]
-[[package]]
-name = "rlua"
-version = "0.19.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d33e5ba15c3d43178f283ed5863d4531e292fc0e56fb773f3bea45f18e3a42a"
-dependencies = [
- "bitflags 1.3.2",
- "bstr",
- "libc",
- "num-traits",
- "rlua-lua54-sys",
-]
-
-[[package]]
-name = "rlua-lua54-sys"
-version = "0.1.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7aafabafe1895cb4a2be81a56d7ff3d46bf4b5d2f9cfdbea2ed404cdabe96474"
-dependencies = [
- "cc",
- "libc",
- "pkg-config",
-]
-
[[package]]
name = "rustc-demangle"
version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
+[[package]]
+name = "rustc-hash"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
+
[[package]]
name = "rustc_version"
version = "0.2.3"
@@ -2957,6 +3043,9 @@ name = "smallvec"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9"
+dependencies = [
+ "serde",
+]
[[package]]
name = "socket2"
@@ -3098,6 +3187,16 @@ dependencies = [
"unicode-xid 0.2.4",
]
+[[package]]
+name = "sys-info"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b3a0d0aba8bf96a0e1ddfdc352fc53b3df7f39318c71854910c3c4b024ae52c"
+dependencies = [
+ "cc",
+ "libc",
+]
+
[[package]]
name = "tempfile"
version = "3.8.0"
@@ -3151,6 +3250,12 @@ dependencies = [
"syn 2.0.29",
]
+[[package]]
+name = "thousands"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3bf63baf9f5039dadc247375c29eb13706706cfde997d0330d05aa63a77d8820"
+
[[package]]
name = "time"
version = "0.1.45"
@@ -3335,6 +3440,17 @@ dependencies = [
"tokio-sync",
]
+[[package]]
+name = "tokio-retry"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f57eb36ecbe0fc510036adff84824dd3c24bb781e21bfa67b69d556aa85214f"
+dependencies = [
+ "pin-project",
+ "rand 0.8.5",
+ "tokio 1.32.0",
+]
+
[[package]]
name = "tokio-sync"
version = "0.1.8"
@@ -3688,24 +3804,29 @@ dependencies = [
"actix-files",
"actix-governor",
"actix-web",
+ "async-once-cell",
"async-trait",
"criterion",
+ "dhat",
"env_logger",
"error-stack",
"fake-useragent",
+ "futures 0.3.28",
"handlebars",
"log",
"md5",
+ "mimalloc",
+ "mlua",
"once_cell",
"rand 0.8.5",
"redis",
"regex",
"reqwest 0.11.20",
- "rlua",
"rusty-hook",
"scraper",
"serde",
"serde_json",
+ "smallvec 1.11.0",
"tempfile",
"tokio 1.32.0",
]
diff --git a/Cargo.toml b/Cargo.toml
index e5434f6..4609932 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,7 +8,7 @@ license = "AGPL-3.0"
[dependencies]
reqwest = {version="0.11.20",features=["json"]}
-tokio = {version="1.32.0",features=["full"]}
+tokio = {version="1.32.0",features=["rt-multi-thread","macros"]}
serde = {version="1.0.188",features=["derive"]}
handlebars = { version = "4.4.0", features = ["dir_source"] }
scraper = {version="0.17.1"}
@@ -48,13 +48,17 @@ rpath = false
[profile.release]
opt-level = 3
-debug = false
+debug = false # Comment this line out only when profiling with the dhat profiler
+# debug = 1 # Uncomment this line only when profiling with the dhat profiler
split-debuginfo = '...'
debug-assertions = false
overflow-checks = false
-lto = 'thin'
+lto = true
panic = 'abort'
incremental = false
-codegen-units = 16
+codegen-units = 1
rpath = false
strip = "debuginfo"
+
+[features]
+dhat-heap = ["dep:dhat"]
diff --git a/Dockerfile b/Dockerfile
index 0c54453..2611aca 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -19,7 +19,7 @@ COPY . .
RUN cargo install --path .
# We do not need the Rust toolchain to run the binary!
-FROM gcr.io/distroless/cc-debian11
+FROM gcr.io/distroless/cc-debian12
COPY --from=builder /app/public/ /opt/websurfx/public/
COPY --from=builder /app/websurfx/config.lua /etc/xdg/websurfx/config.lua
COPY --from=builder /usr/local/cargo/bin/* /usr/local/bin/
diff --git a/README.md b/README.md
index 07d73f5..8287c1d 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
Readme |
Discord |
GitHub |
- Documentation
+ Documentation
std::io::Result<()> {
+ // Initialize the dhat heap profiler (only when the `dhat-heap` feature is enabled).
+ #[cfg(feature = "dhat-heap")]
+ let _profiler = dhat::Profiler::new_heap();
+
// Initialize the parsed config file.
let config = Config::parse(false).unwrap();
diff --git a/src/cache/cacher.rs b/src/cache/cacher.rs
index 44d0710..b2508b5 100644
--- a/src/cache/cacher.rs
+++ b/src/cache/cacher.rs
@@ -1,17 +1,27 @@
//! This module provides the functionality to cache the aggregated results fetched and aggregated
//! from the upstream search engines in a json format.
+use error_stack::Report;
+use futures::future::try_join_all;
use md5::compute;
-use redis::{Client, Commands, Connection};
+use redis::{aio::ConnectionManager, AsyncCommands, Client, RedisError};
+
+use super::error::PoolError;
/// A named struct which stores the redis Connection url address to which the client will
/// connect to.
///
/// # Fields
///
-/// * `redis_connection_url` - It stores the redis Connection url address.
+/// * `connection_pool` - It stores a pool of connections ready to be used.
+/// * `pool_size` - It stores the size of the connection pool (in other words the number of
+/// connections that should be stored in the pool).
+/// * `current_connection` - It stores the index of the connection that is currently being used.
+#[derive(Clone)]
pub struct RedisCache {
- connection: Connection,
+ connection_pool: Vec,
+ pool_size: u8,
+ current_connection: u8,
}
impl RedisCache {
@@ -19,11 +29,25 @@ impl RedisCache {
///
/// # Arguments
///
- /// * `redis_connection_url` - It stores the redis Connection url address.
- pub fn new(redis_connection_url: String) -> Result> {
+ /// * `redis_connection_url` - It takes the redis Connection url address.
+ /// * `pool_size` - It takes the size of the connection pool (in other words the number of
+ /// connections that should be stored in the pool).
+ pub async fn new(
+ redis_connection_url: &str,
+ pool_size: u8,
+ ) -> Result> {
let client = Client::open(redis_connection_url)?;
- let connection = client.get_connection()?;
- let redis_cache = RedisCache { connection };
+ let mut tasks: Vec<_> = Vec::new();
+
+ for _ in 0..pool_size {
+ tasks.push(client.get_tokio_connection_manager());
+ }
+
+ let redis_cache = RedisCache {
+ connection_pool: try_join_all(tasks).await?,
+ pool_size,
+ current_connection: Default::default(),
+ };
Ok(redis_cache)
}
@@ -32,7 +56,7 @@ impl RedisCache {
/// # Arguments
///
/// * `url` - It takes an url as string.
- fn hash_url(url: &str) -> String {
+ fn hash_url(&self, url: &str) -> String {
format!("{:?}", compute(url))
}
@@ -41,9 +65,42 @@ impl RedisCache {
/// # Arguments
///
/// * `url` - It takes an url as a string.
- pub fn cached_json(&mut self, url: &str) -> Result> {
- let hashed_url_string = Self::hash_url(url);
- Ok(self.connection.get(hashed_url_string)?)
+ pub async fn cached_json(&mut self, url: &str) -> Result> {
+ self.current_connection = Default::default();
+ let hashed_url_string: &str = &self.hash_url(url);
+
+ let mut result: Result = self.connection_pool
+ [self.current_connection as usize]
+ .get(hashed_url_string)
+ .await;
+
+ // Check whether the command on the current connection failed with a "connection dropped"
+ // error. If it did, move on to the next connection in the pool and retry the same redis
+ // command on it, then check that result in the same way. This repeats until a connection
+ // either succeeds, in which case the result is returned, or fails with a different error,
+ // which is returned as a `PoolError::RedisError`. If every connection in the pool fails
+ // with a connection dropped error, a `PoolError::PoolExhaustionWithConnectionDropError`
+ // is returned instead.
+ loop {
+ match result {
+ Err(error) => match error.is_connection_dropped() {
+ true => {
+ self.current_connection += 1;
+ if self.current_connection == self.pool_size {
+ return Err(Report::new(
+ PoolError::PoolExhaustionWithConnectionDropError,
+ ));
+ }
+ result = self.connection_pool[self.current_connection as usize]
+ .get(hashed_url_string)
+ .await;
+ continue;
+ }
+ false => return Err(Report::new(PoolError::RedisError(error))),
+ },
+ Ok(res) => return Ok(res),
+ }
+ }
}
/// A function which caches the results by using the hashed `url` as the key and
@@ -54,21 +111,45 @@ impl RedisCache {
///
/// * `json_results` - It takes the json results string as an argument.
/// * `url` - It takes the url as a String.
- pub fn cache_results(
+ pub async fn cache_results(
&mut self,
- json_results: String,
+ json_results: &str,
url: &str,
- ) -> Result<(), Box> {
- let hashed_url_string = Self::hash_url(url);
+ ) -> Result<(), Report> {
+ self.current_connection = Default::default();
+ let hashed_url_string: &str = &self.hash_url(url);
- // put results_json into cache
- self.connection.set(&hashed_url_string, json_results)?;
+ let mut result: Result<(), RedisError> = self.connection_pool
+ [self.current_connection as usize]
+ .set_ex(hashed_url_string, json_results, 60)
+ .await;
- // Set the TTL for the key to 60 seconds
- self.connection
- .expire::(hashed_url_string, 60)
- .unwrap();
-
- Ok(())
+ // Check whether the command on the current connection failed with a "connection dropped"
+ // error. If it did, move on to the next connection in the pool and retry the same redis
+ // command on it, then check that result in the same way. This repeats until a connection
+ // either succeeds, in which case `Ok(())` is returned, or fails with a different error,
+ // which is returned as a `PoolError::RedisError`. If every connection in the pool fails
+ // with a connection dropped error, a `PoolError::PoolExhaustionWithConnectionDropError`
+ // is returned instead.
+ loop {
+ match result {
+ Err(error) => match error.is_connection_dropped() {
+ true => {
+ self.current_connection += 1;
+ if self.current_connection == self.pool_size {
+ return Err(Report::new(
+ PoolError::PoolExhaustionWithConnectionDropError,
+ ));
+ }
+ result = self.connection_pool[self.current_connection as usize]
+ .set_ex(hashed_url_string, json_results, 60)
+ .await;
+ continue;
+ }
+ false => return Err(Report::new(PoolError::RedisError(error))),
+ },
+ Ok(_) => return Ok(()),
+ }
+ }
}
}
diff --git a/src/cache/error.rs b/src/cache/error.rs
new file mode 100644
index 0000000..efd87c9
--- /dev/null
+++ b/src/cache/error.rs
@@ -0,0 +1,40 @@
+//! This module provides the error enum used to handle the different errors that can occur while
+//! requesting data from the redis server using an async connection pool.
+use std::fmt;
+
+use redis::RedisError;
+
+/// A custom error type used for handling redis async pool associated errors.
+///
+/// This enum provides variants for two different categories of errors:
+/// * `RedisError` - This variant handles all errors related to `RedisError`.
+/// * `PoolExhaustionWithConnectionDropError` - This variant handles the error
+/// which occurs when all the connections in the connection pool return a connection
+/// dropped redis error.
+#[derive(Debug)]
+pub enum PoolError {
+ RedisError(RedisError),
+ PoolExhaustionWithConnectionDropError,
+}
+
+impl fmt::Display for PoolError {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match self {
+ PoolError::RedisError(redis_error) => {
+ if let Some(detail) = redis_error.detail() {
+ write!(f, "{}", detail)
+ } else {
+ write!(f, "")
+ }
+ }
+ PoolError::PoolExhaustionWithConnectionDropError => {
+ write!(
+ f,
+ "Error all connections from the pool dropped with connection error"
+ )
+ }
+ }
+ }
+}
+
+impl error_stack::Context for PoolError {}
diff --git a/src/cache/mod.rs b/src/cache/mod.rs
index de7dd4e..03c4155 100644
--- a/src/cache/mod.rs
+++ b/src/cache/mod.rs
@@ -1 +1,2 @@
pub mod cacher;
+pub mod error;
diff --git a/src/config/parser.rs b/src/config/parser.rs
index dbebfd0..2cfc136 100644
--- a/src/config/parser.rs
+++ b/src/config/parser.rs
@@ -5,7 +5,7 @@ use crate::handler::paths::{file_path, FileType};
use super::parser_models::{AggregatorConfig, RateLimiter, Style};
use log::LevelFilter;
-use rlua::Lua;
+use mlua::Lua;
use std::{collections::HashMap, fs, thread::available_parallelism};
/// A named struct which stores the parsed config file options.
@@ -53,30 +53,31 @@ impl Config {
/// or io error if the config.lua file doesn't exists otherwise it returns a newly constructed
/// Config struct with all the parsed config options from the parsed config file.
pub fn parse(logging_initialized: bool) -> Result> {
- Lua::new().context(|context| -> Result> {
- let globals = context.globals();
+ let lua = Lua::new();
+ let globals = lua.globals();
- context
- .load(&fs::read_to_string(file_path(FileType::Config)?)?)
- .exec()?;
+ lua.load(&fs::read_to_string(file_path(FileType::Config)?)?)
+ .exec()?;
- let parsed_threads: u8 = globals.get::<_, u8>("threads")?;
+ let parsed_threads: u8 = globals.get::<_, u8>("threads")?;
- let debug: bool = globals.get::<_, bool>("debug")?;
- let logging:bool= globals.get::<_, bool>("logging")?;
+ let debug: bool = globals.get::<_, bool>("debug")?;
+ let logging: bool = globals.get::<_, bool>("logging")?;
- if !logging_initialized {
- set_logging_level(debug, logging);
- }
+ if !logging_initialized {
+ set_logging_level(debug, logging);
+ }
- let threads: u8 = if parsed_threads == 0 {
- let total_num_of_threads: usize = available_parallelism()?.get() / 2;
- log::error!("Config Error: The value of `threads` option should be a non zero positive integer");
- log::error!("Falling back to using {} threads", total_num_of_threads);
- total_num_of_threads as u8
- } else {
- parsed_threads
- };
+ let threads: u8 = if parsed_threads == 0 {
+ let total_num_of_threads: usize = available_parallelism()?.get() / 2;
+ log::error!(
+ "Config Error: The value of `threads` option should be a non zero positive integer"
+ );
+ log::error!("Falling back to using {} threads", total_num_of_threads);
+ total_num_of_threads as u8
+ } else {
+ parsed_threads
+ };
let rate_limter = globals.get::<_,HashMap>("rate_limiter")?;
diff --git a/src/config/parser_models.rs b/src/config/parser_models.rs
index 4a986fd..343b70c 100644
--- a/src/config/parser_models.rs
+++ b/src/config/parser_models.rs
@@ -18,7 +18,7 @@ use serde::{Deserialize, Serialize};
/// * `theme` - It stores the parsed theme option used to set a theme for the website.
/// * `colorscheme` - It stores the parsed colorscheme option used to set a colorscheme for the
/// theme being used.
-#[derive(Serialize, Deserialize, Clone)]
+#[derive(Serialize, Deserialize, Clone, Default)]
pub struct Style {
pub theme: String,
pub colorscheme: String,
diff --git a/src/engines/duckduckgo.rs b/src/engines/duckduckgo.rs
index 11b7d86..c716e8a 100644
--- a/src/engines/duckduckgo.rs
+++ b/src/engines/duckduckgo.rs
@@ -4,14 +4,14 @@
use std::collections::HashMap;
-use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
+use reqwest::header::HeaderMap;
use scraper::{Html, Selector};
use crate::results::aggregation_models::SearchResult;
use super::engine_models::{EngineError, SearchEngine};
-use error_stack::{IntoReport, Report, Result, ResultExt};
+use error_stack::{Report, Result, ResultExt};
/// A new DuckDuckGo engine type defined in-order to implement the `SearchEngine` trait which allows to
/// reduce code duplication as well as allows to create vector of different search engines easily.
@@ -39,9 +39,9 @@ impl SearchEngine for DuckDuckGo {
/// or HeaderMap fails to initialize.
async fn results(
&self,
- query: String,
+ query: &str,
page: u32,
- user_agent: String,
+ user_agent: &str,
request_timeout: u8,
) -> Result, EngineError> {
// Page number can be missing or empty string and so appropriate handling is required
@@ -61,38 +61,19 @@ impl SearchEngine for DuckDuckGo {
};
// initializing HeaderMap and adding appropriate headers.
- let mut header_map = HeaderMap::new();
- header_map.insert(
- USER_AGENT,
- user_agent
- .parse()
- .into_report()
- .change_context(EngineError::UnexpectedError)?,
- );
- header_map.insert(
- REFERER,
- "https://google.com/"
- .parse()
- .into_report()
- .change_context(EngineError::UnexpectedError)?,
- );
- header_map.insert(
- CONTENT_TYPE,
- "application/x-www-form-urlencoded"
- .parse()
- .into_report()
- .change_context(EngineError::UnexpectedError)?,
- );
- header_map.insert(
- COOKIE,
- "kl=wt-wt"
- .parse()
- .into_report()
- .change_context(EngineError::UnexpectedError)?,
- );
+ let header_map = HeaderMap::try_from(&HashMap::from([
+ ("USER_AGENT".to_string(), user_agent.to_string()),
+ ("REFERER".to_string(), "https://google.com/".to_string()),
+ (
+ "CONTENT_TYPE".to_string(),
+ "application/x-www-form-urlencoded".to_string(),
+ ),
+ ("COOKIE".to_string(), "kl=wt-wt".to_string()),
+ ]))
+ .change_context(EngineError::UnexpectedError)?;
let document: Html = Html::parse_document(
- &DuckDuckGo::fetch_html_from_upstream(self, url, header_map, request_timeout).await?,
+ &DuckDuckGo::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
);
let no_result: Selector = Selector::parse(".no-results")
@@ -126,8 +107,7 @@ impl SearchEngine for DuckDuckGo {
.next()
.unwrap()
.inner_html()
- .trim()
- .to_string(),
+ .trim(),
format!(
"https://{}",
result
@@ -136,15 +116,15 @@ impl SearchEngine for DuckDuckGo {
.unwrap()
.inner_html()
.trim()
- ),
+ )
+ .as_str(),
result
.select(&result_desc)
.next()
.unwrap()
.inner_html()
- .trim()
- .to_string(),
- vec!["duckduckgo".to_string()],
+ .trim(),
+ &["duckduckgo"],
)
})
.map(|search_result| (search_result.url.clone(), search_result))
diff --git a/src/engines/engine_models.rs b/src/engines/engine_models.rs
index d33d13c..86fb207 100644
--- a/src/engines/engine_models.rs
+++ b/src/engines/engine_models.rs
@@ -2,7 +2,7 @@
//! the upstream search engines with the search query provided by the user.
use crate::results::aggregation_models::SearchResult;
-use error_stack::{IntoReport, Result, ResultExt};
+use error_stack::{Result, ResultExt};
use std::{collections::HashMap, fmt, time::Duration};
/// A custom error type used for handle engine associated errors.
@@ -48,7 +48,7 @@ impl error_stack::Context for EngineError {}
pub trait SearchEngine: Sync + Send {
async fn fetch_html_from_upstream(
&self,
- url: String,
+ url: &str,
header_map: reqwest::header::HeaderMap,
request_timeout: u8,
) -> Result {
@@ -59,19 +59,17 @@ pub trait SearchEngine: Sync + Send {
.headers(header_map) // add spoofed headers to emulate human behavior
.send()
.await
- .into_report()
.change_context(EngineError::RequestError)?
.text()
.await
- .into_report()
.change_context(EngineError::RequestError)?)
}
async fn results(
&self,
- query: String,
+ query: &str,
page: u32,
- user_agent: String,
+ user_agent: &str,
request_timeout: u8,
) -> Result, EngineError>;
}
diff --git a/src/engines/searx.rs b/src/engines/searx.rs
index 4ad41f5..ca45cf0 100644
--- a/src/engines/searx.rs
+++ b/src/engines/searx.rs
@@ -2,14 +2,14 @@
//! by querying the upstream searx search engine instance with user provided query and with a page
//! number if provided.
-use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
+use reqwest::header::HeaderMap;
use scraper::{Html, Selector};
use std::collections::HashMap;
use crate::results::aggregation_models::SearchResult;
use super::engine_models::{EngineError, SearchEngine};
-use error_stack::{IntoReport, Report, Result, ResultExt};
+use error_stack::{Report, Result, ResultExt};
/// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to
/// reduce code duplication as well as allows to create vector of different search engines easily.
@@ -38,9 +38,9 @@ impl SearchEngine for Searx {
async fn results(
&self,
- query: String,
+ query: &str,
page: u32,
- user_agent: String,
+ user_agent: &str,
request_timeout: u8,
) -> Result, EngineError> {
// Page number can be missing or empty string and so appropriate handling is required
@@ -51,32 +51,16 @@ impl SearchEngine for Searx {
};
// initializing headers and adding appropriate headers.
- let mut header_map = HeaderMap::new();
- header_map.insert(
- USER_AGENT,
- user_agent
- .parse()
- .into_report()
- .change_context(EngineError::UnexpectedError)?,
- );
- header_map.insert(
- REFERER,
- "https://google.com/"
- .parse()
- .into_report()
- .change_context(EngineError::UnexpectedError)?,
- );
- header_map.insert(
- CONTENT_TYPE,
- "application/x-www-form-urlencoded"
- .parse()
- .into_report()
- .change_context(EngineError::UnexpectedError)?,
- );
- header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse().into_report().change_context(EngineError::UnexpectedError)?);
+ let header_map = HeaderMap::try_from(&HashMap::from([
+ ("USER_AGENT".to_string(), user_agent.to_string()),
+ ("REFERER".to_string(), "https://google.com/".to_string()),
+ ("CONTENT_TYPE".to_string(), "application/x-www-form-urlencoded".to_string()),
+ ("COOKIE".to_string(), "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".to_string())
+ ]))
+ .change_context(EngineError::UnexpectedError)?;
let document: Html = Html::parse_document(
- &Searx::fetch_html_from_upstream(self, url, header_map, request_timeout).await?,
+ &Searx::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
);
let no_result: Selector = Selector::parse("#urls>.dialog-error>p")
@@ -117,24 +101,21 @@ impl SearchEngine for Searx {
.next()
.unwrap()
.inner_html()
- .trim()
- .to_string(),
+ .trim(),
result
.select(&result_url)
.next()
.unwrap()
.value()
.attr("href")
- .unwrap()
- .to_string(),
+ .unwrap(),
result
.select(&result_desc)
.next()
.unwrap()
.inner_html()
- .trim()
- .to_string(),
- vec!["searx".to_string()],
+ .trim(),
+ &["searx"],
)
})
.map(|search_result| (search_result.url.clone(), search_result))
diff --git a/src/handler/paths.rs b/src/handler/paths.rs
index 9b4fa07..91f7f94 100644
--- a/src/handler/paths.rs
+++ b/src/handler/paths.rs
@@ -4,6 +4,7 @@
use std::collections::HashMap;
use std::io::Error;
use std::path::Path;
+use std::sync::OnceLock;
// ------- Constants --------
static PUBLIC_DIRECTORY_NAME: &str = "public";
@@ -20,57 +21,7 @@ pub enum FileType {
Theme,
}
-static FILE_PATHS_FOR_DIFF_FILE_TYPES: once_cell::sync::Lazy>> =
- once_cell::sync::Lazy::new(|| {
- HashMap::from([
- (
- FileType::Config,
- vec![
- format!(
- "{}/.config/{}/{}",
- std::env::var("HOME").unwrap(),
- COMMON_DIRECTORY_NAME,
- CONFIG_FILE_NAME
- ),
- format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
- format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
- ],
- ),
- (
- FileType::Theme,
- vec![
- format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME),
- format!("./{}/", PUBLIC_DIRECTORY_NAME),
- ],
- ),
- (
- FileType::AllowList,
- vec![
- format!(
- "{}/.config/{}/{}",
- std::env::var("HOME").unwrap(),
- COMMON_DIRECTORY_NAME,
- ALLOWLIST_FILE_NAME
- ),
- format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
- format!("./{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
- ],
- ),
- (
- FileType::BlockList,
- vec![
- format!(
- "{}/.config/{}/{}",
- std::env::var("HOME").unwrap(),
- COMMON_DIRECTORY_NAME,
- BLOCKLIST_FILE_NAME
- ),
- format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
- format!("./{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
- ],
- ),
- ])
- });
+static FILE_PATHS_FOR_DIFF_FILE_TYPES: OnceLock>> = OnceLock::new();
/// A helper function which returns an appropriate config file path checking if the config
/// file exists on that path.
@@ -95,11 +46,64 @@ static FILE_PATHS_FOR_DIFF_FILE_TYPES: once_cell::sync::Lazy Result {
- let file_path = FILE_PATHS_FOR_DIFF_FILE_TYPES.get(&file_type).unwrap();
+pub fn file_path(file_type: FileType) -> Result<&'static str, Error> {
+ let file_path: &Vec = FILE_PATHS_FOR_DIFF_FILE_TYPES
+ .get_or_init(|| {
+ HashMap::from([
+ (
+ FileType::Config,
+ vec![
+ format!(
+ "{}/.config/{}/{}",
+ std::env::var("HOME").unwrap(),
+ COMMON_DIRECTORY_NAME,
+ CONFIG_FILE_NAME
+ ),
+ format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
+ format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
+ ],
+ ),
+ (
+ FileType::Theme,
+ vec![
+ format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME),
+ format!("./{}/", PUBLIC_DIRECTORY_NAME),
+ ],
+ ),
+ (
+ FileType::AllowList,
+ vec![
+ format!(
+ "{}/.config/{}/{}",
+ std::env::var("HOME").unwrap(),
+ COMMON_DIRECTORY_NAME,
+ ALLOWLIST_FILE_NAME
+ ),
+ format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
+ format!("./{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
+ ],
+ ),
+ (
+ FileType::BlockList,
+ vec![
+ format!(
+ "{}/.config/{}/{}",
+ std::env::var("HOME").unwrap(),
+ COMMON_DIRECTORY_NAME,
+ BLOCKLIST_FILE_NAME
+ ),
+ format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
+ format!("./{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
+ ],
+ ),
+ ])
+ })
+ .get(&file_type)
+ .unwrap();
+
for (idx, _) in file_path.iter().enumerate() {
if Path::new(file_path[idx].as_str()).exists() {
- return Ok(file_path[idx].clone());
+ return Ok(std::mem::take(&mut &*file_path[idx]));
}
}
diff --git a/src/lib.rs b/src/lib.rs
index be526d9..a1213d6 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -43,7 +43,7 @@ use handler::paths::{file_path, FileType};
pub fn run(listener: TcpListener, config: Config) -> std::io::Result {
let mut handlebars: Handlebars = Handlebars::new();
- let public_folder_path: String = file_path(FileType::Theme)?;
+ let public_folder_path: &str = file_path(FileType::Theme)?;
handlebars
.register_templates_directory(".html", format!("{}/templates", public_folder_path))
diff --git a/src/results/aggregation_models.rs b/src/results/aggregation_models.rs
index e985765..11b2e63 100644
--- a/src/results/aggregation_models.rs
+++ b/src/results/aggregation_models.rs
@@ -2,6 +2,7 @@
//! data scraped from the upstream search engines.
use serde::{Deserialize, Serialize};
+use smallvec::SmallVec;
use crate::{config::parser_models::Style, engines::engine_models::EngineError};
@@ -16,13 +17,13 @@ use crate::{config::parser_models::Style, engines::engine_models::EngineError};
/// (href url in html in simple words).
/// * `description` - The description of the search result.
/// * `engine` - The names of the upstream engines from which this results were provided.
-#[derive(Clone, Serialize, Deserialize)]
+#[derive(Clone, Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct SearchResult {
pub title: String,
pub url: String,
pub description: String,
- pub engine: Vec,
+ pub engine: SmallVec<[String; 0]>,
}
impl SearchResult {
@@ -35,12 +36,12 @@ impl SearchResult {
/// (href url in html in simple words).
/// * `description` - The description of the search result.
/// * `engine` - The names of the upstream engines from which this results were provided.
- pub fn new(title: String, url: String, description: String, engine: Vec) -> Self {
+ pub fn new(title: &str, url: &str, description: &str, engine: &[&str]) -> Self {
SearchResult {
- title,
- url,
- description,
- engine,
+ title: title.to_owned(),
+ url: url.to_owned(),
+ description: description.to_owned(),
+ engine: engine.iter().map(|name| name.to_string()).collect(),
}
}
@@ -49,8 +50,8 @@ impl SearchResult {
/// # Arguments
///
/// * `engine` - Takes an engine name provided as a String.
- pub fn add_engines(&mut self, engine: String) {
- self.engine.push(engine)
+ pub fn add_engines(&mut self, engine: &str) {
+ self.engine.push(engine.to_owned())
}
/// A function which returns the engine name stored from the struct as a string.
@@ -58,13 +59,12 @@ impl SearchResult {
/// # Returns
///
/// An engine name stored as a string from the struct.
- pub fn engine(self) -> String {
- self.engine.get(0).unwrap().to_string()
+ pub fn engine(&mut self) -> String {
+ std::mem::take(&mut self.engine[0])
}
}
-///
-#[derive(Serialize, Deserialize)]
+#[derive(Serialize, Deserialize, Clone)]
pub struct EngineErrorInfo {
pub error: String,
pub engine: String,
@@ -72,18 +72,18 @@ pub struct EngineErrorInfo {
}
impl EngineErrorInfo {
- pub fn new(error: &EngineError, engine: String) -> Self {
+ pub fn new(error: &EngineError, engine: &str) -> Self {
Self {
error: match error {
- EngineError::RequestError => String::from("RequestError"),
- EngineError::EmptyResultSet => String::from("EmptyResultSet"),
- EngineError::UnexpectedError => String::from("UnexpectedError"),
+ EngineError::RequestError => "RequestError".to_owned(),
+ EngineError::EmptyResultSet => "EmptyResultSet".to_owned(),
+ EngineError::UnexpectedError => "UnexpectedError".to_owned(),
},
- engine,
+ engine: engine.to_owned(),
severity_color: match error {
- EngineError::RequestError => String::from("green"),
- EngineError::EmptyResultSet => String::from("blue"),
- EngineError::UnexpectedError => String::from("red"),
+ EngineError::RequestError => "green".to_owned(),
+ EngineError::EmptyResultSet => "blue".to_owned(),
+ EngineError::UnexpectedError => "red".to_owned(),
},
}
}
@@ -108,7 +108,7 @@ pub struct SearchResults {
pub results: Vec,
pub page_query: String,
pub style: Style,
- pub engine_errors_info: Vec,
+ pub engine_errors_info: SmallVec<[EngineErrorInfo; 0]>,
}
impl SearchResults {
@@ -124,19 +124,19 @@ impl SearchResults {
/// given search query.
pub fn new(
results: Vec,
- page_query: String,
- engine_errors_info: Vec,
+ page_query: &str,
+ engine_errors_info: &[EngineErrorInfo],
) -> Self {
- SearchResults {
+ Self {
results,
- page_query,
- style: Style::new("".to_string(), "".to_string()),
- engine_errors_info,
+ page_query: page_query.to_owned(),
+ style: Style::default(),
+ engine_errors_info: SmallVec::from(engine_errors_info),
}
}
/// A setter function to add website style to the return search results.
- pub fn add_style(&mut self, style: Style) {
- self.style = style;
+ pub fn add_style(&mut self, style: &Style) {
+ self.style = style.to_owned();
}
}
diff --git a/src/results/aggregator.rs b/src/results/aggregator.rs
index 3f06ecb..1942acc 100644
--- a/src/results/aggregator.rs
+++ b/src/results/aggregator.rs
@@ -64,14 +64,14 @@ type FutureVec = Vec, Report,
+ upstream_search_engines: &[EngineHandler],
request_timeout: u8,
) -> Result> {
- let user_agent: String = random_user_agent();
+ let user_agent: &str = random_user_agent();
// Add a random delay before making the request.
if random_delay || !debug {
@@ -80,19 +80,18 @@ pub async fn aggregate(
tokio::time::sleep(Duration::from_secs(delay_secs)).await;
}
- let mut names: Vec<&str> = vec![];
+ let mut names: Vec<&str> = Vec::with_capacity(0);
// create tasks for upstream result fetching
let mut tasks: FutureVec = FutureVec::new();
for engine_handler in upstream_search_engines {
- let (name, search_engine) = engine_handler.into_name_engine();
+ let (name, search_engine) = engine_handler.to_owned().into_name_engine();
names.push(name);
- let query: String = query.clone();
- let user_agent: String = user_agent.clone();
+ let query: String = query.to_owned();
tasks.push(tokio::spawn(async move {
search_engine
- .results(query, page, user_agent.clone(), request_timeout)
+ .results(&query, page, user_agent, request_timeout)
.await
}));
}
@@ -110,7 +109,7 @@ pub async fn aggregate(
let mut result_map: HashMap = HashMap::new();
let mut engine_errors_info: Vec = Vec::new();
- let mut handle_error = |error: Report, engine_name: String| {
+ let mut handle_error = |error: &Report, engine_name: &'static str| {
log::error!("Engine Error: {:?}", error);
engine_errors_info.push(EngineErrorInfo::new(
error.downcast_ref::().unwrap(),
@@ -120,7 +119,7 @@ pub async fn aggregate(
for _ in 0..responses.len() {
let response = responses.pop().unwrap();
- let engine = names.pop().unwrap().to_string();
+ let engine = names.pop().unwrap();
if result_map.is_empty() {
match response {
@@ -128,7 +127,7 @@ pub async fn aggregate(
result_map = results.clone();
}
Err(error) => {
- handle_error(error, engine);
+ handle_error(&error, engine);
}
}
continue;
@@ -140,13 +139,13 @@ pub async fn aggregate(
result_map
.entry(key)
.and_modify(|result| {
- result.add_engines(engine.clone());
+ result.add_engines(engine);
})
.or_insert_with(|| -> SearchResult { value });
});
}
Err(error) => {
- handle_error(error, engine);
+ handle_error(&error, engine);
}
}
}
@@ -155,24 +154,20 @@ pub async fn aggregate(
filter_with_lists(
&mut result_map,
&mut blacklist_map,
- &file_path(FileType::BlockList)?,
+ file_path(FileType::BlockList)?,
)?;
filter_with_lists(
&mut blacklist_map,
&mut result_map,
- &file_path(FileType::AllowList)?,
+ file_path(FileType::AllowList)?,
)?;
drop(blacklist_map);
let results: Vec = result_map.into_values().collect();
- Ok(SearchResults::new(
- results,
- query.to_string(),
- engine_errors_info,
- ))
+ Ok(SearchResults::new(results, query, &engine_errors_info))
}
/// Filters a map of search results using a list of regex patterns.
@@ -203,7 +198,10 @@ pub fn filter_with_lists(
|| re.is_match(&search_result.description.to_lowercase())
{
// If the search result matches the regex pattern, move it from the original map to the resultant map
- resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
+ resultant_map.insert(
+ url.to_owned(),
+ map_to_be_filtered.remove(&url.to_owned()).unwrap(),
+ );
}
}
}
@@ -214,6 +212,7 @@ pub fn filter_with_lists(
#[cfg(test)]
mod tests {
use super::*;
+ use smallvec::smallvec;
use std::collections::HashMap;
use std::io::Write;
use tempfile::NamedTempFile;
@@ -223,22 +222,22 @@ mod tests {
// Create a map of search results to filter
let mut map_to_be_filtered = HashMap::new();
map_to_be_filtered.insert(
- "https://www.example.com".to_string(),
+ "https://www.example.com".to_owned(),
SearchResult {
- title: "Example Domain".to_string(),
- url: "https://www.example.com".to_string(),
+ title: "Example Domain".to_owned(),
+ url: "https://www.example.com".to_owned(),
description: "This domain is for use in illustrative examples in documents."
- .to_string(),
- engine: vec!["Google".to_string(), "Bing".to_string()],
+ .to_owned(),
+ engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
},
);
map_to_be_filtered.insert(
- "https://www.rust-lang.org/".to_string(),
+ "https://www.rust-lang.org/".to_owned(),
SearchResult {
- title: "Rust Programming Language".to_string(),
- url: "https://www.rust-lang.org/".to_string(),
- description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
- engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
+ title: "Rust Programming Language".to_owned(),
+ url: "https://www.rust-lang.org/".to_owned(),
+ description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
+ engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
},
);
@@ -267,22 +266,22 @@ mod tests {
fn test_filter_with_lists_wildcard() -> Result<(), Box> {
let mut map_to_be_filtered = HashMap::new();
map_to_be_filtered.insert(
- "https://www.example.com".to_string(),
+ "https://www.example.com".to_owned(),
SearchResult {
- title: "Example Domain".to_string(),
- url: "https://www.example.com".to_string(),
+ title: "Example Domain".to_owned(),
+ url: "https://www.example.com".to_owned(),
description: "This domain is for use in illustrative examples in documents."
- .to_string(),
- engine: vec!["Google".to_string(), "Bing".to_string()],
+ .to_owned(),
+ engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
},
);
map_to_be_filtered.insert(
- "https://www.rust-lang.org/".to_string(),
+ "https://www.rust-lang.org/".to_owned(),
SearchResult {
- title: "Rust Programming Language".to_string(),
- url: "https://www.rust-lang.org/".to_string(),
- description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
- engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
+ title: "Rust Programming Language".to_owned(),
+ url: "https://www.rust-lang.org/".to_owned(),
+ description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
+ engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
},
);
@@ -327,13 +326,13 @@ mod tests {
fn test_filter_with_lists_invalid_regex() {
let mut map_to_be_filtered = HashMap::new();
map_to_be_filtered.insert(
- "https://www.example.com".to_string(),
+ "https://www.example.com".to_owned(),
SearchResult {
- title: "Example Domain".to_string(),
- url: "https://www.example.com".to_string(),
+ title: "Example Domain".to_owned(),
+ url: "https://www.example.com".to_owned(),
description: "This domain is for use in illustrative examples in documents."
- .to_string(),
- engine: vec!["Google".to_string(), "Bing".to_string()],
+ .to_owned(),
+ engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
},
);
diff --git a/src/results/user_agent.rs b/src/results/user_agent.rs
index 13166bf..3bfa05b 100644
--- a/src/results/user_agent.rs
+++ b/src/results/user_agent.rs
@@ -1,28 +1,32 @@
//! This module provides the functionality to generate random user agent string.
+use std::sync::OnceLock;
+
use fake_useragent::{Browsers, UserAgents, UserAgentsBuilder};
-static USER_AGENTS: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| {
- UserAgentsBuilder::new()
- .cache(false)
- .dir("/tmp")
- .thread(1)
- .set_browsers(
- Browsers::new()
- .set_chrome()
- .set_safari()
- .set_edge()
- .set_firefox()
- .set_mozilla(),
- )
- .build()
-});
+static USER_AGENTS: OnceLock = OnceLock::new();
/// A function to generate random user agent to improve privacy of the user.
///
/// # Returns
///
/// A randomly generated user agent string.
-pub fn random_user_agent() -> String {
- USER_AGENTS.random().to_string()
+pub fn random_user_agent() -> &'static str {
+ USER_AGENTS
+ .get_or_init(|| {
+ UserAgentsBuilder::new()
+ .cache(false)
+ .dir("/tmp")
+ .thread(1)
+ .set_browsers(
+ Browsers::new()
+ .set_chrome()
+ .set_safari()
+ .set_edge()
+ .set_firefox()
+ .set_mozilla(),
+ )
+ .build()
+ })
+ .random()
}
diff --git a/src/server/routes.rs b/src/server/routes.rs
index 8910f8f..e17a452 100644
--- a/src/server/routes.rs
+++ b/src/server/routes.rs
@@ -16,6 +16,10 @@ use handlebars::Handlebars;
use serde::Deserialize;
use tokio::join;
+// ---- Statics ----
+/// Redis cache connection, initialized once on first use and then shared by all requests.
+static REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
+
/// A named struct which deserializes all the user provided search parameters and stores them.
///
/// # Fields
@@ -62,10 +66,10 @@ pub async fn not_found(
/// * `engines` - It stores the user selected upstream search engines selected from the UI.
#[allow(dead_code)]
#[derive(Deserialize)]
-struct Cookie {
- theme: String,
- colorscheme: String,
- engines: Vec,
+struct Cookie<'a> {
+ theme: &'a str,
+ colorscheme: &'a str,
+ engines: Vec<&'a str>,
}
/// Handles the route of search page of the `websurfx` meta search engine website and it takes
@@ -111,9 +115,9 @@ pub async fn search(
page - 1
),
&config,
- query.to_string(),
+ query,
page - 1,
- req.clone(),
+ &req,
),
results(
format!(
@@ -121,9 +125,9 @@ pub async fn search(
config.binding_ip, config.port, query, page
),
&config,
- query.to_string(),
+ query,
page,
- req.clone(),
+ &req,
),
results(
format!(
@@ -134,9 +138,9 @@ pub async fn search(
page + 1
),
&config,
- query.to_string(),
+ query,
page + 1,
- req.clone(),
+ &req,
)
);
@@ -154,30 +158,35 @@ pub async fn search(
async fn results(
url: String,
config: &Config,
- query: String,
+ query: &str,
page: u32,
- req: HttpRequest,
+ req: &HttpRequest,
) -> Result> {
- //Initialize redis cache connection struct
- let mut redis_cache = RedisCache::new(config.redis_url.clone())?;
+ let redis_cache: RedisCache = REDIS_CACHE
+ .get_or_init(async {
+ // Initialize redis cache connection pool only once and store it on the heap.
+ RedisCache::new(&config.redis_url, 5).await.unwrap()
+ })
+ .await
+ .clone();
+
// fetch the cached results json.
- let cached_results_json = redis_cache.cached_json(&url);
+ let cached_results_json: Result> =
+ redis_cache.clone().cached_json(&url).await;
// check if fetched cache results was indeed fetched or it was an error and if so
// handle the data accordingly.
match cached_results_json {
- Ok(results) => Ok(serde_json::from_str::(&results).unwrap()),
+ Ok(results) => Ok(serde_json::from_str::(&results)?),
Err(_) => {
// check if the cookie value is empty or not if it is empty then use the
// default selected upstream search engines from the config file otherwise
// parse the non-empty cookie and grab the user selected engines from the
// UI and use that.
- let mut results: crate::results::aggregation_models::SearchResults = match req
- .cookie("appCookie")
- {
+ let mut results: SearchResults = match req.cookie("appCookie") {
Some(cookie_value) => {
let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
- let engines = cookie_value
+ let engines: Vec = cookie_value
.engines
.iter()
.filter_map(|name| EngineHandler::new(name))
@@ -188,7 +197,7 @@ async fn results(
page,
config.aggregator.random_delay,
config.debug,
- engines,
+ &engines,
config.request_timeout,
)
.await?
@@ -199,14 +208,18 @@ async fn results(
page,
config.aggregator.random_delay,
config.debug,
- config.upstream_search_engines.clone(),
+ &config.upstream_search_engines,
config.request_timeout,
)
.await?
}
};
- results.add_style(config.style.clone());
- redis_cache.cache_results(serde_json::to_string(&results)?, &url)?;
+
+ results.add_style(&config.style);
+ redis_cache
+ .clone()
+ .cache_results(&serde_json::to_string(&results)?, &url)
+ .await?;
Ok(results)
}
}