Merge branch 'rolling' into feat-disallow-user-to-search-via-lists
This commit is contained in:
commit
60b2fcc27a
20 changed files with 628 additions and 327 deletions
|
@ -3,9 +3,19 @@
|
|||
//! This module contains the main function which handles the logging of the application to the
|
||||
//! stdout and handles the command line arguments provided and launches the `websurfx` server.
|
||||
|
||||
use mimalloc::MiMalloc;
|
||||
use std::net::TcpListener;
|
||||
use websurfx::{config::parser::Config, run};
|
||||
|
||||
/// A dhat heap memory profiler
|
||||
#[cfg(feature = "dhat-heap")]
|
||||
#[global_allocator]
|
||||
static ALLOC: dhat::Alloc = dhat::Alloc;
|
||||
|
||||
#[cfg(not(feature = "dhat-heap"))]
|
||||
#[global_allocator]
|
||||
static GLOBAL: MiMalloc = MiMalloc;
|
||||
|
||||
/// The function that launches the main server and registers all the routes of the website.
|
||||
///
|
||||
/// # Error
|
||||
|
@ -14,6 +24,10 @@ use websurfx::{config::parser::Config, run};
|
|||
/// available for being used for other applications.
|
||||
#[actix_web::main]
|
||||
async fn main() -> std::io::Result<()> {
|
||||
// A dhat heap profiler initialization.
|
||||
#[cfg(feature = "dhat-heap")]
|
||||
let _profiler = dhat::Profiler::new_heap();
|
||||
|
||||
// Initialize the parsed config file.
|
||||
let config = Config::parse(false).unwrap();
|
||||
|
||||
|
|
127
src/cache/cacher.rs
vendored
127
src/cache/cacher.rs
vendored
|
@ -1,17 +1,27 @@
|
|||
//! This module provides the functionality to cache the results fetched and aggregated
|
||||
//! from the upstream search engines in a json format.
|
||||
|
||||
use error_stack::Report;
|
||||
use futures::future::try_join_all;
|
||||
use md5::compute;
|
||||
use redis::{Client, Commands, Connection};
|
||||
use redis::{aio::ConnectionManager, AsyncCommands, Client, RedisError};
|
||||
|
||||
use super::error::PoolError;
|
||||
|
||||
/// A named struct which stores the redis Connection url address to which the client will
|
||||
/// connect.
|
||||
///
|
||||
/// # Fields
|
||||
///
|
||||
/// * `redis_connection_url` - It stores the redis Connection url address.
|
||||
/// * `connection_pool` - It stores a pool of connections ready to be used.
|
||||
/// * `pool_size` - It stores the size of the connection pool (in other words the number of
|
||||
/// connections that should be stored in the pool).
|
||||
/// * `current_connection` - It stores the index of which connection is being used at the moment.
|
||||
#[derive(Clone)]
|
||||
pub struct RedisCache {
|
||||
connection: Connection,
|
||||
connection_pool: Vec<ConnectionManager>,
|
||||
pool_size: u8,
|
||||
current_connection: u8,
|
||||
}
|
||||
|
||||
impl RedisCache {
|
||||
|
@ -19,11 +29,25 @@ impl RedisCache {
|
|||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `redis_connection_url` - It stores the redis Connection url address.
|
||||
pub fn new(redis_connection_url: String) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
/// * `redis_connection_url` - It takes the redis Connection url address.
|
||||
/// * `pool_size` - It takes the size of the connection pool (in other words the number of
|
||||
/// connections that should be stored in the pool).
|
||||
pub async fn new(
|
||||
redis_connection_url: &str,
|
||||
pool_size: u8,
|
||||
) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let client = Client::open(redis_connection_url)?;
|
||||
let connection = client.get_connection()?;
|
||||
let redis_cache = RedisCache { connection };
|
||||
let mut tasks: Vec<_> = Vec::new();
|
||||
|
||||
for _ in 0..pool_size {
|
||||
tasks.push(client.get_tokio_connection_manager());
|
||||
}
|
||||
|
||||
let redis_cache = RedisCache {
|
||||
connection_pool: try_join_all(tasks).await?,
|
||||
pool_size,
|
||||
current_connection: Default::default(),
|
||||
};
|
||||
Ok(redis_cache)
|
||||
}
|
||||
|
||||
|
@ -32,7 +56,7 @@ impl RedisCache {
|
|||
/// # Arguments
|
||||
///
|
||||
/// * `url` - It takes an url as string.
|
||||
fn hash_url(url: &str) -> String {
|
||||
fn hash_url(&self, url: &str) -> String {
|
||||
format!("{:?}", compute(url))
|
||||
}
|
||||
|
||||
|
@ -41,9 +65,42 @@ impl RedisCache {
|
|||
/// # Arguments
|
||||
///
|
||||
/// * `url` - It takes an url as a string.
|
||||
pub fn cached_json(&mut self, url: &str) -> Result<String, Box<dyn std::error::Error>> {
|
||||
let hashed_url_string = Self::hash_url(url);
|
||||
Ok(self.connection.get(hashed_url_string)?)
|
||||
pub async fn cached_json(&mut self, url: &str) -> Result<String, Report<PoolError>> {
|
||||
self.current_connection = Default::default();
|
||||
let hashed_url_string: &str = &self.hash_url(url);
|
||||
|
||||
let mut result: Result<String, RedisError> = self.connection_pool
|
||||
[self.current_connection as usize]
|
||||
.get(hashed_url_string)
|
||||
.await;
|
||||
|
||||
// Retry logic: if the command fails because the current connection was dropped, move on to
|
||||
// the next connection in the pool and run the same redis command again. This repeats until
|
||||
// a connection either succeeds or fails with an error that is not a connection drop, and
|
||||
// that outcome is returned as a `Result`. If every connection in the pool fails with a
|
||||
// connection dropped error, a custom pool error is returned instead.
|
||||
loop {
|
||||
match result {
|
||||
Err(error) => match error.is_connection_dropped() {
|
||||
true => {
|
||||
self.current_connection += 1;
|
||||
if self.current_connection == self.pool_size {
|
||||
return Err(Report::new(
|
||||
PoolError::PoolExhaustionWithConnectionDropError,
|
||||
));
|
||||
}
|
||||
result = self.connection_pool[self.current_connection as usize]
|
||||
.get(hashed_url_string)
|
||||
.await;
|
||||
continue;
|
||||
}
|
||||
false => return Err(Report::new(PoolError::RedisError(error))),
|
||||
},
|
||||
Ok(res) => return Ok(res),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A function which caches the results by using the hashed `url` as the key and
|
||||
|
@ -54,21 +111,45 @@ impl RedisCache {
|
|||
///
|
||||
/// * `json_results` - It takes the json results string as an argument.
|
||||
/// * `url` - It takes the url as a String.
|
||||
pub fn cache_results(
|
||||
pub async fn cache_results(
|
||||
&mut self,
|
||||
json_results: String,
|
||||
json_results: &str,
|
||||
url: &str,
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let hashed_url_string = Self::hash_url(url);
|
||||
) -> Result<(), Report<PoolError>> {
|
||||
self.current_connection = Default::default();
|
||||
let hashed_url_string: &str = &self.hash_url(url);
|
||||
|
||||
// put results_json into cache
|
||||
self.connection.set(&hashed_url_string, json_results)?;
|
||||
let mut result: Result<(), RedisError> = self.connection_pool
|
||||
[self.current_connection as usize]
|
||||
.set_ex(hashed_url_string, json_results, 60)
|
||||
.await;
|
||||
|
||||
// Set the TTL for the key to 60 seconds
|
||||
self.connection
|
||||
.expire::<String, u32>(hashed_url_string, 60)
|
||||
.unwrap();
|
||||
|
||||
Ok(())
|
||||
// Retry logic: if the command fails because the current connection was dropped, move on to
|
||||
// the next connection in the pool and run the same redis command again. This repeats until
|
||||
// a connection either succeeds or fails with an error that is not a connection drop, and
|
||||
// that outcome is returned as a `Result`. If every connection in the pool fails with a
|
||||
// connection dropped error, a custom pool error is returned instead.
|
||||
loop {
|
||||
match result {
|
||||
Err(error) => match error.is_connection_dropped() {
|
||||
true => {
|
||||
self.current_connection += 1;
|
||||
if self.current_connection == self.pool_size {
|
||||
return Err(Report::new(
|
||||
PoolError::PoolExhaustionWithConnectionDropError,
|
||||
));
|
||||
}
|
||||
result = self.connection_pool[self.current_connection as usize]
|
||||
.set_ex(hashed_url_string, json_results, 60)
|
||||
.await;
|
||||
continue;
|
||||
}
|
||||
false => return Err(Report::new(PoolError::RedisError(error))),
|
||||
},
|
||||
Ok(_) => return Ok(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
40
src/cache/error.rs
vendored
Normal file
40
src/cache/error.rs
vendored
Normal file
|
@ -0,0 +1,40 @@
|
|||
//! This module provides the error enum to handle the different errors that can occur while requesting data from
|
||||
//! the redis server using an async connection pool.
|
||||
use std::fmt;
|
||||
|
||||
use redis::RedisError;
|
||||
|
||||
/// A custom error type used for handling redis async pool associated errors.
|
||||
///
|
||||
/// This enum provides variants for two different categories of errors:
|
||||
/// * `RedisError` - This variant handles all errors related to `RedisError`,
|
||||
/// * `PoolExhaustionWithConnectionDropError` - This variant handles the error
|
||||
/// which occurs when all the connections in the connection pool return a connection
|
||||
/// dropped redis error.
|
||||
#[derive(Debug)]
|
||||
pub enum PoolError {
|
||||
RedisError(RedisError),
|
||||
PoolExhaustionWithConnectionDropError,
|
||||
}
|
||||
|
||||
impl fmt::Display for PoolError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
PoolError::RedisError(redis_error) => {
|
||||
if let Some(detail) = redis_error.detail() {
|
||||
write!(f, "{}", detail)
|
||||
} else {
|
||||
write!(f, "")
|
||||
}
|
||||
}
|
||||
PoolError::PoolExhaustionWithConnectionDropError => {
|
||||
write!(
|
||||
f,
|
||||
"Error all connections from the pool dropped with connection error"
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl error_stack::Context for PoolError {}
|
1
src/cache/mod.rs
vendored
1
src/cache/mod.rs
vendored
|
@ -1 +1,2 @@
|
|||
pub mod cacher;
|
||||
pub mod error;
|
||||
|
|
|
@ -5,7 +5,7 @@ use crate::handler::paths::{file_path, FileType};
|
|||
|
||||
use super::parser_models::Style;
|
||||
use log::LevelFilter;
|
||||
use rlua::Lua;
|
||||
use mlua::Lua;
|
||||
use std::{collections::HashMap, fs, thread::available_parallelism};
|
||||
|
||||
/// A named struct which stores the parsed config file options.
|
||||
|
@ -64,30 +64,31 @@ impl Config {
|
|||
/// or io error if the config.lua file doesn't exist otherwise it returns a newly constructed
|
||||
/// Config struct with all the parsed config options from the parsed config file.
|
||||
pub fn parse(logging_initialized: bool) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
Lua::new().context(|context| -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let globals = context.globals();
|
||||
let lua = Lua::new();
|
||||
let globals = lua.globals();
|
||||
|
||||
context
|
||||
.load(&fs::read_to_string(file_path(FileType::Config)?)?)
|
||||
.exec()?;
|
||||
lua.load(&fs::read_to_string(file_path(FileType::Config)?)?)
|
||||
.exec()?;
|
||||
|
||||
let parsed_threads: u8 = globals.get::<_, u8>("threads")?;
|
||||
let parsed_threads: u8 = globals.get::<_, u8>("threads")?;
|
||||
|
||||
let debug: bool = globals.get::<_, bool>("debug")?;
|
||||
let logging:bool= globals.get::<_, bool>("logging")?;
|
||||
let debug: bool = globals.get::<_, bool>("debug")?;
|
||||
let logging: bool = globals.get::<_, bool>("logging")?;
|
||||
|
||||
if !logging_initialized {
|
||||
set_logging_level(debug, logging);
|
||||
}
|
||||
if !logging_initialized {
|
||||
set_logging_level(debug, logging);
|
||||
}
|
||||
|
||||
let threads: u8 = if parsed_threads == 0 {
|
||||
let total_num_of_threads: usize = available_parallelism()?.get() / 2;
|
||||
log::error!("Config Error: The value of `threads` option should be a non zero positive integer");
|
||||
log::error!("Falling back to using {} threads", total_num_of_threads);
|
||||
total_num_of_threads as u8
|
||||
} else {
|
||||
parsed_threads
|
||||
};
|
||||
let threads: u8 = if parsed_threads == 0 {
|
||||
let total_num_of_threads: usize = available_parallelism()?.get() / 2;
|
||||
log::error!(
|
||||
"Config Error: The value of `threads` option should be a non zero positive integer"
|
||||
);
|
||||
log::error!("Falling back to using {} threads", total_num_of_threads);
|
||||
total_num_of_threads as u8
|
||||
} else {
|
||||
parsed_threads
|
||||
};
|
||||
|
||||
let parsed_safe_search:u8 = globals.get::<_,u8>("safe_search")?;
|
||||
let safe_search: u8 = match parsed_safe_search {
|
||||
|
|
|
@ -4,14 +4,14 @@
|
|||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
|
||||
use reqwest::header::HeaderMap;
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
use crate::results::aggregation_models::SearchResult;
|
||||
|
||||
use super::engine_models::{EngineError, SearchEngine};
|
||||
|
||||
use error_stack::{IntoReport, Report, Result, ResultExt};
|
||||
use error_stack::{Report, Result, ResultExt};
|
||||
|
||||
/// A new DuckDuckGo engine type defined in order to implement the `SearchEngine` trait, which
|
||||
/// reduces code duplication and makes it easy to create a vector of different search engines.
|
||||
|
@ -39,9 +39,9 @@ impl SearchEngine for DuckDuckGo {
|
|||
/// or HeaderMap fails to initialize.
|
||||
async fn results(
|
||||
&self,
|
||||
query: String,
|
||||
query: &str,
|
||||
page: u32,
|
||||
user_agent: String,
|
||||
user_agent: &str,
|
||||
request_timeout: u8,
|
||||
_safe_search: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
|
@ -62,38 +62,19 @@ impl SearchEngine for DuckDuckGo {
|
|||
};
|
||||
|
||||
// initializing HeaderMap and adding appropriate headers.
|
||||
let mut header_map = HeaderMap::new();
|
||||
header_map.insert(
|
||||
USER_AGENT,
|
||||
user_agent
|
||||
.parse()
|
||||
.into_report()
|
||||
.change_context(EngineError::UnexpectedError)?,
|
||||
);
|
||||
header_map.insert(
|
||||
REFERER,
|
||||
"https://google.com/"
|
||||
.parse()
|
||||
.into_report()
|
||||
.change_context(EngineError::UnexpectedError)?,
|
||||
);
|
||||
header_map.insert(
|
||||
CONTENT_TYPE,
|
||||
"application/x-www-form-urlencoded"
|
||||
.parse()
|
||||
.into_report()
|
||||
.change_context(EngineError::UnexpectedError)?,
|
||||
);
|
||||
header_map.insert(
|
||||
COOKIE,
|
||||
"kl=wt-wt"
|
||||
.parse()
|
||||
.into_report()
|
||||
.change_context(EngineError::UnexpectedError)?,
|
||||
);
|
||||
let header_map = HeaderMap::try_from(&HashMap::from([
|
||||
("USER_AGENT".to_string(), user_agent.to_string()),
|
||||
("REFERER".to_string(), "https://google.com/".to_string()),
|
||||
(
|
||||
"CONTENT_TYPE".to_string(),
|
||||
"application/x-www-form-urlencoded".to_string(),
|
||||
),
|
||||
("COOKIE".to_string(), "kl=wt-wt".to_string()),
|
||||
]))
|
||||
.change_context(EngineError::UnexpectedError)?;
|
||||
|
||||
let document: Html = Html::parse_document(
|
||||
&DuckDuckGo::fetch_html_from_upstream(self, url, header_map, request_timeout).await?,
|
||||
&DuckDuckGo::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
|
||||
);
|
||||
|
||||
let no_result: Selector = Selector::parse(".no-results")
|
||||
|
@ -127,8 +108,7 @@ impl SearchEngine for DuckDuckGo {
|
|||
.next()
|
||||
.unwrap()
|
||||
.inner_html()
|
||||
.trim()
|
||||
.to_string(),
|
||||
.trim(),
|
||||
format!(
|
||||
"https://{}",
|
||||
result
|
||||
|
@ -137,15 +117,15 @@ impl SearchEngine for DuckDuckGo {
|
|||
.unwrap()
|
||||
.inner_html()
|
||||
.trim()
|
||||
),
|
||||
)
|
||||
.as_str(),
|
||||
result
|
||||
.select(&result_desc)
|
||||
.next()
|
||||
.unwrap()
|
||||
.inner_html()
|
||||
.trim()
|
||||
.to_string(),
|
||||
vec!["duckduckgo".to_string()],
|
||||
.trim(),
|
||||
&["duckduckgo"],
|
||||
)
|
||||
})
|
||||
.map(|search_result| (search_result.url.clone(), search_result))
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
//! the upstream search engines with the search query provided by the user.
|
||||
|
||||
use crate::results::aggregation_models::SearchResult;
|
||||
use error_stack::{IntoReport, Result, ResultExt};
|
||||
use error_stack::{Result, ResultExt};
|
||||
use std::{collections::HashMap, fmt, time::Duration};
|
||||
|
||||
/// A custom error type used for handling engine-associated errors.
|
||||
|
@ -48,7 +48,7 @@ impl error_stack::Context for EngineError {}
|
|||
pub trait SearchEngine: Sync + Send {
|
||||
async fn fetch_html_from_upstream(
|
||||
&self,
|
||||
url: String,
|
||||
url: &str,
|
||||
header_map: reqwest::header::HeaderMap,
|
||||
request_timeout: u8,
|
||||
) -> Result<String, EngineError> {
|
||||
|
@ -59,19 +59,17 @@ pub trait SearchEngine: Sync + Send {
|
|||
.headers(header_map) // add spoofed headers to emulate human behavior
|
||||
.send()
|
||||
.await
|
||||
.into_report()
|
||||
.change_context(EngineError::RequestError)?
|
||||
.text()
|
||||
.await
|
||||
.into_report()
|
||||
.change_context(EngineError::RequestError)?)
|
||||
}
|
||||
|
||||
async fn results(
|
||||
&self,
|
||||
query: String,
|
||||
query: &str,
|
||||
page: u32,
|
||||
user_agent: String,
|
||||
user_agent: &str,
|
||||
request_timeout: u8,
|
||||
safe_search: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError>;
|
||||
|
|
|
@ -2,14 +2,14 @@
|
|||
//! by querying the upstream searx search engine instance with user provided query and with a page
|
||||
//! number if provided.
|
||||
|
||||
use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
|
||||
use reqwest::header::HeaderMap;
|
||||
use scraper::{Html, Selector};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::results::aggregation_models::SearchResult;
|
||||
|
||||
use super::engine_models::{EngineError, SearchEngine};
|
||||
use error_stack::{IntoReport, Report, Result, ResultExt};
|
||||
use error_stack::{Report, Result, ResultExt};
|
||||
|
||||
/// A new Searx engine type defined in order to implement the `SearchEngine` trait, which
|
||||
/// reduces code duplication and makes it easy to create a vector of different search engines.
|
||||
|
@ -38,9 +38,9 @@ impl SearchEngine for Searx {
|
|||
|
||||
async fn results(
|
||||
&self,
|
||||
query: String,
|
||||
query: &str,
|
||||
page: u32,
|
||||
user_agent: String,
|
||||
user_agent: &str,
|
||||
request_timeout: u8,
|
||||
mut safe_search: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
|
@ -60,32 +60,16 @@ impl SearchEngine for Searx {
|
|||
};
|
||||
|
||||
// initializing headers and adding appropriate headers.
|
||||
let mut header_map = HeaderMap::new();
|
||||
header_map.insert(
|
||||
USER_AGENT,
|
||||
user_agent
|
||||
.parse()
|
||||
.into_report()
|
||||
.change_context(EngineError::UnexpectedError)?,
|
||||
);
|
||||
header_map.insert(
|
||||
REFERER,
|
||||
"https://google.com/"
|
||||
.parse()
|
||||
.into_report()
|
||||
.change_context(EngineError::UnexpectedError)?,
|
||||
);
|
||||
header_map.insert(
|
||||
CONTENT_TYPE,
|
||||
"application/x-www-form-urlencoded"
|
||||
.parse()
|
||||
.into_report()
|
||||
.change_context(EngineError::UnexpectedError)?,
|
||||
);
|
||||
header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse().into_report().change_context(EngineError::UnexpectedError)?);
|
||||
let header_map = HeaderMap::try_from(&HashMap::from([
|
||||
("USER_AGENT".to_string(), user_agent.to_string()),
|
||||
("REFERER".to_string(), "https://google.com/".to_string()),
|
||||
("CONTENT_TYPE".to_string(), "application/x-www-form-urlencoded".to_string()),
|
||||
("COOKIE".to_string(), "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".to_string())
|
||||
]))
|
||||
.change_context(EngineError::UnexpectedError)?;
|
||||
|
||||
let document: Html = Html::parse_document(
|
||||
&Searx::fetch_html_from_upstream(self, url, header_map, request_timeout).await?,
|
||||
&Searx::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
|
||||
);
|
||||
|
||||
let no_result: Selector = Selector::parse("#urls>.dialog-error>p")
|
||||
|
@ -126,24 +110,21 @@ impl SearchEngine for Searx {
|
|||
.next()
|
||||
.unwrap()
|
||||
.inner_html()
|
||||
.trim()
|
||||
.to_string(),
|
||||
.trim(),
|
||||
result
|
||||
.select(&result_url)
|
||||
.next()
|
||||
.unwrap()
|
||||
.value()
|
||||
.attr("href")
|
||||
.unwrap()
|
||||
.to_string(),
|
||||
.unwrap(),
|
||||
result
|
||||
.select(&result_desc)
|
||||
.next()
|
||||
.unwrap()
|
||||
.inner_html()
|
||||
.trim()
|
||||
.to_string(),
|
||||
vec!["searx".to_string()],
|
||||
.trim(),
|
||||
&["searx"],
|
||||
)
|
||||
})
|
||||
.map(|search_result| (search_result.url.clone(), search_result))
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
use std::collections::HashMap;
|
||||
use std::io::Error;
|
||||
use std::path::Path;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
// ------- Constants --------
|
||||
static PUBLIC_DIRECTORY_NAME: &str = "public";
|
||||
|
@ -20,57 +21,7 @@ pub enum FileType {
|
|||
Theme,
|
||||
}
|
||||
|
||||
static FILE_PATHS_FOR_DIFF_FILE_TYPES: once_cell::sync::Lazy<HashMap<FileType, Vec<String>>> =
|
||||
once_cell::sync::Lazy::new(|| {
|
||||
HashMap::from([
|
||||
(
|
||||
FileType::Config,
|
||||
vec![
|
||||
format!(
|
||||
"{}/.config/{}/{}",
|
||||
std::env::var("HOME").unwrap(),
|
||||
COMMON_DIRECTORY_NAME,
|
||||
CONFIG_FILE_NAME
|
||||
),
|
||||
format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
|
||||
format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
|
||||
],
|
||||
),
|
||||
(
|
||||
FileType::Theme,
|
||||
vec![
|
||||
format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME),
|
||||
format!("./{}/", PUBLIC_DIRECTORY_NAME),
|
||||
],
|
||||
),
|
||||
(
|
||||
FileType::AllowList,
|
||||
vec![
|
||||
format!(
|
||||
"{}/.config/{}/{}",
|
||||
std::env::var("HOME").unwrap(),
|
||||
COMMON_DIRECTORY_NAME,
|
||||
ALLOWLIST_FILE_NAME
|
||||
),
|
||||
format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
|
||||
format!("./{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
|
||||
],
|
||||
),
|
||||
(
|
||||
FileType::BlockList,
|
||||
vec![
|
||||
format!(
|
||||
"{}/.config/{}/{}",
|
||||
std::env::var("HOME").unwrap(),
|
||||
COMMON_DIRECTORY_NAME,
|
||||
BLOCKLIST_FILE_NAME
|
||||
),
|
||||
format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
|
||||
format!("./{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
|
||||
],
|
||||
),
|
||||
])
|
||||
});
|
||||
static FILE_PATHS_FOR_DIFF_FILE_TYPES: OnceLock<HashMap<FileType, Vec<String>>> = OnceLock::new();
|
||||
|
||||
/// A helper function which returns an appropriate config file path checking if the config
|
||||
/// file exists on that path.
|
||||
|
@ -95,11 +46,64 @@ static FILE_PATHS_FOR_DIFF_FILE_TYPES: once_cell::sync::Lazy<HashMap<FileType, V
|
|||
/// 1. `/opt/websurfx` if it is not present here then it falls back to the next one (2)
|
||||
/// 2. Under project folder ( or codebase in other words) if it is not present
|
||||
/// here then it returns an error as mentioned above.
|
||||
pub fn file_path(file_type: FileType) -> Result<String, Error> {
|
||||
let file_path = FILE_PATHS_FOR_DIFF_FILE_TYPES.get(&file_type).unwrap();
|
||||
pub fn file_path(file_type: FileType) -> Result<&'static str, Error> {
|
||||
let file_path: &Vec<String> = FILE_PATHS_FOR_DIFF_FILE_TYPES
|
||||
.get_or_init(|| {
|
||||
HashMap::from([
|
||||
(
|
||||
FileType::Config,
|
||||
vec![
|
||||
format!(
|
||||
"{}/.config/{}/{}",
|
||||
std::env::var("HOME").unwrap(),
|
||||
COMMON_DIRECTORY_NAME,
|
||||
CONFIG_FILE_NAME
|
||||
),
|
||||
format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
|
||||
format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
|
||||
],
|
||||
),
|
||||
(
|
||||
FileType::Theme,
|
||||
vec![
|
||||
format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME),
|
||||
format!("./{}/", PUBLIC_DIRECTORY_NAME),
|
||||
],
|
||||
),
|
||||
(
|
||||
FileType::AllowList,
|
||||
vec![
|
||||
format!(
|
||||
"{}/.config/{}/{}",
|
||||
std::env::var("HOME").unwrap(),
|
||||
COMMON_DIRECTORY_NAME,
|
||||
ALLOWLIST_FILE_NAME
|
||||
),
|
||||
format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
|
||||
format!("./{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
|
||||
],
|
||||
),
|
||||
(
|
||||
FileType::BlockList,
|
||||
vec![
|
||||
format!(
|
||||
"{}/.config/{}/{}",
|
||||
std::env::var("HOME").unwrap(),
|
||||
COMMON_DIRECTORY_NAME,
|
||||
BLOCKLIST_FILE_NAME
|
||||
),
|
||||
format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
|
||||
format!("./{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
|
||||
],
|
||||
),
|
||||
])
|
||||
})
|
||||
.get(&file_type)
|
||||
.unwrap();
|
||||
|
||||
for (idx, _) in file_path.iter().enumerate() {
|
||||
if Path::new(file_path[idx].as_str()).exists() {
|
||||
return Ok(file_path[idx].clone());
|
||||
return Ok(std::mem::take(&mut &*file_path[idx]));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
10
src/lib.rs
10
src/lib.rs
|
@ -14,7 +14,12 @@ use crate::server::routes;
|
|||
|
||||
use actix_cors::Cors;
|
||||
use actix_files as fs;
|
||||
use actix_web::{dev::Server, http::header, middleware::Logger, web, App, HttpServer};
|
||||
use actix_web::{
|
||||
dev::Server,
|
||||
http::header,
|
||||
middleware::{Compress, Logger},
|
||||
web, App, HttpServer,
|
||||
};
|
||||
use config::parser::Config;
|
||||
use handlebars::Handlebars;
|
||||
use handler::paths::{file_path, FileType};
|
||||
|
@ -42,7 +47,7 @@ use handler::paths::{file_path, FileType};
|
|||
pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
|
||||
let mut handlebars: Handlebars = Handlebars::new();
|
||||
|
||||
let public_folder_path: String = file_path(FileType::Theme)?;
|
||||
let public_folder_path: &str = file_path(FileType::Theme)?;
|
||||
|
||||
handlebars
|
||||
.register_templates_directory(".html", format!("{}/templates", public_folder_path))
|
||||
|
@ -68,6 +73,7 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
|
|||
.app_data(web::Data::new(config.clone()))
|
||||
.wrap(cors)
|
||||
.wrap(Logger::default()) // added logging middleware for logging.
|
||||
.wrap(Compress::default()) // compress response payloads to reduce the amount of data sent to clients.
|
||||
// Serve images and static files (css and js files).
|
||||
.service(
|
||||
fs::Files::new("/static", format!("{}/static", public_folder_path))
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
//! data scraped from the upstream search engines.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use smallvec::SmallVec;
|
||||
|
||||
use crate::{config::parser_models::Style, engines::engine_models::EngineError};
|
||||
|
||||
|
@ -16,13 +17,13 @@ use crate::{config::parser_models::Style, engines::engine_models::EngineError};
|
|||
/// (href url in html in simple words).
|
||||
/// * `description` - The description of the search result.
|
||||
/// * `engine` - The names of the upstream engines from which these results were provided.
|
||||
#[derive(Clone, Serialize, Deserialize)]
|
||||
#[derive(Clone, Serialize, Deserialize, Debug)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct SearchResult {
|
||||
pub title: String,
|
||||
pub url: String,
|
||||
pub description: String,
|
||||
pub engine: Vec<String>,
|
||||
pub engine: SmallVec<[String; 0]>,
|
||||
}
|
||||
|
||||
impl SearchResult {
|
||||
|
@ -35,12 +36,12 @@ impl SearchResult {
|
|||
/// (href url in html in simple words).
|
||||
/// * `description` - The description of the search result.
|
||||
/// * `engine` - The names of the upstream engines from which these results were provided.
|
||||
pub fn new(title: String, url: String, description: String, engine: Vec<String>) -> Self {
|
||||
pub fn new(title: &str, url: &str, description: &str, engine: &[&str]) -> Self {
|
||||
SearchResult {
|
||||
title,
|
||||
url,
|
||||
description,
|
||||
engine,
|
||||
title: title.to_owned(),
|
||||
url: url.to_owned(),
|
||||
description: description.to_owned(),
|
||||
engine: engine.iter().map(|name| name.to_string()).collect(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -49,8 +50,8 @@ impl SearchResult {
|
|||
/// # Arguments
|
||||
///
|
||||
/// * `engine` - Takes an engine name provided as a String.
|
||||
pub fn add_engines(&mut self, engine: String) {
|
||||
self.engine.push(engine)
|
||||
pub fn add_engines(&mut self, engine: &str) {
|
||||
self.engine.push(engine.to_owned())
|
||||
}
|
||||
|
||||
/// A function which returns the engine name stored from the struct as a string.
|
||||
|
@ -58,13 +59,12 @@ impl SearchResult {
|
|||
/// # Returns
|
||||
///
|
||||
/// An engine name stored as a string from the struct.
|
||||
pub fn engine(self) -> String {
|
||||
self.engine.get(0).unwrap().to_string()
|
||||
pub fn engine(&mut self) -> String {
|
||||
std::mem::take(&mut self.engine[0])
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct EngineErrorInfo {
|
||||
pub error: String,
|
||||
pub engine: String,
|
||||
|
@ -72,18 +72,18 @@ pub struct EngineErrorInfo {
|
|||
}
|
||||
|
||||
impl EngineErrorInfo {
|
||||
pub fn new(error: &EngineError, engine: String) -> Self {
|
||||
pub fn new(error: &EngineError, engine: &str) -> Self {
|
||||
Self {
|
||||
error: match error {
|
||||
EngineError::RequestError => String::from("RequestError"),
|
||||
EngineError::EmptyResultSet => String::from("EmptyResultSet"),
|
||||
EngineError::UnexpectedError => String::from("UnexpectedError"),
|
||||
EngineError::RequestError => "RequestError".to_owned(),
|
||||
EngineError::EmptyResultSet => "EmptyResultSet".to_owned(),
|
||||
EngineError::UnexpectedError => "UnexpectedError".to_owned(),
|
||||
},
|
||||
engine,
|
||||
engine: engine.to_owned(),
|
||||
severity_color: match error {
|
||||
EngineError::RequestError => String::from("green"),
|
||||
EngineError::EmptyResultSet => String::from("blue"),
|
||||
EngineError::UnexpectedError => String::from("red"),
|
||||
EngineError::RequestError => "green".to_owned(),
|
||||
EngineError::EmptyResultSet => "blue".to_owned(),
|
||||
EngineError::UnexpectedError => "red".to_owned(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
@ -127,10 +127,10 @@ impl SearchResults {
|
|||
/// * ``
|
||||
pub fn new(
|
||||
results: Vec<SearchResult>,
|
||||
page_query: String,
|
||||
engine_errors_info: Vec<EngineErrorInfo>,
|
||||
page_query: &str,
|
||||
engine_errors_info: &[EngineErrorInfo],
|
||||
) -> Self {
|
||||
SearchResults {
|
||||
Self {
|
||||
results,
|
||||
page_query,
|
||||
style: Style::default(),
|
||||
|
|
|
@ -64,15 +64,15 @@ type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<Eng
|
|||
/// function in either `searx` or `duckduckgo` or both, otherwise returns a `SearchResults` struct
|
||||
/// containing appropriate values.
|
||||
pub async fn aggregate(
|
||||
query: String,
|
||||
query: &str,
|
||||
page: u32,
|
||||
random_delay: bool,
|
||||
debug: bool,
|
||||
upstream_search_engines: Vec<EngineHandler>,
|
||||
upstream_search_engines: &[EngineHandler],
|
||||
request_timeout: u8,
|
||||
safe_search: u8,
|
||||
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
||||
let user_agent: String = random_user_agent();
|
||||
let user_agent: &str = random_user_agent();
|
||||
|
||||
// Add a random delay before making the request.
|
||||
if random_delay || !debug {
|
||||
|
@ -81,16 +81,15 @@ pub async fn aggregate(
|
|||
tokio::time::sleep(Duration::from_secs(delay_secs)).await;
|
||||
}
|
||||
|
||||
let mut names: Vec<&str> = vec![];
|
||||
let mut names: Vec<&str> = Vec::with_capacity(0);
|
||||
|
||||
// create tasks for upstream result fetching
|
||||
let mut tasks: FutureVec = FutureVec::new();
|
||||
|
||||
for engine_handler in upstream_search_engines {
|
||||
let (name, search_engine) = engine_handler.into_name_engine();
|
||||
let (name, search_engine) = engine_handler.to_owned().into_name_engine();
|
||||
names.push(name);
|
||||
let query: String = query.clone();
|
||||
let user_agent: String = user_agent.clone();
|
||||
let query: String = query.to_owned();
|
||||
tasks.push(tokio::spawn(async move {
|
||||
search_engine
|
||||
.results(
|
||||
|
@ -117,7 +116,7 @@ pub async fn aggregate(
|
|||
let mut result_map: HashMap<String, SearchResult> = HashMap::new();
|
||||
let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
|
||||
|
||||
let mut handle_error = |error: Report<EngineError>, engine_name: String| {
|
||||
let mut handle_error = |error: &Report<EngineError>, engine_name: &'static str| {
|
||||
log::error!("Engine Error: {:?}", error);
|
||||
engine_errors_info.push(EngineErrorInfo::new(
|
||||
error.downcast_ref::<EngineError>().unwrap(),
|
||||
|
@ -127,7 +126,7 @@ pub async fn aggregate(
|
|||
|
||||
for _ in 0..responses.len() {
|
||||
let response = responses.pop().unwrap();
|
||||
let engine = names.pop().unwrap().to_string();
|
||||
let engine = names.pop().unwrap();
|
||||
|
||||
if result_map.is_empty() {
|
||||
match response {
|
||||
|
@ -135,7 +134,7 @@ pub async fn aggregate(
|
|||
result_map = results.clone();
|
||||
}
|
||||
Err(error) => {
|
||||
handle_error(error, engine);
|
||||
handle_error(&error, engine);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
|
@ -147,13 +146,13 @@ pub async fn aggregate(
|
|||
result_map
|
||||
.entry(key)
|
||||
.and_modify(|result| {
|
||||
result.add_engines(engine.clone());
|
||||
result.add_engines(engine);
|
||||
})
|
||||
.or_insert_with(|| -> SearchResult { value });
|
||||
});
|
||||
}
|
||||
Err(error) => {
|
||||
handle_error(error, engine);
|
||||
handle_error(&error, engine);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -177,11 +176,7 @@ pub async fn aggregate(
|
|||
|
||||
let results: Vec<SearchResult> = result_map.into_values().collect();
|
||||
|
||||
Ok(SearchResults::new(
|
||||
results,
|
||||
query.to_string(),
|
||||
engine_errors_info,
|
||||
))
|
||||
Ok(SearchResults::new(results, query, &engine_errors_info))
|
||||
}
|
||||
|
||||
/// Filters a map of search results using a list of regex patterns.
|
||||
|
@ -212,7 +207,10 @@ pub fn filter_with_lists(
|
|||
|| re.is_match(&search_result.description.to_lowercase())
|
||||
{
|
||||
// If the search result matches the regex pattern, move it from the original map to the resultant map
|
||||
resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
|
||||
resultant_map.insert(
|
||||
url.to_owned(),
|
||||
map_to_be_filtered.remove(&url.to_owned()).unwrap(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -223,6 +221,7 @@ pub fn filter_with_lists(
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use smallvec::smallvec;
|
||||
use std::collections::HashMap;
|
||||
use std::io::Write;
|
||||
use tempfile::NamedTempFile;
|
||||
|
@ -232,22 +231,22 @@ mod tests {
|
|||
// Create a map of search results to filter
|
||||
let mut map_to_be_filtered = HashMap::new();
|
||||
map_to_be_filtered.insert(
|
||||
"https://www.example.com".to_string(),
|
||||
"https://www.example.com".to_owned(),
|
||||
SearchResult {
|
||||
title: "Example Domain".to_string(),
|
||||
url: "https://www.example.com".to_string(),
|
||||
title: "Example Domain".to_owned(),
|
||||
url: "https://www.example.com".to_owned(),
|
||||
description: "This domain is for use in illustrative examples in documents."
|
||||
.to_string(),
|
||||
engine: vec!["Google".to_string(), "Bing".to_string()],
|
||||
.to_owned(),
|
||||
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||
},
|
||||
);
|
||||
map_to_be_filtered.insert(
|
||||
"https://www.rust-lang.org/".to_string(),
|
||||
"https://www.rust-lang.org/".to_owned(),
|
||||
SearchResult {
|
||||
title: "Rust Programming Language".to_string(),
|
||||
url: "https://www.rust-lang.org/".to_string(),
|
||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
|
||||
engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
|
||||
title: "Rust Programming Language".to_owned(),
|
||||
url: "https://www.rust-lang.org/".to_owned(),
|
||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
||||
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
||||
},
|
||||
);
|
||||
|
||||
|
@ -276,22 +275,22 @@ mod tests {
|
|||
fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut map_to_be_filtered = HashMap::new();
|
||||
map_to_be_filtered.insert(
|
||||
"https://www.example.com".to_string(),
|
||||
"https://www.example.com".to_owned(),
|
||||
SearchResult {
|
||||
title: "Example Domain".to_string(),
|
||||
url: "https://www.example.com".to_string(),
|
||||
title: "Example Domain".to_owned(),
|
||||
url: "https://www.example.com".to_owned(),
|
||||
description: "This domain is for use in illustrative examples in documents."
|
||||
.to_string(),
|
||||
engine: vec!["Google".to_string(), "Bing".to_string()],
|
||||
.to_owned(),
|
||||
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||
},
|
||||
);
|
||||
map_to_be_filtered.insert(
|
||||
"https://www.rust-lang.org/".to_string(),
|
||||
"https://www.rust-lang.org/".to_owned(),
|
||||
SearchResult {
|
||||
title: "Rust Programming Language".to_string(),
|
||||
url: "https://www.rust-lang.org/".to_string(),
|
||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
|
||||
engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
|
||||
title: "Rust Programming Language".to_owned(),
|
||||
url: "https://www.rust-lang.org/".to_owned(),
|
||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
||||
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
||||
},
|
||||
);
|
||||
|
||||
|
@ -336,13 +335,13 @@ mod tests {
|
|||
fn test_filter_with_lists_invalid_regex() {
|
||||
let mut map_to_be_filtered = HashMap::new();
|
||||
map_to_be_filtered.insert(
|
||||
"https://www.example.com".to_string(),
|
||||
"https://www.example.com".to_owned(),
|
||||
SearchResult {
|
||||
title: "Example Domain".to_string(),
|
||||
url: "https://www.example.com".to_string(),
|
||||
title: "Example Domain".to_owned(),
|
||||
url: "https://www.example.com".to_owned(),
|
||||
description: "This domain is for use in illustrative examples in documents."
|
||||
.to_string(),
|
||||
engine: vec!["Google".to_string(), "Bing".to_string()],
|
||||
.to_owned(),
|
||||
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||
},
|
||||
);
|
||||
|
||||
|
|
|
@ -1,28 +1,32 @@
|
|||
//! This module provides the functionality to generate a random user agent string.
|
||||
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use fake_useragent::{Browsers, UserAgents, UserAgentsBuilder};
|
||||
|
||||
static USER_AGENTS: once_cell::sync::Lazy<UserAgents> = once_cell::sync::Lazy::new(|| {
|
||||
UserAgentsBuilder::new()
|
||||
.cache(false)
|
||||
.dir("/tmp")
|
||||
.thread(1)
|
||||
.set_browsers(
|
||||
Browsers::new()
|
||||
.set_chrome()
|
||||
.set_safari()
|
||||
.set_edge()
|
||||
.set_firefox()
|
||||
.set_mozilla(),
|
||||
)
|
||||
.build()
|
||||
});
|
||||
static USER_AGENTS: OnceLock<UserAgents> = OnceLock::new();
|
||||
|
||||
/// A function to generate a random user agent to improve the privacy of the user.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// A randomly generated user agent string.
|
||||
pub fn random_user_agent() -> String {
|
||||
USER_AGENTS.random().to_string()
|
||||
pub fn random_user_agent() -> &'static str {
|
||||
USER_AGENTS
|
||||
.get_or_init(|| {
|
||||
UserAgentsBuilder::new()
|
||||
.cache(false)
|
||||
.dir("/tmp")
|
||||
.thread(1)
|
||||
.set_browsers(
|
||||
Browsers::new()
|
||||
.set_chrome()
|
||||
.set_safari()
|
||||
.set_edge()
|
||||
.set_firefox()
|
||||
.set_mozilla(),
|
||||
)
|
||||
.build()
|
||||
})
|
||||
.random()
|
||||
}
|
||||
|
|
|
@ -20,6 +20,10 @@ use regex::Regex;
|
|||
use serde::Deserialize;
|
||||
use tokio::join;
|
||||
|
||||
// ---- Constants ----
|
||||
/// Initialize redis cache connection once and store it on the heap.
|
||||
// This has to be a `static`, not a `const`: a `const` item is instantiated afresh at every
// place it is used, so `REDIS_CACHE.get_or_init(..)` would always run against a new, empty
// cell and build a new `RedisCache` on every call instead of reusing the first one.
static REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
|
||||
|
||||
/// A named struct which deserializes all the user provided search parameters and stores them.
|
||||
///
|
||||
/// # Fields
|
||||
|
@ -67,10 +71,10 @@ pub async fn not_found(
|
|||
/// * `engines` - It stores the upstream search engines selected by the user from the UI.
|
||||
#[allow(dead_code)]
|
||||
#[derive(Deserialize)]
|
||||
struct Cookie {
|
||||
theme: String,
|
||||
colorscheme: String,
|
||||
engines: Vec<String>,
|
||||
struct Cookie<'a> {
|
||||
theme: &'a str,
|
||||
colorscheme: &'a str,
|
||||
engines: Vec<&'a str>,
|
||||
}
|
||||
|
||||
/// Handles the route of search page of the `websurfx` meta search engine website and it takes
|
||||
|
@ -128,7 +132,7 @@ pub async fn search(
|
|||
safe_search
|
||||
),
|
||||
&config,
|
||||
query.to_string(),
|
||||
query,
|
||||
page - 1,
|
||||
req.clone(),
|
||||
safe_search
|
||||
|
@ -139,7 +143,7 @@ pub async fn search(
|
|||
config.binding_ip, config.port, query, page, safe_search
|
||||
),
|
||||
&config,
|
||||
query.to_string(),
|
||||
query,
|
||||
page,
|
||||
req.clone(),
|
||||
safe_search
|
||||
|
@ -154,7 +158,7 @@ pub async fn search(
|
|||
safe_search
|
||||
),
|
||||
&config,
|
||||
query.to_string(),
|
||||
query,
|
||||
page + 1,
|
||||
req.clone(),
|
||||
safe_search
|
||||
|
@ -175,15 +179,22 @@ pub async fn search(
|
|||
async fn results(
|
||||
url: String,
|
||||
config: &Config,
|
||||
query: String,
|
||||
query: &str,
|
||||
page: u32,
|
||||
req: HttpRequest,
|
||||
safe_search: u8,
|
||||
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
||||
//Initialize redis cache connection struct
|
||||
let mut redis_cache = RedisCache::new(config.redis_url.clone())?;
|
||||
let redis_cache: RedisCache = REDIS_CACHE
|
||||
.get_or_init(async {
|
||||
// Initialize the redis cache connection pool only once and store it in the heap.
|
||||
RedisCache::new(&config.redis_url, 5).await.unwrap()
|
||||
})
|
||||
.await
|
||||
.clone();
|
||||
|
||||
// fetch the cached results json.
|
||||
let cached_results_json = redis_cache.cached_json(&url);
|
||||
let cached_results_json: Result<String, error_stack::Report<crate::cache::error::PoolError>> =
|
||||
redis_cache.clone().cached_json(&url).await;
|
||||
// check if fetched cache results was indeed fetched or it was an error and if so
|
||||
// handle the data accordingly.
|
||||
match cached_results_json {
|
||||
|
@ -212,7 +223,7 @@ async fn results(
|
|||
Some(cookie_value) => {
|
||||
let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
|
||||
|
||||
let engines = cookie_value
|
||||
let engines: Vec<EngineHandler> = cookie_value
|
||||
.engines
|
||||
.iter()
|
||||
.filter_map(|name| EngineHandler::new(name))
|
||||
|
@ -223,7 +234,7 @@ async fn results(
|
|||
page,
|
||||
config.aggregator.random_delay,
|
||||
config.debug,
|
||||
engines,
|
||||
&engines,
|
||||
config.request_timeout,
|
||||
safe_search,
|
||||
)
|
||||
|
@ -235,7 +246,7 @@ async fn results(
|
|||
page,
|
||||
config.aggregator.random_delay,
|
||||
config.debug,
|
||||
config.upstream_search_engines.clone(),
|
||||
&config.upstream_search_engines,
|
||||
config.request_timeout,
|
||||
safe_search,
|
||||
)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue