new Engines struct, breaks cookies and frontend

This commit is contained in:
Milim 2024-08-15 12:18:19 +02:00
parent 2ca9990e54
commit e666316995
No known key found for this signature in database
13 changed files with 134 additions and 154 deletions

View file

@ -4,7 +4,7 @@ use figment::{providers::Serialized, Figment};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
/// Struct holding config Options /// Struct holding config Options
#[derive(Debug, Clone, Deserialize, Serialize)] #[derive(Debug, Deserialize, Serialize, Clone)]
pub struct Config { pub struct Config {
/// It stores the parsed port number option on which the server should launch. /// It stores the parsed port number option on which the server should launch.
pub port: u16, pub port: u16,
@ -21,7 +21,7 @@ pub struct Config {
/// It toggles whether to use adaptive HTTP windows /// It toggles whether to use adaptive HTTP windows
pub adaptive_window: bool, pub adaptive_window: bool,
/// It stores all the engine names that were enabled by the user. /// It stores all the engine names that were enabled by the user.
pub upstream_search_engines: Vec<String>, pub upstream_search_engines: crate::engines::Engines,
/// It stores the time (secs) which controls the server request timeout. /// It stores the time (secs) which controls the server request timeout.
pub request_timeout: u8, pub request_timeout: u8,
/// Set the keep-alive time for client connections to the HTTP server /// Set the keep-alive time for client connections to the HTTP server
@ -67,15 +67,7 @@ impl Default for Config {
logging: true, logging: true,
debug: false, debug: false,
adaptive_window: false, adaptive_window: false,
upstream_search_engines: vec![ upstream_search_engines: Default::default(),
"bing".into(),
"brave".into(),
"duckduckgo".into(),
"librex".into(),
"mojeek".into(),
"searx".into(),
"startpage".into(),
],
request_timeout: 2, request_timeout: 2,
tcp_connection_keep_alive: 10, tcp_connection_keep_alive: 10,
pool_idle_connection_timeout: 30, pool_idle_connection_timeout: 30,

View file

@ -24,18 +24,19 @@ pub struct Bing {
parser: SearchResultParser, parser: SearchResultParser,
} }
impl Bing { impl Default for Bing {
/// Creates the Bing parser. /// Creates the Bing parser.
pub fn new() -> Result<Self, EngineError> { fn default() -> Self {
Ok(Self { Self {
parser: SearchResultParser::new( parser: SearchResultParser::new(
".b_results", ".b_results",
".b_algo", ".b_algo",
"h2 a", "h2 a",
".tpcn a.tilk", ".tpcn a.tilk",
".b_caption p", ".b_caption p",
)?, )
}) .expect("somehow you changed the static stings in the binary i guess"),
}
} }
} }

View file

@ -20,18 +20,19 @@ pub struct Brave {
parser: SearchResultParser, parser: SearchResultParser,
} }
impl Brave { impl Default for Brave {
/// Creates the Brave parser. /// Creates the Brave parser.
pub fn new() -> Result<Brave, EngineError> { fn default() -> Self {
Ok(Self { Self {
parser: SearchResultParser::new( parser: SearchResultParser::new(
"#results h4", "#results h4",
"#results [data-pos]", "#results [data-pos]",
"a > .url", "a > .url",
"a", "a",
".snippet-description", ".snippet-description",
)?, )
}) .expect("somehow you changed the static stings in the binary i guess"),
}
} }
} }

View file

@ -23,18 +23,19 @@ pub struct DuckDuckGo {
parser: SearchResultParser, parser: SearchResultParser,
} }
impl DuckDuckGo { impl Default for DuckDuckGo {
/// Creates the DuckDuckGo parser. /// Creates the DuckDuckGo parser.
pub fn new() -> Result<Self, EngineError> { fn default() -> Self {
Ok(Self { Self {
parser: SearchResultParser::new( parser: SearchResultParser::new(
".no-results", ".no-results",
".results>.result", ".results>.result",
".result__title>.result__a", ".result__title>.result__a",
".result__url", ".result__url",
".result__snippet", ".result__snippet",
)?, )
}) .expect("somehow you changed the static stings in the binary i guess"),
}
} }
} }

View file

@ -20,22 +20,23 @@ pub struct LibreX {
parser: SearchResultParser, parser: SearchResultParser,
} }
impl LibreX { impl Default for LibreX {
/// Creates a new instance of LibreX with a default configuration. /// Creates a new instance of LibreX with a default configuration.
/// ///
/// # Returns /// # Returns
/// ///
/// Returns a `Result` containing `LibreX` if successful, otherwise an `EngineError`. /// Returns a `Result` containing `LibreX` if successful, otherwise an `EngineError`.
pub fn new() -> Result<Self, EngineError> { fn default() -> Self {
Ok(Self { Self {
parser: SearchResultParser::new( parser: SearchResultParser::new(
".text-result-container>p", ".text-result-container>p",
".text-result-container", ".text-result-container",
".text-result-wrapper>a>h2", ".text-result-wrapper>a>h2",
".text-result-wrapper>a", ".text-result-wrapper>a",
".text-result-wrapper>span", ".text-result-wrapper>span",
)?, )
}) .expect("somehow you changed the static stings in the binary i guess"),
}
} }
} }

View file

@ -3,6 +3,12 @@
//! provide a standard functions to be implemented for all the upstream search engine handling //! provide a standard functions to be implemented for all the upstream search engine handling
//! code. Moreover, it also provides a custom error for the upstream search engine handling code. //! code. Moreover, it also provides a custom error for the upstream search engine handling code.
use std::sync::Arc;
use serde::{Deserialize, Serialize};
use crate::models::engine_models::EngineHandler;
pub mod bing; pub mod bing;
pub mod brave; pub mod brave;
pub mod duckduckgo; pub mod duckduckgo;
@ -11,3 +17,66 @@ pub mod mojeek;
pub mod search_result_parser; pub mod search_result_parser;
pub mod searx; pub mod searx;
pub mod startpage; pub mod startpage;
/// Tracks which upstream search engines are enabled.
///
/// Each field is an on/off switch for the engine of the same name. The struct
/// is (de)serialized with serde, so the field names double as the keys used in
/// serialized form.
///
/// Only real engines are listed here: `search_result_parser` is a helper
/// module rather than an engine, so it has no switch.
#[derive(Debug, Serialize, Deserialize, Clone, Copy)]
pub struct Engines {
    /// Whether the Bing engine is enabled.
    bing: bool,
    /// Whether the Brave engine is enabled.
    brave: bool,
    /// Whether the DuckDuckGo engine is enabled.
    duckduckgo: bool,
    /// Whether the LibreX engine is enabled.
    librex: bool,
    /// Whether the Mojeek engine is enabled.
    mojeek: bool,
    /// Whether the Searx engine is enabled.
    searx: bool,
    /// Whether the Startpage engine is enabled.
    startpage: bool,
}

impl Default for Engines {
    /// Returns a configuration with every supported engine enabled.
    fn default() -> Self {
        Self {
            bing: true,
            brave: true,
            duckduckgo: true,
            librex: true,
            mojeek: true,
            searx: true,
            startpage: true,
        }
    }
}
impl From<&Engines> for Vec<EngineHandler> {
    /// Builds one [`EngineHandler`] for every engine whose switch is set.
    ///
    /// Handlers are produced in a fixed order: duckduckgo, searx, brave,
    /// startpage, librex, mojeek, bing. Engines that are switched off are
    /// never constructed.
    fn from(value: &Engines) -> Self {
        let mut handlers: Vec<EngineHandler> = Vec::new();

        // `bool::then` only runs its closure when the flag is `true`, and
        // extending with the resulting `Option` appends zero or one handler.
        handlers.extend(value.duckduckgo.then(|| {
            EngineHandler::new(
                "duckduckgo",
                Arc::new(crate::engines::duckduckgo::DuckDuckGo::default()),
            )
        }));
        handlers.extend(value.searx.then(|| {
            EngineHandler::new("searx", Arc::new(crate::engines::searx::Searx::default()))
        }));
        handlers.extend(value.brave.then(|| {
            EngineHandler::new("brave", Arc::new(crate::engines::brave::Brave::default()))
        }));
        handlers.extend(value.startpage.then(|| {
            EngineHandler::new(
                "startpage",
                Arc::new(crate::engines::startpage::Startpage::default()),
            )
        }));
        handlers.extend(value.librex.then(|| {
            EngineHandler::new(
                "librex",
                Arc::new(crate::engines::librex::LibreX::default()),
            )
        }));
        handlers.extend(value.mojeek.then(|| {
            EngineHandler::new(
                "mojeek",
                Arc::new(crate::engines::mojeek::Mojeek::default()),
            )
        }));
        handlers.extend(value.bing.then(|| {
            EngineHandler::new("bing", Arc::new(crate::engines::bing::Bing::default()))
        }));

        handlers
    }
}

View file

@ -23,18 +23,19 @@ pub struct Mojeek {
parser: SearchResultParser, parser: SearchResultParser,
} }
impl Mojeek { impl Default for Mojeek {
/// Creates the Mojeek parser. /// Creates the Mojeek parser.
pub fn new() -> Result<Self, EngineError> { fn default() -> Self {
Ok(Self { Self {
parser: SearchResultParser::new( parser: SearchResultParser::new(
".result-col", ".result-col",
".results-standard li", ".results-standard li",
"a span.url", "a span.url",
"h2 a.title", "h2 a.title",
"p.s", "p.s",
)?, )
}) .expect("somehow you changed the static stings in the binary i guess"),
}
} }
} }

View file

@ -19,18 +19,19 @@ pub struct Searx {
parser: SearchResultParser, parser: SearchResultParser,
} }
impl Searx { impl Default for Searx {
/// creates a Searx parser /// creates a Searx parser
pub fn new() -> Result<Searx, EngineError> { fn default() -> Self {
Ok(Self { Self {
parser: SearchResultParser::new( parser: SearchResultParser::new(
"#urls>.dialog-error>p", "#urls>.dialog-error>p",
".result", ".result",
"h3>a", "h3>a",
"h3>a", "h3>a",
".content", ".content",
)?, )
}) .expect("somehow you changed the static stings in the binary i guess"),
}
} }
} }

View file

@ -23,18 +23,19 @@ pub struct Startpage {
parser: SearchResultParser, parser: SearchResultParser,
} }
impl Startpage { impl Default for Startpage {
/// Creates the Startpage parser. /// Creates the Startpage parser.
pub fn new() -> Result<Self, EngineError> { fn default() -> Self {
Ok(Self { Self {
parser: SearchResultParser::new( parser: SearchResultParser::new(
".no-results", ".no-results",
".w-gl__result__main", ".w-gl__result__main",
".w-gl__result-second-line-container>.w-gl__result-title>h3", ".w-gl__result-second-line-container>.w-gl__result-title>h3",
".w-gl__result-url", ".w-gl__result-url",
".w-gl__description", ".w-gl__description",
)?, )
}) .expect("somehow you changed the static stings in the binary i guess"),
}
} }
} }

View file

@ -2,7 +2,7 @@
//! and register all the routes for the `crabbysearch` meta search engine website. //! and register all the routes for the `crabbysearch` meta search engine website.
#![forbid(unsafe_code, clippy::panic)] #![forbid(unsafe_code, clippy::panic)]
#![deny(missing_docs, clippy::missing_docs_in_private_items, clippy::perf)] #![deny(missing_docs, clippy::perf)]
#![warn(clippy::cognitive_complexity, rust_2018_idioms)] #![warn(clippy::cognitive_complexity, rust_2018_idioms)]
pub mod cache; pub mod cache;
@ -50,7 +50,7 @@ async fn main() {
config.port, config.port,
); );
let listener = TcpListener::bind((config.binding_ip.as_str(), config.port)) let listener = TcpListener::bind((config.binding_ip.clone(), config.port))
.expect("could not create TcpListener"); .expect("could not create TcpListener");
let public_folder_path: &str = file_path(FileType::Theme).unwrap(); let public_folder_path: &str = file_path(FileType::Theme).unwrap();

View file

@ -1,10 +1,12 @@
//! This module provides the error enum to handle different errors associated while requesting data from //! This module provides the error enum to handle different errors associated while requesting data from
//! the upstream search engines with the search query provided by the user. //! the upstream search engines with the search query provided by the user.
use crate::engines;
use super::aggregation_models::SearchResult; use super::aggregation_models::SearchResult;
use error_stack::{Report, Result, ResultExt}; use error_stack::{Report, Result, ResultExt};
use reqwest::Client; use reqwest::Client;
use std::fmt; use std::{fmt, sync::Arc};
/// A custom error type used for handle engine associated errors. /// A custom error type used for handle engine associated errors.
#[derive(Debug)] #[derive(Debug)]
@ -153,14 +155,17 @@ pub trait SearchEngine: Sync + Send {
pub struct EngineHandler { pub struct EngineHandler {
/// It stores the engine struct wrapped in a box smart pointer as the engine struct implements /// It stores the engine struct wrapped in a box smart pointer as the engine struct implements
/// the `SearchEngine` trait. /// the `SearchEngine` trait.
engine: Box<dyn SearchEngine>, engine: Arc<dyn SearchEngine>,
/// It stores the name of the engine to which the struct is associated to. /// It stores the name of the engine to which the struct is associated to.
name: &'static str, name: &'static str,
} }
impl Clone for EngineHandler { impl Clone for EngineHandler {
fn clone(&self) -> Self { fn clone(&self) -> Self {
Self::new(self.name).unwrap() Self {
engine: self.engine.clone(),
name: self.name.clone(),
}
} }
} }
@ -174,53 +179,13 @@ impl EngineHandler {
/// # Returns /// # Returns
/// ///
/// It returns an option either containing the value or a none if the engine is unknown /// It returns an option either containing the value or a none if the engine is unknown
pub fn new(engine_name: &str) -> Result<Self, EngineError> { pub fn new(name: &'static str, engine: Arc<dyn SearchEngine>) -> Self {
let engine: (&'static str, Box<dyn SearchEngine>) = Self { name, engine }
match engine_name.to_lowercase().as_str() {
"duckduckgo" => {
let engine = crate::engines::duckduckgo::DuckDuckGo::new()?;
("duckduckgo", Box::new(engine))
}
"searx" => {
let engine = crate::engines::searx::Searx::new()?;
("searx", Box::new(engine))
}
"brave" => {
let engine = crate::engines::brave::Brave::new()?;
("brave", Box::new(engine))
}
"startpage" => {
let engine = crate::engines::startpage::Startpage::new()?;
("startpage", Box::new(engine))
}
"librex" => {
let engine = crate::engines::librex::LibreX::new()?;
("librex", Box::new(engine))
}
"mojeek" => {
let engine = crate::engines::mojeek::Mojeek::new()?;
("mojeek", Box::new(engine))
}
"bing" => {
let engine = crate::engines::bing::Bing::new()?;
("bing", Box::new(engine))
}
_ => {
return Err(Report::from(EngineError::NoSuchEngineFound(
engine_name.to_string(),
)))
}
};
Ok(Self {
engine: engine.1,
name: engine.0,
})
} }
/// This function converts the EngineHandler type into a tuple containing the engine name and /// This function converts the EngineHandler type into a tuple containing the engine name and
/// the associated engine struct. /// the associated engine struct.
pub fn into_name_engine(self) -> (&'static str, Box<dyn SearchEngine>) { pub fn into_name_engine(self) -> (&'static str, Arc<dyn SearchEngine>) {
(self.name, self.engine) (self.name, self.engine)
} }
} }

View file

@ -1,11 +1,7 @@
//! This module provides the models to parse cookies and search parameters from the search //! This module provides the models to parse cookies and search parameters from the search
//! engine website. //! engine website.
use std::borrow::Cow;
use serde::Deserialize; use serde::Deserialize;
use crate::config::Style;
/// A named struct which deserializes all the user provided search parameters and stores them. /// A named struct which deserializes all the user provided search parameters and stores them.
#[derive(Deserialize)] #[derive(Deserialize)]
pub struct SearchParams { pub struct SearchParams {
@ -19,27 +15,3 @@ pub struct SearchParams {
/// search url. /// search url.
pub safesearch: Option<u8>, pub safesearch: Option<u8>,
} }
/// Client-side settings deserialized from a cookie sent by the browser.
///
/// The `'a` lifetime lets every string be held as a `Cow`, so values can be
/// borrowed from existing data instead of being copied.
// NOTE(review): the reason for `allow(dead_code)` is not visible here — confirm it is still needed.
#[allow(dead_code)]
#[derive(Deserialize)]
pub struct Cookie<'a> {
/// Name of the theme used by the website.
pub theme: Cow<'a, str>,
/// Name of the colorscheme applied to the website theme.
pub colorscheme: Cow<'a, str>,
/// Names of the upstream search engines the user selected in the UI.
pub engines: Cow<'a, Vec<Cow<'a, str>>>,
}
impl<'a> Cookie<'a> {
    /// Constructs a [`Cookie`] from a style and a list of engine names.
    ///
    /// The theme and colorscheme are borrowed from `style` rather than copied,
    /// and `engines` is sorted before it is stored.
    pub fn build(style: &'a Style, mut engines: Vec<Cow<'a, str>>) -> Self {
        engines.sort();

        // Annotated so the borrowed fields coerce to `&str` exactly as they
        // would when written directly in the struct literal.
        let theme: Cow<'a, str> = Cow::Borrowed(&style.theme);
        let colorscheme: Cow<'a, str> = Cow::Borrowed(&style.colorscheme);
        let engines = Cow::Owned(engines);

        Self {
            theme,
            colorscheme,
            engines,
        }
    }
}

View file

@ -3,6 +3,7 @@
use crate::{ use crate::{
cache::Cache, cache::Cache,
config::Config, config::Config,
engines::Engines,
models::{ models::{
aggregation_models::SearchResults, aggregation_models::SearchResults,
engine_models::EngineHandler, engine_models::EngineHandler,
@ -47,18 +48,9 @@ pub async fn search(
let cookie = req.cookie("appCookie"); let cookie = req.cookie("appCookie");
// Get search settings using the user's cookie or from the server's config // Get search settings using the user's cookie or from the server's config
let search_settings: server_models::Cookie<'_> = cookie let search_settings: crate::engines::Engines = cookie
.and_then(|cookie_value| serde_json::from_str(cookie_value.value()).ok()) .and_then(|cookie_value| serde_json::from_str(cookie_value.value()).ok())
.unwrap_or_else(|| { .unwrap();
server_models::Cookie::build(
&config.style,
config
.upstream_search_engines
.iter()
.map(|e| Cow::Borrowed(e.as_str()))
.collect(),
)
});
// Closure wrapping the results function capturing local references // Closure wrapping the results function capturing local references
let get_results = |page| results(config.clone(), cache.clone(), query, page, &search_settings); let get_results = |page| results(config.clone(), cache.clone(), query, page, &search_settings);
@ -140,16 +132,11 @@ async fn results(
cache: web::Data<crate::cache::Cache>, cache: web::Data<crate::cache::Cache>,
query: &str, query: &str,
page: u32, page: u32,
search_settings: &server_models::Cookie<'_>, upstream: &Engines,
) -> Result<(SearchResults, String), Box<dyn std::error::Error>> { ) -> Result<(SearchResults, String), Box<dyn std::error::Error>> {
// eagerly parse cookie value to evaluate safe search level // eagerly parse cookie value to evaluate safe search level
let cache_key = format!( let cache_key = format!("search?q={}&page={}&engines={:?}", query, page, upstream);
"search?q={}&page={}&engines={}",
query,
page,
search_settings.engines.join(",")
);
// fetch the cached results json. // fetch the cached results json.
let response = cache.cached_results(&cache_key); let response = cache.cached_results(&cache_key);
@ -162,20 +149,8 @@ async fn results(
// default selected upstream search engines from the config file otherwise // default selected upstream search engines from the config file otherwise
// parse the non-empty cookie and grab the user selected engines from the // parse the non-empty cookie and grab the user selected engines from the
// UI and use that. // UI and use that.
let mut results: SearchResults = match search_settings.engines.is_empty() { let mut results: SearchResults = match true {
false => { false => aggregate(query, page, config, &Vec::<EngineHandler>::from(upstream)).await?,
aggregate(
query,
page,
config,
&search_settings
.engines
.iter()
.filter_map(|engine| EngineHandler::new(engine).ok())
.collect::<Vec<EngineHandler>>(),
)
.await?
}
true => { true => {
let mut search_results = SearchResults::default(); let mut search_results = SearchResults::default();
search_results.set_no_engines_selected(); search_results.set_no_engines_selected();