strip garbage

This commit is contained in:
Milim 2024-08-14 21:34:18 +02:00
parent d5524d7eae
commit 6567af268e
No known key found for this signature in database
36 changed files with 496 additions and 2434 deletions

View file

@ -1,54 +0,0 @@
//! Main module of the application
//!
//! This module contains the main function which handles the logging of the application to the
//! stdout and handles the command line arguments provided and launches the `websurfx` server.
#[cfg(not(feature = "dhat-heap"))]
use mimalloc::MiMalloc;
use std::{net::TcpListener, sync::OnceLock};
use websurfx::{cache::cacher::create_cache, config::parser::Config, run};
/// A dhat heap memory profiler
#[cfg(feature = "dhat-heap")]
#[global_allocator]
static ALLOC: dhat::Alloc = dhat::Alloc;
#[cfg(not(feature = "dhat-heap"))]
#[global_allocator]
static GLOBAL: MiMalloc = MiMalloc;
/// A static constant for holding the parsed config.
static CONFIG: OnceLock<Config> = OnceLock::new();
/// The function that launches the main server and registers all the routes of the website.
///
/// # Error
///
/// Returns an error if the port is being used by something else on the system and is not
/// available for being used for other applications.
#[actix_web::main]
async fn main() -> std::io::Result<()> {
// A dhat heap profiler initialization.
#[cfg(feature = "dhat-heap")]
let _profiler = dhat::Profiler::new_heap();
// Initialize the parsed config globally.
let config = CONFIG.get_or_init(|| Config::parse(false).unwrap());
let cache = create_cache(config).await;
log::info!(
"started server on port {} and IP {}",
config.port,
config.binding_ip
);
log::info!(
"Open http://{}:{}/ in your browser",
config.binding_ip,
config.port,
);
let listener = TcpListener::bind((config.binding_ip.as_str(), config.port))?;
run(listener, config, cache)?.await
}

570
src/cache/cacher.rs vendored
View file

@ -2,583 +2,57 @@
//! from the upstream search engines in a json format.
use error_stack::Report;
#[cfg(feature = "memory-cache")]
use mini_moka::sync::Cache as MokaCache;
#[cfg(feature = "memory-cache")]
use mini_moka::sync::ConcurrentCacheExt;
#[cfg(feature = "memory-cache")]
use std::time::Duration;
use tokio::sync::Mutex;
use crate::{config::parser::Config, models::aggregation_models::SearchResults};
use super::error::CacheError;
#[cfg(feature = "redis-cache")]
use super::redis_cacher::RedisCache;
#[cfg(any(feature = "encrypt-cache-results", feature = "cec-cache-results"))]
use super::encryption::*;
/// Abstraction trait for common methods provided by a cache backend.
#[async_trait::async_trait]
pub trait Cacher: Send + Sync {
// A function that builds the cache from the given configuration.
///
/// # Arguments
///
/// * `config` - It takes the config struct as an argument.
///
/// # Returns
///
/// It returns a newly initialized backend based on the feature enabled by the user.
async fn build(config: &Config) -> Self
where
Self: Sized;
/// A function which fetches the cached json results as json string.
///
/// # Arguments
///
/// * `url` - It takes an url as a string.
///
/// # Error
///
/// Returns the `SearchResults` from the cache if the program executes normally otherwise
/// returns a `CacheError` if the results cannot be retrieved from the cache.
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>>;
/// A function which caches the results by using the `url` as the key and
/// `json results` as the value and stores it in the cache
///
/// # Arguments
///
/// * `json_results` - It takes the json results string as an argument.
/// * `url` - It takes the url as a String.
///
/// # Error
///
/// Returns a unit type if the program caches the given search results without a failure
/// otherwise it returns a `CacheError` if the search results cannot be cached due to a
/// failure.
async fn cache_results(
&mut self,
search_results: &[SearchResults],
urls: &[String],
) -> Result<(), Report<CacheError>>;
/// A helper function which computes the hash of the url and formats and returns it as string.
///
/// # Arguments
///
/// * `url` - It takes an url as string.
fn hash_url(&self, url: &str) -> String {
blake3::hash(url.as_bytes()).to_string()
}
/// A helper function that returns either encrypted or decrypted results.
/// Feature flags (**encrypt-cache-results or cec-cache-results**) are required for this to work.
///
/// # Arguments
///
/// * `bytes` - It takes a slice of bytes as an argument.
/// * `encrypt` - A boolean to choose whether to encrypt or decrypt the bytes
///
/// # Error
/// Returns either encrypted or decrypted bytes on success otherwise it returns a CacheError
/// on failure.
#[cfg(any(
// feature = "compress-cache-results",
feature = "encrypt-cache-results",
feature = "cec-cache-results"
))]
async fn encrypt_or_decrypt_results(
&mut self,
mut bytes: Vec<u8>,
encrypt: bool,
) -> Result<Vec<u8>, Report<CacheError>> {
use chacha20poly1305::{
aead::{Aead, AeadCore, KeyInit, OsRng},
ChaCha20Poly1305,
};
let cipher = CIPHER.get_or_init(|| {
let key = ChaCha20Poly1305::generate_key(&mut OsRng);
ChaCha20Poly1305::new(&key)
});
let encryption_key = ENCRYPTION_KEY.get_or_init(
|| ChaCha20Poly1305::generate_nonce(&mut OsRng), // 96-bits; unique per message
);
bytes = if encrypt {
cipher
.encrypt(encryption_key, bytes.as_ref())
.map_err(|_| CacheError::EncryptionError)?
} else {
cipher
.decrypt(encryption_key, bytes.as_ref())
.map_err(|_| CacheError::EncryptionError)?
};
Ok(bytes)
}
/// A helper function that returns compressed results.
/// Feature flags (**compress-cache-results or cec-cache-results**) are required for this to work.
///
/// # Arguments
///
/// * `bytes` - It takes a slice of bytes as an argument.
///
/// # Error
/// Returns the compressed bytes on success otherwise it returns a CacheError
/// on failure.
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
async fn compress_results(
&mut self,
mut bytes: Vec<u8>,
) -> Result<Vec<u8>, Report<CacheError>> {
use tokio::io::AsyncWriteExt;
let mut writer = async_compression::tokio::write::BrotliEncoder::new(Vec::new());
writer
.write_all(&bytes)
.await
.map_err(|_| CacheError::CompressionError)?;
writer
.shutdown()
.await
.map_err(|_| CacheError::CompressionError)?;
bytes = writer.into_inner();
Ok(bytes)
}
/// A helper function that returns compressed-encrypted results.
/// Feature flag (**cec-cache-results**) is required for this to work.
///
/// # Arguments
///
/// * `bytes` - It takes a slice of bytes as an argument.
///
/// # Error
/// Returns the compressed and encrypted bytes on success otherwise it returns a CacheError
/// on failure.
#[cfg(feature = "cec-cache-results")]
async fn compress_encrypt_compress_results(
&mut self,
mut bytes: Vec<u8>,
) -> Result<Vec<u8>, Report<CacheError>> {
// compress first
bytes = self.compress_results(bytes).await?;
// encrypt
bytes = self.encrypt_or_decrypt_results(bytes, true).await?;
// compress again;
bytes = self.compress_results(bytes).await?;
Ok(bytes)
}
/// A helper function that returns compressed results.
/// Feature flags (**compress-cache-results or cec-cache-results**) are required for this to work.
/// If bytes where
/// # Arguments
///
/// * `bytes` - It takes a slice of bytes as an argument.
///
/// # Error
/// Returns the uncompressed bytes on success otherwise it returns a CacheError
/// on failure.
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
async fn decompress_results(&mut self, bytes: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
cfg_if::cfg_if! {
if #[cfg(feature = "compress-cache-results")]
{
decompress_util(bytes).await
}
else if #[cfg(feature = "cec-cache-results")]
{
let decompressed = decompress_util(bytes)?;
let decrypted = self.encrypt_or_decrypt_results(decompressed, false)?;
decompress_util(&decrypted).await
}
}
}
/// A helper function that compresses or encrypts search results before they're inserted into a cache store
/// # Arguments
///
/// * `search_results` - A reference to the search_Results to process.
///
///
/// # Error
/// Returns a Vec of compressed or encrypted bytes on success otherwise it returns a CacheError
/// on failure.
async fn pre_process_search_results(
&mut self,
search_results: &SearchResults,
) -> Result<Vec<u8>, Report<CacheError>> {
#[allow(unused_mut)] // needs to be mutable when any of the features is enabled
let mut bytes: Vec<u8> = search_results.try_into()?;
#[cfg(feature = "compress-cache-results")]
{
let compressed = self.compress_results(bytes).await?;
bytes = compressed;
}
#[cfg(feature = "encrypt-cache-results")]
{
let encrypted = self.encrypt_or_decrypt_results(bytes, true).await?;
bytes = encrypted;
}
#[cfg(feature = "cec-cache-results")]
{
let compressed_encrypted_compressed =
self.compress_encrypt_compress_results(bytes).await?;
bytes = compressed_encrypted_compressed;
}
Ok(bytes)
}
/// A helper function that decompresses or decrypts search results after they're fetched from the cache-store
/// # Arguments
///
/// * `bytes` - A Vec of bytes stores in the cache.
///
///
/// # Error
/// Returns the SearchResults struct on success otherwise it returns a CacheError
/// on failure.
#[allow(unused_mut)] // needs to be mutable when any of the features is enabled
async fn post_process_search_results(
&mut self,
mut bytes: Vec<u8>,
) -> Result<SearchResults, Report<CacheError>> {
#[cfg(feature = "compress-cache-results")]
{
let decompressed = self.decompress_results(&bytes).await?;
bytes = decompressed
}
#[cfg(feature = "encrypt-cache-results")]
{
let decrypted = self.encrypt_or_decrypt_results(bytes, false).await?;
bytes = decrypted
}
#[cfg(feature = "cec-cache-results")]
{
let decompressed_decrypted = self.decompress_results(&bytes).await?;
bytes = decompressed_decrypted;
}
Ok(bytes.try_into()?)
impl Into<SearchResults> for Vec<u8> {
fn into(self) -> SearchResults {
serde_json::from_slice(&self)
.expect("well, this can only be caused by memory corruption so good luck")
}
}
/// A helper function that returns compressed results.
/// Feature flags (**compress-cache-results or cec-cache-results**) are required for this to work.
/// If bytes where
/// # Arguments
///
/// * `bytes` - It takes a slice of bytes as an argument.
///
/// # Error
/// Returns the uncompressed bytes on success otherwise it returns a CacheError
/// on failure.
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
async fn decompress_util(input: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
use tokio::io::AsyncWriteExt;
let mut writer = async_compression::tokio::write::BrotliDecoder::new(Vec::new());
writer
.write_all(input)
.await
.map_err(|_| CacheError::CompressionError)?;
writer
.shutdown()
.await
.map_err(|_| CacheError::CompressionError)?;
let bytes = writer.into_inner();
Ok(bytes)
}
#[cfg(feature = "redis-cache")]
#[async_trait::async_trait]
impl Cacher for RedisCache {
async fn build(config: &Config) -> Self {
log::info!(
"Initialising redis cache. Listening to {}",
&config.redis_url
);
RedisCache::new(&config.redis_url, 5, config.cache_expiry_time)
.await
.expect("Redis cache configured")
}
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
use base64::Engine;
let hashed_url_string: &str = &self.hash_url(url);
let base64_string = self.cached_json(hashed_url_string).await?;
let bytes = base64::engine::general_purpose::STANDARD_NO_PAD
.decode(base64_string)
.map_err(|_| CacheError::Base64DecodingOrEncodingError)?;
self.post_process_search_results(bytes).await
}
async fn cache_results(
&mut self,
search_results: &[SearchResults],
urls: &[String],
) -> Result<(), Report<CacheError>> {
use base64::Engine;
// size of search_results is expected to be equal to size of urls -> key/value pairs for cache;
let search_results_len = search_results.len();
let mut bytes = Vec::with_capacity(search_results_len);
for result in search_results {
let processed = self.pre_process_search_results(result).await?;
bytes.push(processed);
}
let base64_strings = bytes
.iter()
.map(|bytes_vec| base64::engine::general_purpose::STANDARD_NO_PAD.encode(bytes_vec));
let mut hashed_url_strings = Vec::with_capacity(search_results_len);
for url in urls {
let hash = self.hash_url(url);
hashed_url_strings.push(hash);
}
self.cache_json(base64_strings, hashed_url_strings.into_iter())
.await
}
}
/// TryInto implementation for SearchResults from Vec<u8>
use std::convert::TryInto;
impl TryInto<SearchResults> for Vec<u8> {
type Error = CacheError;
fn try_into(self) -> Result<SearchResults, Self::Error> {
serde_json::from_slice(&self).map_err(|_| CacheError::SerializationError)
}
}
impl TryInto<Vec<u8>> for &SearchResults {
type Error = CacheError;
fn try_into(self) -> Result<Vec<u8>, Self::Error> {
serde_json::to_vec(self).map_err(|_| CacheError::SerializationError)
impl Into<Vec<u8>> for &SearchResults {
fn into(self) -> Vec<u8> {
serde_json::to_vec(self).expect("somehow failed to serialize search results")
}
}
/// Memory based cache backend.
#[cfg(feature = "memory-cache")]
pub struct InMemoryCache {
#[derive(Clone)]
pub struct Cache {
/// The backend cache which stores data.
cache: MokaCache<String, Vec<u8>>,
}
#[cfg(feature = "memory-cache")]
#[async_trait::async_trait]
impl Cacher for InMemoryCache {
async fn build(config: &Config) -> Self {
log::info!("Initialising in-memory cache");
impl Cache {
/// Build new cache
pub fn build(config: &Config) -> Self {
log::info!("Initializing in-memory cache");
InMemoryCache {
Self {
cache: MokaCache::builder()
.time_to_live(Duration::from_secs(config.cache_expiry_time.into()))
.time_to_live(Duration::from_secs(config.cache_expiry_time))
.build(),
}
}
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
let hashed_url_string = self.hash_url(url);
match self.cache.get(&hashed_url_string) {
Some(res) => self.post_process_search_results(res).await,
None => Err(Report::new(CacheError::MissingValue)),
}
/// Retrieve Cached results
pub fn cached_results(&self, url: &str) -> Option<SearchResults> {
self.cache.get(&url.to_string()).map(|b| b.into())
}
async fn cache_results(
&mut self,
search_results: &[SearchResults],
urls: &[String],
) -> Result<(), Report<CacheError>> {
/// Cache results
pub fn cache_results(&self, search_results: &[SearchResults], urls: &[String]) {
for (url, search_result) in urls.iter().zip(search_results.iter()) {
let hashed_url_string = self.hash_url(url);
let bytes = self.pre_process_search_results(search_result).await?;
self.cache.insert(hashed_url_string, bytes);
self.cache.insert(url.clone(), search_result.into());
}
self.cache.sync();
Ok(())
}
}
/// Cache backend which utilises both memory and redis based caches.
///
/// The hybrid cache system uses both the types of cache to ensure maximum availability.
/// The set method sets the key, value pair in both the caches. Therefore in a case where redis
/// cache becomes unavailable, the backend will retreive the value from in-memory cache.
#[cfg(all(feature = "memory-cache", feature = "redis-cache"))]
pub struct HybridCache {
/// The in-memory backend cache which stores data.
memory_cache: InMemoryCache,
/// The redis backend cache which stores data.
redis_cache: RedisCache,
}
#[cfg(all(feature = "memory-cache", feature = "redis-cache"))]
#[async_trait::async_trait]
impl Cacher for HybridCache {
async fn build(config: &Config) -> Self {
log::info!("Initialising hybrid cache");
HybridCache {
memory_cache: InMemoryCache::build(config).await,
redis_cache: RedisCache::build(config).await,
}
}
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
match self.redis_cache.cached_results(url).await {
Ok(res) => Ok(res),
Err(_) => self.memory_cache.cached_results(url).await,
}
}
async fn cache_results(
&mut self,
search_results: &[SearchResults],
urls: &[String],
) -> Result<(), Report<CacheError>> {
self.redis_cache.cache_results(search_results, urls).await?;
self.memory_cache
.cache_results(search_results, urls)
.await?;
Ok(())
}
}
/// Dummy cache backend
pub struct DisabledCache;
#[async_trait::async_trait]
impl Cacher for DisabledCache {
async fn build(_config: &Config) -> Self {
log::info!("Caching is disabled");
DisabledCache
}
async fn cached_results(&mut self, _url: &str) -> Result<SearchResults, Report<CacheError>> {
Err(Report::new(CacheError::MissingValue))
}
async fn cache_results(
&mut self,
_search_results: &[SearchResults],
_urls: &[String],
) -> Result<(), Report<CacheError>> {
Ok(())
}
}
/// A structure to efficiently share the cache between threads - as it is protected by a Mutex.
pub struct SharedCache {
/// The internal cache protected from concurrent access by a mutex
cache: Mutex<Box<dyn Cacher>>,
}
impl SharedCache {
/// A function that creates a new `SharedCache` from a Cache implementation.
///
/// # Arguments
///
/// * `cache` - It takes the `Cache` enum variant as an argument with the prefered cache type.
///
/// Returns a newly constructed `SharedCache` struct.
pub fn new(cache: impl Cacher + 'static) -> Self {
Self {
cache: Mutex::new(Box::new(cache)),
}
}
/// A getter function which retrieves the cached SearchResulsts from the internal cache.
///
/// # Arguments
///
/// * `url` - It takes the search url as an argument which will be used as the key to fetch the
/// cached results from the cache.
///
/// # Error
///
/// Returns a `SearchResults` struct containing the search results from the cache if nothing
/// goes wrong otherwise returns a `CacheError`.
pub async fn cached_results(&self, url: &str) -> Result<SearchResults, Report<CacheError>> {
let mut mut_cache = self.cache.lock().await;
mut_cache.cached_results(url).await
}
/// A setter function which caches the results by using the `url` as the key and
/// `SearchResults` as the value.
///
/// # Arguments
///
/// * `search_results` - It takes the `SearchResults` as an argument which are results that
/// needs to be cached.
/// * `url` - It takes the search url as an argument which will be used as the key for storing
/// results in the cache.
///
/// # Error
///
/// Returns an unit type if the results are cached succesfully otherwise returns a `CacheError`
/// on a failure.
pub async fn cache_results(
&self,
search_results: &[SearchResults],
urls: &[String],
) -> Result<(), Report<CacheError>> {
let mut mut_cache = self.cache.lock().await;
mut_cache.cache_results(search_results, urls).await
}
}
/// A function to initialise the cache backend.
pub async fn create_cache(config: &Config) -> impl Cacher {
#[cfg(all(feature = "redis-cache", feature = "memory-cache"))]
return HybridCache::build(config).await;
#[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))]
return InMemoryCache::build(config).await;
#[cfg(all(feature = "redis-cache", not(feature = "memory-cache")))]
return RedisCache::build(config).await;
#[cfg(not(any(feature = "memory-cache", feature = "redis-cache")))]
return DisabledCache::build(config).await;
}
//#[cfg(feature = "Compress-cache-results")]

View file

@ -1,25 +0,0 @@
use chacha20poly1305::{
consts::{B0, B1},
ChaChaPoly1305,
};
use std::sync::OnceLock;
use chacha20::{
cipher::{
generic_array::GenericArray,
typenum::{UInt, UTerm},
StreamCipherCoreWrapper,
},
ChaChaCore,
};
/// The ChaCha20 core wrapped in a stream cipher for use in ChaCha20-Poly1305 authenticated encryption.
type StreamCipherCoreWrapperType =
StreamCipherCoreWrapper<ChaChaCore<UInt<UInt<UInt<UInt<UTerm, B1>, B0>, B1>, B0>>>;
/// Our ChaCha20-Poly1305 cipher instance, lazily initialized.
pub static CIPHER: OnceLock<ChaChaPoly1305<StreamCipherCoreWrapperType>> = OnceLock::new();
/// The type alias for our encryption key, a 32-byte array.
type GenericArrayType = GenericArray<u8, UInt<UInt<UInt<UInt<UTerm, B1>, B1>, B0>, B0>>;
/// Our encryption key, lazily initialized.
pub static ENCRYPTION_KEY: OnceLock<GenericArrayType> = OnceLock::new();

View file

@ -1,14 +1,11 @@
//! This module provides the functionality to parse the lua config and convert the config options
//! into rust readable form.
use crate::handler::{file_path, FileType};
use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style};
use log::LevelFilter;
use mlua::Lua;
use std::{collections::HashMap, fs, thread::available_parallelism};
use crate::models::parser_models::Style;
use std::collections::HashMap;
/// A named struct which stores the parsed config file options.
#[derive(Debug, Clone)]
pub struct Config {
/// It stores the parsed port number option on which the server should launch.
pub port: u16,
@ -16,15 +13,8 @@ pub struct Config {
pub binding_ip: String,
/// It stores the theming options for the website.
pub style: Style,
#[cfg(feature = "redis-cache")]
/// It stores the redis connection url address on which the redis
/// client should connect.
pub redis_url: String,
#[cfg(any(feature = "redis-cache", feature = "memory-cache"))]
/// It stores the max TTL for search results in cache.
pub cache_expiry_time: u16,
/// It stores the option to whether enable or disable production use.
pub aggregator: AggregatorConfig,
/// Memory cache invalidation time
pub cache_expiry_time: u64,
/// It stores the option to whether enable or disable logs.
pub logging: bool,
/// It stores the option to whether enable or disable debug mode.
@ -32,149 +22,42 @@ pub struct Config {
/// It toggles whether to use adaptive HTTP windows
pub adaptive_window: bool,
/// It stores all the engine names that were enabled by the user.
pub upstream_search_engines: HashMap<String, bool>,
pub upstream_search_engines: Vec<String>,
/// It stores the time (secs) which controls the server request timeout.
pub request_timeout: u8,
/// It stores the number of threads which controls the app will use to run.
pub threads: u8,
/// Set the keep-alive time for client connections to the HTTP server
pub client_connection_keep_alive: u8,
/// It stores configuration options for the ratelimiting middleware.
pub rate_limiter: RateLimiter,
/// It stores the level of safe search to be used for restricting content in the
/// search results.
pub safe_search: u8,
//pub client_connection_keep_alive: u8,
/// It stores the TCP connection keepalive duration in seconds.
pub tcp_connection_keep_alive: u8,
/// It stores the pool idle connection timeout in seconds.
pub pool_idle_connection_timeout: u8,
}
impl Default for Config {
fn default() -> Self {
Self {
port: 8080,
binding_ip: "127.0.0.1".into(),
style: Style {
theme: "simple".into(),
colorscheme: "catppuccin-mocha".into(),
animation: Some("simple-frosted-glow".into()),
},
cache_expiry_time: 600,
logging: true,
debug: false,
adaptive_window: false,
upstream_search_engines: vec!["bing".into(), "brave".into()],
request_timeout: 2,
tcp_connection_keep_alive: 10,
pool_idle_connection_timeout: 30,
}
}
}
impl Config {
/// A function which parses the config.lua file and puts all the parsed options in the newly
/// constructed Config struct and returns it.
///
/// # Arguments
///
/// * `logging_initialized` - It takes a boolean which ensures that the logging doesn't get
/// initialized twice. Pass false if the logger has not yet been initialized.
///
/// # Error
///
/// Returns a lua parse error if parsing of the config.lua file fails or has a syntax error
/// or io error if the config.lua file doesn't exists otherwise it returns a newly constructed
/// Config struct with all the parsed config options from the parsed config file.
pub fn parse(logging_initialized: bool) -> Result<Self, Box<dyn std::error::Error>> {
let lua = Lua::new();
let globals = lua.globals();
lua.load(&fs::read_to_string(file_path(FileType::Config)?)?)
.exec()?;
let parsed_threads: u8 = globals.get::<_, u8>("threads")?;
let debug: bool = globals.get::<_, bool>("debug")?;
let logging: bool = globals.get::<_, bool>("logging")?;
let adaptive_window: bool = globals.get::<_, bool>("adaptive_window")?;
if !logging_initialized {
set_logging_level(debug, logging);
}
let threads: u8 = if parsed_threads == 0 {
let total_num_of_threads: usize = available_parallelism()?.get() / 2;
log::error!(
"Config Error: The value of `threads` option should be a non zero positive integer"
);
log::error!("Falling back to using {} threads", total_num_of_threads);
total_num_of_threads as u8
} else {
parsed_threads
};
let rate_limiter = globals.get::<_, HashMap<String, u8>>("rate_limiter")?;
let parsed_safe_search: u8 = globals.get::<_, u8>("safe_search")?;
let safe_search: u8 = match parsed_safe_search {
0..=4 => parsed_safe_search,
_ => {
log::error!("Config Error: The value of `safe_search` option should be a non zero positive integer from 0 to 4.");
log::error!("Falling back to using the value `1` for the option");
1
}
};
#[cfg(any(feature = "redis-cache", feature = "memory-cache"))]
let parsed_cet = globals.get::<_, u16>("cache_expiry_time")?;
#[cfg(any(feature = "redis-cache", feature = "memory-cache"))]
let cache_expiry_time = match parsed_cet {
0..=59 => {
log::error!(
"Config Error: The value of `cache_expiry_time` must be greater than 60"
);
log::error!("Falling back to using the value `60` for the option");
60
}
_ => parsed_cet,
};
Ok(Config {
port: globals.get::<_, u16>("port")?,
binding_ip: globals.get::<_, String>("binding_ip")?,
style: Style::new(
globals.get::<_, String>("theme")?,
globals.get::<_, String>("colorscheme")?,
globals.get::<_, Option<String>>("animation")?,
),
#[cfg(feature = "redis-cache")]
redis_url: globals.get::<_, String>("redis_url")?,
aggregator: AggregatorConfig {
random_delay: globals.get::<_, bool>("production_use")?,
},
logging,
debug,
adaptive_window,
upstream_search_engines: globals
.get::<_, HashMap<String, bool>>("upstream_search_engines")?,
request_timeout: globals.get::<_, u8>("request_timeout")?,
tcp_connection_keep_alive: globals.get::<_, u8>("tcp_connection_keep_alive")?,
pool_idle_connection_timeout: globals.get::<_, u8>("pool_idle_connection_timeout")?,
threads,
client_connection_keep_alive: globals.get::<_, u8>("client_connection_keep_alive")?,
rate_limiter: RateLimiter {
number_of_requests: rate_limiter["number_of_requests"],
time_limit: rate_limiter["time_limit"],
},
safe_search,
#[cfg(any(feature = "redis-cache", feature = "memory-cache"))]
cache_expiry_time,
})
/// Creates a new config based on the environment variables.
pub fn parse() -> Self {
Self::default()
}
}
/// a helper function that sets the proper logging level
///
/// # Arguments
///
/// * `debug` - It takes the option to whether enable or disable debug mode.
/// * `logging` - It takes the option to whether enable or disable logs.
fn set_logging_level(debug: bool, logging: bool) {
if let Ok(pkg_env_var) = std::env::var("PKG_ENV") {
if pkg_env_var.to_lowercase() == "dev" {
env_logger::Builder::new()
.filter(None, LevelFilter::Trace)
.init();
return;
}
}
// Initializing logging middleware with level set to default or info.
let log_level = match (debug, logging) {
(true, true) => LevelFilter::Debug,
(true, false) => LevelFilter::Debug,
(false, true) => LevelFilter::Info,
(false, false) => LevelFilter::Error,
};
env_logger::Builder::new().filter(None, log_level).init();
}

View file

@ -47,7 +47,6 @@ impl SearchEngine for Bing {
page: u32,
user_agent: &str,
client: &Client,
_safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError> {
// Bing uses `start results from this number` convention
// So, for 10 results per page, page 0 starts at 1, page 1

View file

@ -43,16 +43,9 @@ impl SearchEngine for Brave {
page: u32,
user_agent: &str,
client: &Client,
safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError> {
let url = format!("https://search.brave.com/search?q={query}&offset={page}");
let safe_search_level = match safe_search {
0 => "off",
1 => "moderate",
_ => "strict",
};
let header_map = HeaderMap::try_from(&HashMap::from([
("User-Agent".to_string(), user_agent.to_string()),
(
@ -60,10 +53,7 @@ impl SearchEngine for Brave {
"application/x-www-form-urlencoded".to_string(),
),
("Referer".to_string(), "https://google.com/".to_string()),
(
"Cookie".to_string(),
format!("safe_search={safe_search_level}"),
),
("Cookie".to_string(), "safe_search=off".into()),
]))
.change_context(EngineError::UnexpectedError)?;

View file

@ -46,7 +46,6 @@ impl SearchEngine for DuckDuckGo {
page: u32,
user_agent: &str,
client: &Client,
_safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError> {
// Page number can be missing or empty string and so appropriate handling is required
// so that upstream server recieves valid page number.

View file

@ -61,7 +61,6 @@ impl SearchEngine for LibreX {
page: u32,
user_agent: &str,
client: &Client,
_safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError> {
// Page number can be missing or empty string and so appropriate handling is required
// so that upstream server recieves valid page number.

View file

@ -46,7 +46,6 @@ impl SearchEngine for Mojeek {
page: u32,
user_agent: &str,
client: &Client,
safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError> {
// Mojeek uses `start results from this number` convention
// So, for 10 results per page, page 0 starts at 1, page 1
@ -75,21 +74,6 @@ impl SearchEngine for Mojeek {
let qss = search_engines.join("%2C");
// A branchless condition to check whether the `safe_search` parameter has the
// value 0 or not. If it is zero then it sets the value 0 otherwise it sets
// the value to 1 for all other values of `safe_search`
//
// Moreover, the below branchless code is equivalent to the following code below:
//
// ```rust
// let safe = if safe_search == 0 { 0 } else { 1 }.to_string();
// ```
//
// For more information on branchless programming. See:
//
// * https://piped.video/watch?v=bVJ-mWWL7cE
let safe = u8::from(safe_search != 0).to_string();
// Mojeek detects automated requests, these are preferences that are
// able to circumvent the countermeasure. Some of these are
// not documented in their Search API
@ -104,7 +88,6 @@ impl SearchEngine for Mojeek {
("hp", "minimal"),
("lb", "en"),
("qss", &qss),
("safe", &safe),
];
let mut query_params_string = String::new();

View file

@ -42,27 +42,8 @@ impl SearchEngine for Searx {
page: u32,
user_agent: &str,
client: &Client,
mut safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError> {
// A branchless condition to check whether the `safe_search` parameter has the
// value greater than equal to three or not. If it is, then it modifies the
// `safesearch` parameters value to 2.
//
// Moreover, the below branchless code is equivalent to the following code below:
//
// ```rust
// safe_search = u8::from(safe_search == 3) * 2;
// ```
//
// For more information on branchless programming. See:
//
// * https://piped.video/watch?v=bVJ-mWWL7cE
safe_search = u8::from(safe_search >= 3) * 2;
let url: String = format!(
"https://searx.be/search?q={query}&pageno={}&safesearch={safe_search}",
page + 1
);
let url: String = format!("https://searx.be/search?q={query}&pageno={}", page + 1);
// initializing headers and adding appropriate headers.
let header_map = HeaderMap::try_from(&HashMap::from([

View file

@ -46,7 +46,6 @@ impl SearchEngine for Startpage {
page: u32,
user_agent: &str,
client: &Client,
_safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError> {
// Page number can be missing or empty string and so appropriate handling is required
// so that upstream server recieves valid page number.

View file

@ -1,124 +0,0 @@
//! This main library module provides the functionality to provide and handle the Tcp server
//! and register all the routes for the `websurfx` meta search engine website.
#![forbid(unsafe_code, clippy::panic)]
#![deny(missing_docs, clippy::missing_docs_in_private_items, clippy::perf)]
#![warn(clippy::cognitive_complexity, rust_2018_idioms)]
pub mod cache;
pub mod config;
pub mod engines;
pub mod handler;
pub mod models;
pub mod results;
pub mod server;
pub mod templates;
use std::{net::TcpListener, sync::OnceLock, time::Duration};
use crate::server::router;
use actix_cors::Cors;
use actix_files as fs;
use actix_governor::{Governor, GovernorConfigBuilder};
use actix_web::{
dev::Server,
http::header,
middleware::{Compress, Logger},
web, App, HttpServer,
};
use cache::cacher::{Cacher, SharedCache};
use config::parser::Config;
use handler::{file_path, FileType};
/// A static constant for holding the cache struct.
static SHARED_CACHE: OnceLock<SharedCache> = OnceLock::new();
/// Runs the web server on the provided TCP listener and returns a `Server` instance.
///
/// # Arguments
///
/// * `listener` - A `TcpListener` instance representing the address and port to listen on.
///
/// # Returns
///
/// Returns a `Result` containing a `Server` instance on success, or an `std::io::Error` on failure.
///
/// # Example
///
/// ```rust
/// use std::{net::TcpListener, sync::OnceLock};
/// use websurfx::{config::parser::Config, run, cache::cacher::create_cache};
///
/// /// A static constant for holding the parsed config.
/// static CONFIG: OnceLock<Config> = OnceLock::new();
///
/// #[tokio::main]
/// async fn main(){
/// // Initialize the parsed config globally.
/// let config = CONFIG.get_or_init(|| Config::parse(true).unwrap());
/// let listener = TcpListener::bind("127.0.0.1:8080").expect("Failed to bind address");
/// let cache = create_cache(config).await;
/// let server = run(listener,&config,cache).expect("Failed to start server");
/// }
/// ```
pub fn run(
listener: TcpListener,
config: &'static Config,
cache: impl Cacher + 'static,
) -> std::io::Result<Server> {
let public_folder_path: &str = file_path(FileType::Theme)?;
let cache = SHARED_CACHE.get_or_init(|| SharedCache::new(cache));
let server = HttpServer::new(move || {
let cors: Cors = Cors::default()
.allow_any_origin()
.allowed_methods(vec!["GET"])
.allowed_headers(vec![
header::ORIGIN,
header::CONTENT_TYPE,
header::REFERER,
header::COOKIE,
]);
App::new()
// Compress the responses provided by the server for the client requests.
.wrap(Compress::default())
.wrap(Logger::default()) // added logging middleware for logging.
.app_data(web::Data::new(config))
.app_data(web::Data::new(cache))
.wrap(cors)
.wrap(Governor::new(
&GovernorConfigBuilder::default()
.per_second(config.rate_limiter.time_limit as u64)
.burst_size(config.rate_limiter.number_of_requests as u32)
.finish()
.unwrap(),
))
// Serve images and static files (css and js files).
.service(
fs::Files::new("/static", format!("{}/static", public_folder_path))
.show_files_listing(),
)
.service(
fs::Files::new("/images", format!("{}/images", public_folder_path))
.show_files_listing(),
)
.service(router::robots_data) // robots.txt
.service(router::index) // index page
.service(server::routes::search::search) // search page
.service(router::about) // about page
.service(router::settings) // settings page
.default_service(web::route().to(router::not_found)) // error page
})
.workers(config.threads as usize)
// Set the keep-alive timer for client connections
.keep_alive(Duration::from_secs(
config.client_connection_keep_alive as u64,
))
// Start server on 127.0.0.1 with the user provided port number. for example 127.0.0.1:8080.
.listen(listener)?
.run();
Ok(server)
}

View file

@ -35,7 +35,7 @@ impl SearchResult {
///
/// * `title` - The title of the search result.
/// * `url` - The url which is accessed when clicked on it
/// (href url in html in simple words).
/// (href url in html in simple words).
/// * `description` - The description of the search result.
/// * `engine` - The names of the upstream engines from which this results were provided.
pub fn new(title: &str, url: &str, description: &str, engine: &[&str]) -> Self {
@ -125,7 +125,7 @@ impl EngineErrorInfo {
/// # Arguments
///
/// * `error` - It takes the error type which occured while fetching the result from a particular
/// search engine.
/// search engine.
/// * `engine` - It takes the name of the engine that failed to provide the requested search results.
pub fn new(error: &EngineError, engine: &str) -> Self {
Self {
@ -178,11 +178,11 @@ impl SearchResults {
/// # Arguments
///
/// * `results` - Takes an argument of individual serializable `SearchResult` struct
/// and stores it into a vector of `SearchResult` structs.
/// and stores it into a vector of `SearchResult` structs.
/// * `page_query` - Takes an argument of current page`s search query `q` provided in
/// the search url.
/// the search url.
/// * `engine_errors_info` - Takes an array of structs which contains information regarding
/// which engines failed with their names, reason and their severity color name.
/// which engines failed with their names, reason and their severity color name.
pub fn new(results: Vec<SearchResult>, engine_errors_info: &[EngineErrorInfo]) -> Self {
Self {
results,

View file

@ -146,7 +146,6 @@ pub trait SearchEngine: Sync + Send {
page: u32,
user_agent: &str,
client: &Client,
safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError>;
}

View file

@ -10,7 +10,7 @@
/// order to allow the deserializing the json back to struct in aggregate function in
/// aggregator.rs and create a new struct out of it and then serialize it back to json and pass
/// it to the template files.
#[derive(Default)]
#[derive(Default, Debug, Clone)]
pub struct Style {
/// It stores the parsed theme option used to set a theme for the website.
pub theme: String,
@ -29,7 +29,7 @@ impl Style {
///
/// * `theme` - It takes the parsed theme option used to set a theme for the website.
/// * `colorscheme` - It takes the parsed colorscheme option used to set a colorscheme
/// for the theme being used.
/// for the theme being used.
pub fn new(theme: String, colorscheme: String, animation: Option<String>) -> Self {
Style {
theme,

View file

@ -30,19 +30,16 @@ pub struct Cookie<'a> {
pub colorscheme: Cow<'a, str>,
/// It stores the user selected upstream search engines selected from the UI.
pub engines: Cow<'a, Vec<Cow<'a, str>>>,
/// It stores the user selected safe search level from the UI.
pub safe_search_level: u8,
}
impl<'a> Cookie<'a> {
/// server_models::Cookie contructor function
pub fn build(style: &'a Style, mut engines: Vec<Cow<'a, str>>, safe_search_level: u8) -> Self {
pub fn build(style: &'a Style, mut engines: Vec<Cow<'a, str>>) -> Self {
engines.sort();
Self {
theme: Cow::Borrowed(&style.theme),
colorscheme: Cow::Borrowed(&style.colorscheme),
engines: Cow::Owned(engines),
safe_search_level,
}
}
}

View file

@ -3,7 +3,6 @@
use super::user_agent::random_user_agent;
use crate::config::parser::Config;
use crate::handler::{file_path, FileType};
use crate::models::{
aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
engine_models::{EngineError, EngineHandler},
@ -14,7 +13,6 @@ use futures::stream::FuturesUnordered;
use regex::Regex;
use reqwest::{Client, ClientBuilder};
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
use tokio::{
fs::File,
io::{AsyncBufReadExt, BufReader},
@ -61,7 +59,7 @@ type FutureVec =
/// * `debug` - Accepts a boolean value to enable or disable debug mode option.
/// * `upstream_search_engines` - Accepts a vector of search engine names which was selected by the
/// * `request_timeout` - Accepts a time (secs) as a value which controls the server request timeout.
/// user through the UI or the config file.
/// user through the UI or the config file.
///
/// # Error
///
@ -71,9 +69,8 @@ type FutureVec =
pub async fn aggregate(
query: &str,
page: u32,
config: &Config,
config: actix_web::web::Data<crate::config::parser::Config>,
upstream_search_engines: &[EngineHandler],
safe_search: u8,
) -> Result<SearchResults, Box<dyn std::error::Error>> {
let client = CLIENT.get_or_init(|| {
ClientBuilder::new()
@ -93,13 +90,6 @@ pub async fn aggregate(
let user_agent: &str = random_user_agent();
// Add a random delay before making the request.
if config.aggregator.random_delay || !config.debug {
let nanos = SystemTime::now().duration_since(UNIX_EPOCH)?.subsec_nanos() as f32;
let delay = ((nanos / 1_0000_0000 as f32).floor() as u64) + 1;
tokio::time::sleep(Duration::from_secs(delay)).await;
}
let mut names: Vec<&str> = Vec::with_capacity(0);
// create tasks for upstream result fetching
@ -112,13 +102,7 @@ pub async fn aggregate(
let query_partially_cloned = query.clone();
tasks.push(tokio::spawn(async move {
search_engine
.results(
&query_partially_cloned,
page,
user_agent,
client,
safe_search,
)
.results(&query_partially_cloned, page, user_agent, client)
.await
}));
}
@ -169,25 +153,6 @@ pub async fn aggregate(
};
}
if safe_search >= 3 {
let mut blacklist_map: Vec<(String, SearchResult)> = Vec::new();
filter_with_lists(
&mut result_map,
&mut blacklist_map,
file_path(FileType::BlockList)?,
)
.await?;
filter_with_lists(
&mut blacklist_map,
&mut result_map,
file_path(FileType::AllowList)?,
)
.await?;
drop(blacklist_map);
}
let mut results: Vec<SearchResult> = result_map
.iter()
.map(|(_, value)| {

View file

@ -11,9 +11,7 @@ use tokio::fs::read_to_string;
/// Handles the route of index page or main page of the `websurfx` meta search engine website.
#[get("/")]
pub async fn index(
config: web::Data<&'static Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
pub async fn index(config: web::Data<Config>) -> Result<HttpResponse, Box<dyn std::error::Error>> {
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
crate::templates::views::index::index(
&config.style.colorscheme,
@ -27,7 +25,7 @@ pub async fn index(
/// Handles the route of any other accessed route/page which is not provided by the
/// website essentially the 404 error page.
pub async fn not_found(
config: web::Data<&'static Config>,
config: web::Data<Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
crate::templates::views::not_found::not_found(
@ -51,9 +49,7 @@ pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std:
/// Handles the route of about page of the `websurfx` meta search engine website.
#[get("/about")]
pub async fn about(
config: web::Data<&'static Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
pub async fn about(config: web::Data<Config>) -> Result<HttpResponse, Box<dyn std::error::Error>> {
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
crate::templates::views::about::about(
&config.style.colorscheme,
@ -67,15 +63,14 @@ pub async fn about(
/// Handles the route of settings page of the `websurfx` meta search engine website.
#[get("/settings")]
pub async fn settings(
config: web::Data<&'static Config>,
config: web::Data<Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
crate::templates::views::settings::settings(
config.safe_search,
&config.style.colorscheme,
&config.style.theme,
&config.style.animation,
&config.upstream_search_engines,
//&config.upstream_search_engines,
)?
.0,
))

View file

@ -1,9 +1,8 @@
//! This module handles the search route of the search engine website.
use crate::{
cache::cacher::SharedCache,
cache::cacher::Cache,
config::parser::Config,
handler::{file_path, FileType},
models::{
aggregation_models::SearchResults,
engine_models::EngineHandler,
@ -12,13 +11,8 @@ use crate::{
results::aggregator::aggregate,
};
use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
use regex::Regex;
use std::borrow::Cow;
use tokio::{
fs::File,
io::{AsyncBufReadExt, BufReader},
join,
};
use tokio::join;
/// Handles the route of search page of the `websurfx` meta search engine website and it takes
/// two search url parameters `q` and `page` where `page` parameter is optional.
@ -37,8 +31,8 @@ use tokio::{
#[get("/search")]
pub async fn search(
req: HttpRequest,
config: web::Data<&'static Config>,
cache: web::Data<&'static SharedCache>,
config: web::Data<Config>,
cache: web::Data<Cache>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
use std::sync::Arc;
let params = web::Query::<SearchParams>::from_query(req.query_string())?;
@ -53,7 +47,7 @@ pub async fn search(
let cookie = req.cookie("appCookie");
// Get search settings using the user's cookie or from the server's config
let mut search_settings: server_models::Cookie<'_> = cookie
let search_settings: server_models::Cookie<'_> = cookie
.and_then(|cookie_value| serde_json::from_str(cookie_value.value()).ok())
.unwrap_or_else(|| {
server_models::Cookie::build(
@ -61,22 +55,14 @@ pub async fn search(
config
.upstream_search_engines
.iter()
.filter_map(|(engine, enabled)| {
enabled.then_some(Cow::Borrowed(engine.as_str()))
})
.map(|e| Cow::Borrowed(e.as_str()))
.collect(),
config.safe_search,
)
});
search_settings.safe_search_level = get_safesearch_level(
params.safesearch,
search_settings.safe_search_level,
config.safe_search,
);
// Closure wrapping the results function capturing local references
let get_results = |page| results(&config, &cache, query, page, &search_settings);
let get_results =
|page| results(config.clone(), cache.clone(), query, page, &search_settings);
// .max(1) makes sure that the page >= 0.
let page = params.page.unwrap_or(1).max(1) - 1;
@ -108,7 +94,7 @@ pub async fn search(
results = Arc::new(current_results?);
tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys).await });
tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys) });
} else {
let (current_results, next_results) =
join!(get_results(page), get_results(page + 1));
@ -122,7 +108,7 @@ pub async fn search(
[results.0.clone(), parsed_next_results.0],
);
tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys).await });
tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys) });
}
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
@ -148,7 +134,7 @@ pub async fn search(
/// # Arguments
///
/// * `url` - It takes the url of the current page that requested the search results for a
/// particular search query.
/// particular search query.
/// * `config` - It takes a parsed config struct.
/// * `query` - It takes the page number as u32 value.
/// * `req` - It takes the `HttpRequest` struct as a value.
@ -158,48 +144,30 @@ pub async fn search(
/// It returns the `SearchResults` struct if the search results could be successfully fetched from
/// the cache or from the upstream search engines otherwise it returns an appropriate error.
async fn results(
config: &'static Config,
cache: &'static SharedCache,
config: web::Data<crate::config::parser::Config>,
cache: web::Data<crate::cache::cacher::Cache>,
query: &str,
page: u32,
search_settings: &server_models::Cookie<'_>,
) -> Result<(SearchResults, String), Box<dyn std::error::Error>> {
// eagerly parse cookie value to evaluate safe search level
let safe_search_level = search_settings.safe_search_level;
let cache_key = format!(
"http://{}:{}/search?q={}&page={}&safesearch={}&engines={}",
"http://{}:{}/search?q={}&page={}&engines={}",
config.binding_ip,
config.port,
query,
page,
safe_search_level,
search_settings.engines.join(",")
);
// fetch the cached results json.
let cached_results = cache.cached_results(&cache_key).await;
let cached_results = cache.cached_results(&cache_key);
// check if fetched cache results was indeed fetched or it was an error and if so
// handle the data accordingly.
match cached_results {
Ok(results) => Ok((results, cache_key)),
Err(_) => {
if safe_search_level == 4 {
let mut results: SearchResults = SearchResults::default();
let flag: bool =
!is_match_from_filter_list(file_path(FileType::BlockList)?, query).await?;
// Return early when query contains disallowed words,
if flag {
results.set_disallowed();
cache
.cache_results(&[results.clone()], &[cache_key.clone()])
.await?;
results.set_safe_search_level(safe_search_level);
return Ok((results, cache_key));
}
}
Some(results) => Ok((results, cache_key)),
None => {
// check if the cookie value is empty or not if it is empty then use the
// default selected upstream search engines from the config file otherwise
// parse the non-empty cookie and grab the user selected engines from the
@ -215,7 +183,6 @@ async fn results(
.iter()
.filter_map(|engine| EngineHandler::new(engine).ok())
.collect::<Vec<EngineHandler>>(),
safe_search_level,
)
.await?
}
@ -231,69 +198,12 @@ async fn results(
results.no_engines_selected(),
);
results.set_filtered(engine_errors_info & results_empty_check & !no_engines_selected);
cache
.cache_results(&[results.clone()], &[cache_key.clone()])
.await?;
results.set_safe_search_level(safe_search_level);
cache.cache_results(&[results.clone()], &[cache_key.clone()]);
Ok((results, cache_key))
}
}
}
/// A helper function which checks whether the search query contains any keywords which should be
/// disallowed/allowed based on the regex based rules present in the blocklist and allowlist files.
///
/// # Arguments
///
/// * `file_path` - It takes the file path of the list as the argument.
/// * `query` - It takes the search query to be checked against the list as an argument.
///
/// # Error
///
/// Returns a bool indicating whether the results were found in the list or not on success
/// otherwise returns a standard error type on a failure.
async fn is_match_from_filter_list(
file_path: &str,
query: &str,
) -> Result<bool, Box<dyn std::error::Error>> {
let reader = BufReader::new(File::open(file_path).await?);
let mut lines = reader.lines();
while let Some(line) = lines.next_line().await? {
let re = Regex::new(&line)?;
if re.is_match(query) {
return Ok(true);
}
}
Ok(false)
}
/// A helper function to choose the safe search level value based on the URL parameters,
/// cookie value and config value.
///
/// # Argurments
///
/// * `safe_search_level_from_url` - Safe search level from the URL parameters.
/// * `cookie_safe_search_level` - Safe search level value from the cookie.
/// * `config_safe_search_level` - Safe search level value from the config file.
///
/// # Returns
///
/// Returns an appropriate safe search level value based on the safe search level values
/// from the URL parameters, cookie and the config file.
fn get_safesearch_level(
safe_search_level_from_url: Option<u8>,
cookie_safe_search_level: u8,
config_safe_search_level: u8,
) -> u8 {
(u8::from(safe_search_level_from_url.is_some())
* ((u8::from(config_safe_search_level >= 3) * config_safe_search_level)
+ (u8::from(config_safe_search_level < 3) * safe_search_level_from_url.unwrap_or(0))))
+ (u8::from(safe_search_level_from_url.is_none())
* ((u8::from(config_safe_search_level >= 3) * config_safe_search_level)
+ (u8::from(config_safe_search_level < 3) * cookie_safe_search_level)))
}
#[cfg(test)]
mod tests {
use std::time::{SystemTime, UNIX_EPOCH};

View file

@ -12,7 +12,7 @@ const SAFE_SEARCH_LEVELS_NAME: [&str; 3] = ["None", "Low", "Moderate"];
/// # Arguments
///
/// * `engine_errors_info` - It takes the engine errors list containing errors for each upstream
/// search engine which failed to provide results as an argument.
/// search engine which failed to provide results as an argument.
/// * `safe_search_level` - It takes the safe search level with values from 0-2 as an argument.
/// * `query` - It takes the current search query provided by user as an argument.
///

View file

@ -9,7 +9,7 @@ use maud::{html, Markup};
/// # Arguments
///
/// * `engine_names` - It takes the key value pair list of all available engine names and there corresponding
/// selected (enabled/disabled) value as an argument.
/// selected (enabled/disabled) value as an argument.
///
/// # Returns
///

View file

@ -1,42 +0,0 @@
//! A module that handles the general tab for setting page view in the `websurfx` frontend.
use maud::{html, Markup};
/// A constant holding the named safe search level options for the corresponding values 0, 1 and 2.
const SAFE_SEARCH_LEVELS: [(u8, &str); 3] = [(0, "None"), (1, "Low"), (2, "Moderate")];
/// A functions that handles the html code for the general tab for the settings page for the search page.
///
/// # Arguments
///
/// * `safe_search_level` - It takes the safe search level as an argument.
///
/// # Returns
///
/// It returns the compiled html markup code for the general tab.
pub fn general(safe_search_level: u8) -> Markup {
html!(
div class="general tab active"{
h1{"General"}
h3{"Select a safe search level"}
p class="description"{
"Select a safe search level from the menu below to filter content based on the level."
}
@if safe_search_level < 3 {
select name="safe_search_levels" {
// Sets the user selected safe_search_level name from the config file as the first option in the selection list.
option value=(safe_search_level){(SAFE_SEARCH_LEVELS.iter().find(|level| level.0 == safe_search_level).unwrap().1)}
@for (k,v) in SAFE_SEARCH_LEVELS.iter().filter(|level| level.0 != safe_search_level){
option value=(k){(v)}
}
}
}
@else {
p class="admin_warning" {"⚠️ This setting is being managed by the server administrator."}
select name="safe_search_levels" disabled {
option value=(SAFE_SEARCH_LEVELS[2].0){(SAFE_SEARCH_LEVELS[2].1)}
}
}
}
)
}

View file

@ -3,5 +3,4 @@
pub mod cookies;
pub mod engines;
pub mod general;
pub mod user_interface;

View file

@ -11,9 +11,9 @@ use std::fs::read_dir;
/// # Arguments
///
/// * `style_type` - It takes the style type of the values `theme` and `colorscheme` as an
/// argument.
/// argument.
/// * `selected_style` - It takes the currently selected style value provided via the config file
/// as an argument.
/// as an argument.
///
/// # Error
///

View file

@ -7,9 +7,7 @@ use maud::{html, Markup};
use crate::templates::partials::{
footer::footer,
header::header,
settings_tabs::{
cookies::cookies, engines::engines, general::general, user_interface::user_interface,
},
settings_tabs::{cookies::cookies, engines::engines, user_interface::user_interface},
};
/// A function that handles the html code for the settings page view in the search engine frontend.
@ -27,11 +25,10 @@ use crate::templates::partials::{
/// This function returns a compiled html markup code on success otherwise returns a standard error
/// message.
pub fn settings(
safe_search_level: u8,
colorscheme: &str,
theme: &str,
animation: &Option<String>,
engine_names: &HashMap<String, bool>,
//engine_names: &HashMap<String, bool>,
) -> Result<Markup, Box<dyn std::error::Error>> {
Ok(html!(
(header(colorscheme, theme, animation))
@ -46,9 +43,8 @@ pub fn settings(
.btn onclick="setActiveTab(this)"{"cookies"}
}
.main_container{
(general(safe_search_level))
(user_interface(theme, colorscheme, animation)?)
(engines(engine_names))
//(engines(engine_names))
(cookies())
p class="message"{}
button type="submit" onclick="setClientSettings()"{"Save"}