strip garbage

Milim 2024-08-14 21:34:18 +02:00
parent d5524d7eae
commit 6567af268e
36 changed files with 496 additions and 2434 deletions

.gitpod.Dockerfile (vendored, 3 deletions)

@@ -1,3 +0,0 @@
FROM gitpod/workspace-rust
RUN sudo install-packages redis-server nodejs npm liblua5.4-dev liblua5.3-dev liblua5.2-dev liblua5.1-0-dev libluajit-5.1-dev

.gitpod.yml

@@ -1,50 +0,0 @@
---
image:
file: .gitpod.Dockerfile
# Commands that will run on workspace start
tasks:
- name: Start Redis Server
command: redis-server --port 8082
- name: Run The App
init: cargo build
command: PKG_ENV=dev ./target/debug/websurfx
- name: Tests
command: cargo test
- name: Clippy Checks
command: cargo clippy
# vscode IDE setup
vscode:
extensions:
- vadimcn.vscode-lldb
- cschleiden.vscode-github-actions
- rust-lang.rust-analyzer
- bungcip.better-toml
- serayuzgur.crates
- usernamehw.errorlens
- DavidAnson.vscode-markdownlint
- esbenp.prettier-vscode
- stylelint.vscode-stylelint
- dbaeumer.vscode-eslint
- evgeniypeshkov.syntax-highlighter
- ms-azuretools.vscode-docker
- Catppuccin.catppuccin-vsc
- PKief.material-icon-theme
- oderwat.indent-rainbow
- formulahendry.auto-rename-tag
- swellaby.vscode-rust-test-adapter
- belfz.search-crates-io
- hbenl.test-adapter-converter
- hbenl.vscode-test-explorer
- eamodio.gitlens
github:
prebuilds:
master: true
branches: true
pullRequests: true
pullRequestsFromForks: true
addCheck: true
addComment: false
addBadge: true

.rusty-hook.toml

@@ -1,5 +0,0 @@
[hooks]
pre-commit = "cargo test && cargo fmt -- --check && cargo clippy && stylelint ./public/static/themes/*.css ./public/static/colorschemes/*.css ./public/static/*.js"
[logging]
verbose = true

Cargo.lock (generated, 1324 changed lines)

File diff suppressed because it is too large.

Cargo.toml

@@ -6,12 +6,6 @@ description = "An open-source alternative to Searx that provides clean, ad-free,
repository = "https://github.com/neon-mmd/websurfx"
license = "AGPL-3.0"
[[bin]]
name = "websurfx"
test = true
bench = false
path = "src/bin/websurfx.rs"
[dependencies]
reqwest = { version = "0.12.4", default-features = false, features = [
"rustls-tls",
@@ -41,15 +35,6 @@ actix-cors = { version = "0.7.0", default-features = false }
fake-useragent = { version = "0.1.3", default-features = false }
env_logger = { version = "0.11.1", default-features = false }
log = { version = "0.4.21", default-features = false }
mlua = { version = "0.9.8", features = [
"luajit",
"vendored",
], default-features = false }
redis = { version = "0.25.4", features = [
"tokio-comp",
"connection-manager",
], default-features = false, optional = true }
blake3 = { version = "1.5.0", default-features = false }
error-stack = { version = "0.4.0", default-features = false, features = [
"std",
] }
@@ -60,25 +45,14 @@ smallvec = { version = "1.13.1", features = [
"serde",
], default-features = false }
futures = { version = "0.3.30", default-features = false, features = ["alloc"] }
dhat = { version = "0.3.2", optional = true, default-features = false }
mimalloc = { version = "0.1.38", default-features = false }
async-once-cell = { version = "0.5.3", default-features = false }
actix-governor = { version = "0.5.0", default-features = false }
mini-moka = { version = "0.10", optional = true, default-features = false, features = [
mini-moka = { version = "0.10", default-features = false, features = [
"sync",
] }
async-compression = { version = "0.4.11", default-features = false, features = [
"brotli",
"tokio",
], optional = true }
chacha20poly1305 = { version = "0.10.1", default-features = false, features = [
"alloc",
"getrandom",
], optional = true }
chacha20 = { version = "0.9.1", default-features = false, optional = true }
base64 = { version = "0.21.5", default-features = false, features = [
"std",
], optional = true }
cfg-if = { version = "1.0.0", default-features = false, optional = true }
keyword_extraction = { version = "1.4.3", default-features = false, features = [
"tf_idf",
@@ -92,55 +66,11 @@ thesaurus = { version = "0.5.2", default-features = false, optional = true, feat
] }
[dev-dependencies]
rusty-hook = { version = "^0.11.2", default-features = false }
criterion = { version = "0.5.1", default-features = false }
tempfile = { version = "3.10.1", default-features = false }
[build-dependencies]
lightningcss = { version = "1.0.0-alpha.57", default-features = false, features = [
"grid",
] }
# Disabled until bug fixing update
# minify-js = { version = "0.6.0", default-features = false }
# Temporary fork with fix
minify-js = { git = "https://github.com/RuairidhWilliamson/minify-js", branch = "master", version = "0.6.0", default-features = false}
[profile.dev]
opt-level = 0
debug = true
split-debuginfo = 'unpacked'
debug-assertions = true
overflow-checks = true
lto = false
panic = 'unwind'
incremental = true
codegen-units = 256
rpath = false
[profile.release]
opt-level = 3
debug = false # This should only be commented when testing with dhat profiler
# debug = 1 # This should only be uncommented when testing with dhat profiler
split-debuginfo = '...'
debug-assertions = false
overflow-checks = false
lto = 'thin'
panic = 'abort'
incremental = false
codegen-units = 1
rpath = false
strip = "symbols"
[features]
use-synonyms-search = ["thesaurus/static"]
default = ["memory-cache"]
dhat-heap = ["dep:dhat"]
memory-cache = ["dep:mini-moka"]
redis-cache = ["dep:redis", "dep:base64"]
compress-cache-results = ["dep:async-compression", "dep:cfg-if"]
encrypt-cache-results = ["dep:chacha20poly1305", "dep:chacha20"]
cec-cache-results = ["compress-cache-results", "encrypt-cache-results"]
experimental-io-uring = ["actix-web/experimental-io-uring"]
use-non-static-synonyms-search = ["thesaurus"]
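
A note on the `[features]` table above: entries with the `dep:` prefix turn the named dependencies into optional ones that are only compiled in when the feature is enabled, and the code then selects behaviour with `cfg` attributes. A minimal sketch of that pattern against these exact feature names (illustrative code, not part of the commit):

```rust
/// Reports which cache backend this build was compiled with,
/// mirroring the feature layout declared in the [features] table.
#[cfg(feature = "memory-cache")]
pub fn cache_backend() -> &'static str {
    "in-memory (mini-moka)"
}

#[cfg(all(feature = "redis-cache", not(feature = "memory-cache")))]
pub fn cache_backend() -> &'static str {
    "redis"
}

#[cfg(not(any(feature = "memory-cache", feature = "redis-cache")))]
pub fn cache_backend() -> &'static str {
    "disabled"
}
```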

SECURITY.md

@@ -1,5 +0,0 @@
# Security Policy
We love responsible reports of (potential) security issues in Websurfx.
Be sure to provide as much information as possible and, if found, also reproduction steps for the identified vulnerability. Also add the specific URL of the project, as well as the code in which you found the issue, to your report.

build.rs

@@ -1,85 +0,0 @@
//! A build module of the application which minifies the project's css and js files on build,
//! which helps reduce the initial page load time by loading the files faster.
#![forbid(unsafe_code, clippy::panic)]
#![deny(missing_docs, clippy::missing_docs_in_private_items, clippy::perf)]
#![warn(clippy::cognitive_complexity, rust_2018_idioms)]
// ------- Imports -------
use lightningcss::stylesheet::{MinifyOptions, ParserOptions, PrinterOptions, StyleSheet};
use minify_js::{minify, Session, TopLevelMode};
use std::{
fs::{read_dir, read_to_string, File, OpenOptions},
io::{Read, Write},
};
// ------- Constants -------
/// A constant for the path to the `public/static` folder in the codebase.
const COMMON_STATIC_SOURCE_CODE_FOLDER: &str = "./public/static/";
/// A constant for the names of the folders located in the "/public/static/"
/// folder in the codebase which contains the css files to be minified.
const STYLE_FOLDERS: [&str; 2] = ["themes", "colorschemes"];
/// A constant for the environment variable name.
const PACKAGE_ENVIRONMENT_VARIABLE: &str = "PKG_ENV";
/// A constant for the `prod` value of the `pkg_env` environment variable.
const PRODUCTION_PKG_ENV_VARIABLE_VALUE: &str = "prod";
/// A main function which minifies both css and js files using `lightningcss` and `minify_js` when
/// the `PKG_ENV` environment variable is set to the value of `prod`.
///
/// # Error
///
/// This function returns the unit type when the minification process runs successfully otherwise
/// it returns a standard error.
fn main() -> Result<(), Box<dyn std::error::Error>> {
if let Ok(pkg_env_var) = std::env::var(PACKAGE_ENVIRONMENT_VARIABLE) {
if pkg_env_var.to_lowercase() == PRODUCTION_PKG_ENV_VARIABLE_VALUE {
// A for loop that loops over each file name contained in the `colorschemes` and `themes` folders
// and minifies it using the `lightningcss` minifier.
for folder_name in STYLE_FOLDERS {
for file in read_dir(format!("{COMMON_STATIC_SOURCE_CODE_FOLDER}{folder_name}/"))? {
let file_path = file?.path();
let source = read_to_string(file_path.clone())?;
let mut stylesheet = StyleSheet::parse(&source, ParserOptions::default())
.map_err(|err| format!("{err}\n{:?}", file_path.file_name().unwrap()))?;
stylesheet.minify(MinifyOptions::default())?;
let minified_css = stylesheet.to_css(PrinterOptions::default())?;
let mut old_css_file = OpenOptions::new()
.write(true)
.truncate(true)
.open(file_path)?;
old_css_file.write_all(minified_css.code.as_bytes())?;
old_css_file.flush()?;
}
}
// A for loop that loops over each file name contained in the `public/static` folder and minifies
// it using the `minify-js` minifier.
for file in read_dir(COMMON_STATIC_SOURCE_CODE_FOLDER)? {
let file_path = file?.path();
if file_path.is_file() {
let mut code = Vec::new();
let mut js_file = File::open(file_path.clone())?;
js_file.read_to_end(&mut code)?;
drop(js_file);
let mut out = Vec::new();
minify(&Session::new(), TopLevelMode::Global, &code, &mut out)
.map_err(|err| format!("{err}\n{:?}", file_path.file_name().unwrap()))?;
let mut old_js_file = OpenOptions::new()
.write(true)
.truncate(true)
.open(file_path)?;
old_js_file.write_all(&out)?;
old_js_file.flush()?;
}
}
}
}
Ok(())
}

src/bin/websurfx.rs

@@ -1,54 +0,0 @@
//! Main module of the application
//!
//! This module contains the main function which handles the logging of the application to the
//! stdout and handles the command line arguments provided and launches the `websurfx` server.
#[cfg(not(feature = "dhat-heap"))]
use mimalloc::MiMalloc;
use std::{net::TcpListener, sync::OnceLock};
use websurfx::{cache::cacher::create_cache, config::parser::Config, run};
/// A dhat heap memory profiler
#[cfg(feature = "dhat-heap")]
#[global_allocator]
static ALLOC: dhat::Alloc = dhat::Alloc;
#[cfg(not(feature = "dhat-heap"))]
#[global_allocator]
static GLOBAL: MiMalloc = MiMalloc;
/// A static constant for holding the parsed config.
static CONFIG: OnceLock<Config> = OnceLock::new();
/// The function that launches the main server and registers all the routes of the website.
///
/// # Error
///
/// Returns an error if the port is being used by something else on the system and is not
/// available for being used for other applications.
#[actix_web::main]
async fn main() -> std::io::Result<()> {
// A dhat heap profiler initialization.
#[cfg(feature = "dhat-heap")]
let _profiler = dhat::Profiler::new_heap();
// Initialize the parsed config globally.
let config = CONFIG.get_or_init(|| Config::parse(false).unwrap());
let cache = create_cache(config).await;
log::info!(
"started server on port {} and IP {}",
config.port,
config.binding_ip
);
log::info!(
"Open http://{}:{}/ in your browser",
config.binding_ip,
config.port,
);
let listener = TcpListener::bind((config.binding_ip.as_str(), config.port))?;
run(listener, config, cache)?.await
}

src/cache/cacher.rs (vendored, 570 changed lines)

@@ -2,583 +2,57 @@
//! from the upstream search engines in a json format.
use error_stack::Report;
#[cfg(feature = "memory-cache")]
use mini_moka::sync::Cache as MokaCache;
#[cfg(feature = "memory-cache")]
use mini_moka::sync::ConcurrentCacheExt;
#[cfg(feature = "memory-cache")]
use std::time::Duration;
use tokio::sync::Mutex;
use crate::{config::parser::Config, models::aggregation_models::SearchResults};
use super::error::CacheError;
#[cfg(feature = "redis-cache")]
use super::redis_cacher::RedisCache;
#[cfg(any(feature = "encrypt-cache-results", feature = "cec-cache-results"))]
use super::encryption::*;
/// Abstraction trait for common methods provided by a cache backend.
#[async_trait::async_trait]
pub trait Cacher: Send + Sync {
/// A function that builds the cache from the given configuration.
///
/// # Arguments
///
/// * `config` - It takes the config struct as an argument.
///
/// # Returns
///
/// It returns a newly initialized backend based on the feature enabled by the user.
async fn build(config: &Config) -> Self
where
Self: Sized;
/// A function which fetches the cached json results as json string.
///
/// # Arguments
///
/// * `url` - It takes a url as a string.
///
/// # Error
///
/// Returns the `SearchResults` from the cache if the program executes normally otherwise
/// returns a `CacheError` if the results cannot be retrieved from the cache.
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>>;
/// A function which caches the results by using the `url` as the key and
/// `json results` as the value and stores it in the cache
///
/// # Arguments
///
/// * `json_results` - It takes the json results string as an argument.
/// * `url` - It takes the url as a String.
///
/// # Error
///
/// Returns a unit type if the program caches the given search results without a failure
/// otherwise it returns a `CacheError` if the search results cannot be cached due to a
/// failure.
async fn cache_results(
&mut self,
search_results: &[SearchResults],
urls: &[String],
) -> Result<(), Report<CacheError>>;
/// A helper function which computes the hash of the url and formats and returns it as string.
///
/// # Arguments
///
/// * `url` - It takes a url as a string.
fn hash_url(&self, url: &str) -> String {
blake3::hash(url.as_bytes()).to_string()
}
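Since `hash_url` above is the basis for every cache key, a quick standalone check of what it produces may help; this uses blake3's public API with an example URL (a sketch, not code from the commit):

```rust
fn main() {
    // blake3::hash returns a 32-byte digest; Display renders it as 64 hex
    // characters, which is exactly the string hash_url stores as the cache key.
    let key = blake3::hash("https://example.com/search?q=rust".as_bytes()).to_string();
    assert_eq!(key.len(), 64);
    println!("cache key: {key}");
}
```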
/// A helper function that returns either encrypted or decrypted results.
/// Feature flags (**encrypt-cache-results or cec-cache-results**) are required for this to work.
///
/// # Arguments
///
/// * `bytes` - It takes a slice of bytes as an argument.
/// * `encrypt` - A boolean to choose whether to encrypt or decrypt the bytes
///
/// # Error
/// Returns either encrypted or decrypted bytes on success otherwise it returns a CacheError
/// on failure.
#[cfg(any(
// feature = "compress-cache-results",
feature = "encrypt-cache-results",
feature = "cec-cache-results"
))]
async fn encrypt_or_decrypt_results(
&mut self,
mut bytes: Vec<u8>,
encrypt: bool,
) -> Result<Vec<u8>, Report<CacheError>> {
use chacha20poly1305::{
aead::{Aead, AeadCore, KeyInit, OsRng},
ChaCha20Poly1305,
};
let cipher = CIPHER.get_or_init(|| {
let key = ChaCha20Poly1305::generate_key(&mut OsRng);
ChaCha20Poly1305::new(&key)
});
let encryption_key = ENCRYPTION_KEY.get_or_init(
|| ChaCha20Poly1305::generate_nonce(&mut OsRng), // 96-bits; unique per message
);
bytes = if encrypt {
cipher
.encrypt(encryption_key, bytes.as_ref())
.map_err(|_| CacheError::EncryptionError)?
} else {
cipher
.decrypt(encryption_key, bytes.as_ref())
.map_err(|_| CacheError::EncryptionError)?
};
Ok(bytes)
}
/// A helper function that returns compressed results.
/// Feature flags (**compress-cache-results or cec-cache-results**) are required for this to work.
///
/// # Arguments
///
/// * `bytes` - It takes a slice of bytes as an argument.
///
/// # Error
/// Returns the compressed bytes on success otherwise it returns a CacheError
/// on failure.
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
async fn compress_results(
&mut self,
mut bytes: Vec<u8>,
) -> Result<Vec<u8>, Report<CacheError>> {
use tokio::io::AsyncWriteExt;
let mut writer = async_compression::tokio::write::BrotliEncoder::new(Vec::new());
writer
.write_all(&bytes)
.await
.map_err(|_| CacheError::CompressionError)?;
writer
.shutdown()
.await
.map_err(|_| CacheError::CompressionError)?;
bytes = writer.into_inner();
Ok(bytes)
}
/// A helper function that returns compressed-encrypted results.
/// Feature flag (**cec-cache-results**) is required for this to work.
///
/// # Arguments
///
/// * `bytes` - It takes a slice of bytes as an argument.
///
/// # Error
/// Returns the compressed and encrypted bytes on success otherwise it returns a CacheError
/// on failure.
#[cfg(feature = "cec-cache-results")]
async fn compress_encrypt_compress_results(
&mut self,
mut bytes: Vec<u8>,
) -> Result<Vec<u8>, Report<CacheError>> {
// compress first
bytes = self.compress_results(bytes).await?;
// encrypt
bytes = self.encrypt_or_decrypt_results(bytes, true).await?;
// compress again;
bytes = self.compress_results(bytes).await?;
Ok(bytes)
}
/// A helper function that returns compressed results.
/// Feature flags (**compress-cache-results or cec-cache-results**) are required for this to work.
/// # Arguments
///
/// * `bytes` - It takes a slice of bytes as an argument.
///
/// # Error
/// Returns the uncompressed bytes on success otherwise it returns a CacheError
/// on failure.
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
async fn decompress_results(&mut self, bytes: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
cfg_if::cfg_if! {
if #[cfg(feature = "compress-cache-results")]
{
decompress_util(bytes).await
}
else if #[cfg(feature = "cec-cache-results")]
{
let decompressed = decompress_util(bytes)?;
let decrypted = self.encrypt_or_decrypt_results(decompressed, false)?;
decompress_util(&decrypted).await
}
}
}
/// A helper function that compresses or encrypts search results before they're inserted into a cache store
/// # Arguments
///
/// * `search_results` - A reference to the search results to process.
///
///
/// # Error
/// Returns a Vec of compressed or encrypted bytes on success otherwise it returns a CacheError
/// on failure.
async fn pre_process_search_results(
&mut self,
search_results: &SearchResults,
) -> Result<Vec<u8>, Report<CacheError>> {
#[allow(unused_mut)] // needs to be mutable when any of the features is enabled
let mut bytes: Vec<u8> = search_results.try_into()?;
#[cfg(feature = "compress-cache-results")]
{
let compressed = self.compress_results(bytes).await?;
bytes = compressed;
}
#[cfg(feature = "encrypt-cache-results")]
{
let encrypted = self.encrypt_or_decrypt_results(bytes, true).await?;
bytes = encrypted;
}
#[cfg(feature = "cec-cache-results")]
{
let compressed_encrypted_compressed =
self.compress_encrypt_compress_results(bytes).await?;
bytes = compressed_encrypted_compressed;
}
Ok(bytes)
}
/// A helper function that decompresses or decrypts search results after they're fetched from the cache-store
/// # Arguments
///
/// * `bytes` - A Vec of bytes stored in the cache.
///
///
/// # Error
/// Returns the SearchResults struct on success otherwise it returns a CacheError
/// on failure.
#[allow(unused_mut)] // needs to be mutable when any of the features is enabled
async fn post_process_search_results(
&mut self,
mut bytes: Vec<u8>,
) -> Result<SearchResults, Report<CacheError>> {
#[cfg(feature = "compress-cache-results")]
{
let decompressed = self.decompress_results(&bytes).await?;
bytes = decompressed
}
#[cfg(feature = "encrypt-cache-results")]
{
let decrypted = self.encrypt_or_decrypt_results(bytes, false).await?;
bytes = decrypted
}
#[cfg(feature = "cec-cache-results")]
{
let decompressed_decrypted = self.decompress_results(&bytes).await?;
bytes = decompressed_decrypted;
}
Ok(bytes.try_into()?)
impl Into<SearchResults> for Vec<u8> {
fn into(self) -> SearchResults {
serde_json::from_slice(&self)
.expect("well, this can only be caused by memory corruption so good luck")
}
}
/// A helper function that returns compressed results.
/// Feature flags (**compress-cache-results or cec-cache-results**) are required for this to work.
/// # Arguments
///
/// * `bytes` - It takes a slice of bytes as an argument.
///
/// # Error
/// Returns the uncompressed bytes on success otherwise it returns a CacheError
/// on failure.
#[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))]
async fn decompress_util(input: &[u8]) -> Result<Vec<u8>, Report<CacheError>> {
use tokio::io::AsyncWriteExt;
let mut writer = async_compression::tokio::write::BrotliDecoder::new(Vec::new());
writer
.write_all(input)
.await
.map_err(|_| CacheError::CompressionError)?;
writer
.shutdown()
.await
.map_err(|_| CacheError::CompressionError)?;
let bytes = writer.into_inner();
Ok(bytes)
}
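As a sanity check on the brotli helpers above (`compress_results` and `decompress_util`), here is a self-contained round-trip sketch using the same write-side `async-compression` API; it assumes the `brotli` and `tokio` features enabled in Cargo.toml earlier in this diff:

```rust
use async_compression::tokio::write::{BrotliDecoder, BrotliEncoder};
use tokio::io::AsyncWriteExt;

/// Compresses `input` and decompresses it back, mirroring the helpers
/// above; the returned bytes should equal the original input.
async fn brotli_roundtrip(input: &[u8]) -> std::io::Result<Vec<u8>> {
    let mut encoder = BrotliEncoder::new(Vec::new());
    encoder.write_all(input).await?;
    encoder.shutdown().await?; // finalizes the brotli stream
    let compressed = encoder.into_inner();

    let mut decoder = BrotliDecoder::new(Vec::new());
    decoder.write_all(&compressed).await?;
    decoder.shutdown().await?;
    Ok(decoder.into_inner())
}
```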
#[cfg(feature = "redis-cache")]
#[async_trait::async_trait]
impl Cacher for RedisCache {
async fn build(config: &Config) -> Self {
log::info!(
"Initialising redis cache. Listening to {}",
&config.redis_url
);
RedisCache::new(&config.redis_url, 5, config.cache_expiry_time)
.await
.expect("Redis cache configured")
}
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
use base64::Engine;
let hashed_url_string: &str = &self.hash_url(url);
let base64_string = self.cached_json(hashed_url_string).await?;
let bytes = base64::engine::general_purpose::STANDARD_NO_PAD
.decode(base64_string)
.map_err(|_| CacheError::Base64DecodingOrEncodingError)?;
self.post_process_search_results(bytes).await
}
async fn cache_results(
&mut self,
search_results: &[SearchResults],
urls: &[String],
) -> Result<(), Report<CacheError>> {
use base64::Engine;
// size of search_results is expected to be equal to size of urls -> key/value pairs for cache;
let search_results_len = search_results.len();
let mut bytes = Vec::with_capacity(search_results_len);
for result in search_results {
let processed = self.pre_process_search_results(result).await?;
bytes.push(processed);
}
let base64_strings = bytes
.iter()
.map(|bytes_vec| base64::engine::general_purpose::STANDARD_NO_PAD.encode(bytes_vec));
let mut hashed_url_strings = Vec::with_capacity(search_results_len);
for url in urls {
let hash = self.hash_url(url);
hashed_url_strings.push(hash);
}
self.cache_json(base64_strings, hashed_url_strings.into_iter())
.await
}
}
/// TryInto implementation for SearchResults from Vec<u8>
use std::convert::TryInto;
impl TryInto<SearchResults> for Vec<u8> {
type Error = CacheError;
fn try_into(self) -> Result<SearchResults, Self::Error> {
serde_json::from_slice(&self).map_err(|_| CacheError::SerializationError)
}
}
impl TryInto<Vec<u8>> for &SearchResults {
type Error = CacheError;
fn try_into(self) -> Result<Vec<u8>, Self::Error> {
serde_json::to_vec(self).map_err(|_| CacheError::SerializationError)
impl Into<Vec<u8>> for &SearchResults {
fn into(self) -> Vec<u8> {
serde_json::to_vec(self).expect("somehow failed to serialize search results")
}
}
/// Memory based cache backend.
#[cfg(feature = "memory-cache")]
pub struct InMemoryCache {
#[derive(Clone)]
pub struct Cache {
/// The backend cache which stores data.
cache: MokaCache<String, Vec<u8>>,
}
#[cfg(feature = "memory-cache")]
#[async_trait::async_trait]
impl Cacher for InMemoryCache {
async fn build(config: &Config) -> Self {
log::info!("Initialising in-memory cache");
impl Cache {
/// Build new cache
pub fn build(config: &Config) -> Self {
log::info!("Initializing in-memory cache");
InMemoryCache {
Self {
cache: MokaCache::builder()
.time_to_live(Duration::from_secs(config.cache_expiry_time.into()))
.time_to_live(Duration::from_secs(config.cache_expiry_time))
.build(),
}
}
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
let hashed_url_string = self.hash_url(url);
match self.cache.get(&hashed_url_string) {
Some(res) => self.post_process_search_results(res).await,
None => Err(Report::new(CacheError::MissingValue)),
}
/// Retrieve Cached results
pub fn cached_results(&self, url: &str) -> Option<SearchResults> {
self.cache.get(&url.to_string()).map(|b| b.into())
}
async fn cache_results(
&mut self,
search_results: &[SearchResults],
urls: &[String],
) -> Result<(), Report<CacheError>> {
/// Cache results
pub fn cache_results(&self, search_results: &[SearchResults], urls: &[String]) {
for (url, search_result) in urls.iter().zip(search_results.iter()) {
let hashed_url_string = self.hash_url(url);
let bytes = self.pre_process_search_results(search_result).await?;
self.cache.insert(hashed_url_string, bytes);
self.cache.insert(url.clone(), search_result.into());
}
self.cache.sync();
Ok(())
}
}
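A minimal usage sketch of the simplified `Cache` API introduced in this hunk (module paths follow the file layout in this diff; the `SearchResults` value and URL are assumed inputs):

```rust
use websurfx::cache::cacher::Cache;
use websurfx::config::parser::Config;
use websurfx::models::aggregation_models::SearchResults;

fn demo(results: SearchResults, url: String) {
    let config = Config::parse(); // the new parse() just returns the defaults
    let cache = Cache::build(&config); // TTL taken from config.cache_expiry_time
    // The raw URL is now used as the key directly; no hashing step remains.
    cache.cache_results(&[results], &[url.clone()]);
    assert!(cache.cached_results(&url).is_some()); // hit while within the TTL
}
```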
/// Cache backend which utilises both memory and redis based caches.
///
/// The hybrid cache system uses both the types of cache to ensure maximum availability.
/// The set method sets the key, value pair in both the caches. Therefore in a case where redis
cache becomes unavailable, the backend will retrieve the value from the in-memory cache.
#[cfg(all(feature = "memory-cache", feature = "redis-cache"))]
pub struct HybridCache {
/// The in-memory backend cache which stores data.
memory_cache: InMemoryCache,
/// The redis backend cache which stores data.
redis_cache: RedisCache,
}
#[cfg(all(feature = "memory-cache", feature = "redis-cache"))]
#[async_trait::async_trait]
impl Cacher for HybridCache {
async fn build(config: &Config) -> Self {
log::info!("Initialising hybrid cache");
HybridCache {
memory_cache: InMemoryCache::build(config).await,
redis_cache: RedisCache::build(config).await,
}
}
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
match self.redis_cache.cached_results(url).await {
Ok(res) => Ok(res),
Err(_) => self.memory_cache.cached_results(url).await,
}
}
async fn cache_results(
&mut self,
search_results: &[SearchResults],
urls: &[String],
) -> Result<(), Report<CacheError>> {
self.redis_cache.cache_results(search_results, urls).await?;
self.memory_cache
.cache_results(search_results, urls)
.await?;
Ok(())
}
}
/// Dummy cache backend
pub struct DisabledCache;
#[async_trait::async_trait]
impl Cacher for DisabledCache {
async fn build(_config: &Config) -> Self {
log::info!("Caching is disabled");
DisabledCache
}
async fn cached_results(&mut self, _url: &str) -> Result<SearchResults, Report<CacheError>> {
Err(Report::new(CacheError::MissingValue))
}
async fn cache_results(
&mut self,
_search_results: &[SearchResults],
_urls: &[String],
) -> Result<(), Report<CacheError>> {
Ok(())
}
}
/// A structure to efficiently share the cache between threads - as it is protected by a Mutex.
pub struct SharedCache {
/// The internal cache protected from concurrent access by a mutex
cache: Mutex<Box<dyn Cacher>>,
}
impl SharedCache {
/// A function that creates a new `SharedCache` from a Cache implementation.
///
/// # Arguments
///
/// * `cache` - It takes the `Cache` enum variant as an argument with the preferred cache type.
///
/// Returns a newly constructed `SharedCache` struct.
pub fn new(cache: impl Cacher + 'static) -> Self {
Self {
cache: Mutex::new(Box::new(cache)),
}
}
/// A getter function which retrieves the cached SearchResults from the internal cache.
///
/// # Arguments
///
/// * `url` - It takes the search url as an argument which will be used as the key to fetch the
/// cached results from the cache.
///
/// # Error
///
/// Returns a `SearchResults` struct containing the search results from the cache if nothing
/// goes wrong otherwise returns a `CacheError`.
pub async fn cached_results(&self, url: &str) -> Result<SearchResults, Report<CacheError>> {
let mut mut_cache = self.cache.lock().await;
mut_cache.cached_results(url).await
}
/// A setter function which caches the results by using the `url` as the key and
/// `SearchResults` as the value.
///
/// # Arguments
///
/// * `search_results` - It takes the `SearchResults` as an argument which are results that
/// need to be cached.
/// * `url` - It takes the search url as an argument which will be used as the key for storing
/// results in the cache.
///
/// # Error
///
/// Returns a unit type if the results are cached successfully otherwise returns a `CacheError`
/// on a failure.
pub async fn cache_results(
&self,
search_results: &[SearchResults],
urls: &[String],
) -> Result<(), Report<CacheError>> {
let mut mut_cache = self.cache.lock().await;
mut_cache.cache_results(search_results, urls).await
}
}
/// A function to initialise the cache backend.
pub async fn create_cache(config: &Config) -> impl Cacher {
#[cfg(all(feature = "redis-cache", feature = "memory-cache"))]
return HybridCache::build(config).await;
#[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))]
return InMemoryCache::build(config).await;
#[cfg(all(feature = "redis-cache", not(feature = "memory-cache")))]
return RedisCache::build(config).await;
#[cfg(not(any(feature = "memory-cache", feature = "redis-cache")))]
return DisabledCache::build(config).await;
}
//#[cfg(feature = "Compress-cache-results")]

src/cache/encryption.rs

@@ -1,25 +0,0 @@
use chacha20poly1305::{
consts::{B0, B1},
ChaChaPoly1305,
};
use std::sync::OnceLock;
use chacha20::{
cipher::{
generic_array::GenericArray,
typenum::{UInt, UTerm},
StreamCipherCoreWrapper,
},
ChaChaCore,
};
/// The ChaCha20 core wrapped in a stream cipher for use in ChaCha20-Poly1305 authenticated encryption.
type StreamCipherCoreWrapperType =
StreamCipherCoreWrapper<ChaChaCore<UInt<UInt<UInt<UInt<UTerm, B1>, B0>, B1>, B0>>>;
/// Our ChaCha20-Poly1305 cipher instance, lazily initialized.
pub static CIPHER: OnceLock<ChaChaPoly1305<StreamCipherCoreWrapperType>> = OnceLock::new();
/// The type alias for the stored value below; the typenum bits decode to a 12-byte (96-bit) array, i.e. the ChaCha20-Poly1305 nonce size, not a 32-byte key.
type GenericArrayType = GenericArray<u8, UInt<UInt<UInt<UInt<UTerm, B1>, B1>, B0>, B0>>;
/// Our encryption nonce (held in the `ENCRYPTION_KEY` static), lazily initialized.
pub static ENCRYPTION_KEY: OnceLock<GenericArrayType> = OnceLock::new();
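
A quick check of the typenum encodings above (my decoding, not part of the commit): the nested `UInt` bits read most-significant-first, so the cipher alias wraps `ChaChaCore<U10>` (ten double-rounds, i.e. ChaCha20) and the array alias is `GenericArray<u8, U12>`, a 12-byte value matching the 96-bit nonce generated in cacher.rs. Using the `typenum` crate directly (a transitive dependency here):

```rust
use typenum::{Unsigned, U10, U12};

fn main() {
    // UInt<UInt<UInt<UInt<UTerm, B1>, B0>, B1>, B0> is binary 1010 = 10,
    // UInt<UInt<UInt<UInt<UTerm, B1>, B1>, B0>, B0> is binary 1100 = 12.
    assert_eq!(U10::USIZE, 10); // ChaCha20's double-round count
    assert_eq!(U12::USIZE, 12); // nonce length in bytes (96 bits)
}
```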

src/config/parser.rs

@@ -1,14 +1,11 @@
//! This module provides the functionality to parse the lua config and convert the config options
//! into rust readable form.
use crate::handler::{file_path, FileType};
use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style};
use log::LevelFilter;
use mlua::Lua;
use std::{collections::HashMap, fs, thread::available_parallelism};
use crate::models::parser_models::Style;
use std::collections::HashMap;
/// A named struct which stores the parsed config file options.
#[derive(Debug, Clone)]
pub struct Config {
/// It stores the parsed port number option on which the server should launch.
pub port: u16,
@@ -16,15 +13,8 @@ pub struct Config {
pub binding_ip: String,
/// It stores the theming options for the website.
pub style: Style,
#[cfg(feature = "redis-cache")]
/// It stores the redis connection url address on which the redis
/// client should connect.
pub redis_url: String,
#[cfg(any(feature = "redis-cache", feature = "memory-cache"))]
/// It stores the max TTL for search results in cache.
pub cache_expiry_time: u16,
/// It stores the option whether to enable or disable production use.
pub aggregator: AggregatorConfig,
/// Memory cache invalidation time
pub cache_expiry_time: u64,
/// It stores the option whether to enable or disable logs.
pub logging: bool,
/// It stores the option whether to enable or disable debug mode.
@@ -32,149 +22,42 @@ pub struct Config {
/// It toggles whether to use adaptive HTTP windows
pub adaptive_window: bool,
/// It stores all the engine names that were enabled by the user.
pub upstream_search_engines: HashMap<String, bool>,
pub upstream_search_engines: Vec<String>,
/// It stores the time (secs) which controls the server request timeout.
pub request_timeout: u8,
/// It stores the number of threads which the app will use to run.
pub threads: u8,
/// Set the keep-alive time for client connections to the HTTP server
pub client_connection_keep_alive: u8,
/// It stores configuration options for the ratelimiting middleware.
pub rate_limiter: RateLimiter,
/// It stores the level of safe search to be used for restricting content in the
/// search results.
pub safe_search: u8,
//pub client_connection_keep_alive: u8,
/// It stores the TCP connection keepalive duration in seconds.
pub tcp_connection_keep_alive: u8,
/// It stores the pool idle connection timeout in seconds.
pub pool_idle_connection_timeout: u8,
}
impl Default for Config {
fn default() -> Self {
Self {
port: 8080,
binding_ip: "127.0.0.1".into(),
style: Style {
theme: "simple".into(),
colorscheme: "catppuccin-mocha".into(),
animation: Some("simple-frosted-glow".into()),
},
cache_expiry_time: 600,
logging: true,
debug: false,
adaptive_window: false,
upstream_search_engines: vec!["bing".into(), "brave".into()],
request_timeout: 2,
tcp_connection_keep_alive: 10,
pool_idle_connection_timeout: 30,
}
}
}
impl Config {
/// A function which parses the config.lua file and puts all the parsed options in the newly
/// constructed Config struct and returns it.
///
/// # Arguments
///
/// * `logging_initialized` - It takes a boolean which ensures that the logging doesn't get
/// initialized twice. Pass false if the logger has not yet been initialized.
///
/// # Error
///
/// Returns a lua parse error if parsing of the config.lua file fails or has a syntax error
/// or io error if the config.lua file doesn't exist otherwise it returns a newly constructed
/// Config struct with all the parsed config options from the parsed config file.
pub fn parse(logging_initialized: bool) -> Result<Self, Box<dyn std::error::Error>> {
let lua = Lua::new();
let globals = lua.globals();
lua.load(&fs::read_to_string(file_path(FileType::Config)?)?)
.exec()?;
let parsed_threads: u8 = globals.get::<_, u8>("threads")?;
let debug: bool = globals.get::<_, bool>("debug")?;
let logging: bool = globals.get::<_, bool>("logging")?;
let adaptive_window: bool = globals.get::<_, bool>("adaptive_window")?;
if !logging_initialized {
set_logging_level(debug, logging);
}
let threads: u8 = if parsed_threads == 0 {
let total_num_of_threads: usize = available_parallelism()?.get() / 2;
log::error!(
"Config Error: The value of `threads` option should be a non zero positive integer"
);
log::error!("Falling back to using {} threads", total_num_of_threads);
total_num_of_threads as u8
} else {
parsed_threads
};
let rate_limiter = globals.get::<_, HashMap<String, u8>>("rate_limiter")?;
let parsed_safe_search: u8 = globals.get::<_, u8>("safe_search")?;
let safe_search: u8 = match parsed_safe_search {
0..=4 => parsed_safe_search,
_ => {
log::error!("Config Error: The value of `safe_search` option should be a non zero positive integer from 0 to 4.");
log::error!("Falling back to using the value `1` for the option");
1
}
};
#[cfg(any(feature = "redis-cache", feature = "memory-cache"))]
let parsed_cet = globals.get::<_, u16>("cache_expiry_time")?;
#[cfg(any(feature = "redis-cache", feature = "memory-cache"))]
let cache_expiry_time = match parsed_cet {
0..=59 => {
log::error!(
"Config Error: The value of `cache_expiry_time` must be greater than 60"
);
log::error!("Falling back to using the value `60` for the option");
60
}
_ => parsed_cet,
};
Ok(Config {
port: globals.get::<_, u16>("port")?,
binding_ip: globals.get::<_, String>("binding_ip")?,
style: Style::new(
globals.get::<_, String>("theme")?,
globals.get::<_, String>("colorscheme")?,
globals.get::<_, Option<String>>("animation")?,
),
#[cfg(feature = "redis-cache")]
redis_url: globals.get::<_, String>("redis_url")?,
aggregator: AggregatorConfig {
random_delay: globals.get::<_, bool>("production_use")?,
},
logging,
debug,
adaptive_window,
upstream_search_engines: globals
.get::<_, HashMap<String, bool>>("upstream_search_engines")?,
request_timeout: globals.get::<_, u8>("request_timeout")?,
tcp_connection_keep_alive: globals.get::<_, u8>("tcp_connection_keep_alive")?,
pool_idle_connection_timeout: globals.get::<_, u8>("pool_idle_connection_timeout")?,
threads,
client_connection_keep_alive: globals.get::<_, u8>("client_connection_keep_alive")?,
rate_limiter: RateLimiter {
number_of_requests: rate_limiter["number_of_requests"],
time_limit: rate_limiter["time_limit"],
},
safe_search,
#[cfg(any(feature = "redis-cache", feature = "memory-cache"))]
cache_expiry_time,
})
/// Creates a new config based on the environment variables.
pub fn parse() -> Self {
Self::default()
}
}
/// A helper function that sets the proper logging level
///
/// # Arguments
///
/// * `debug` - It takes the option to whether enable or disable debug mode.
/// * `logging` - It takes the option to whether enable or disable logs.
fn set_logging_level(debug: bool, logging: bool) {
if let Ok(pkg_env_var) = std::env::var("PKG_ENV") {
if pkg_env_var.to_lowercase() == "dev" {
env_logger::Builder::new()
.filter(None, LevelFilter::Trace)
.init();
return;
}
}
// Initializing logging middleware with level set to default or info.
let log_level = match (debug, logging) {
(true, true) => LevelFilter::Debug,
(true, false) => LevelFilter::Debug,
(false, true) => LevelFilter::Info,
(false, false) => LevelFilter::Error,
};
env_logger::Builder::new().filter(None, log_level).init();
}

src/engines/bing.rs

@@ -47,7 +47,6 @@ impl SearchEngine for Bing {
page: u32,
user_agent: &str,
client: &Client,
_safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError> {
// Bing uses `start results from this number` convention
// So, for 10 results per page, page 0 starts at 1, page 1

src/engines/brave.rs

@@ -43,16 +43,9 @@ impl SearchEngine for Brave {
page: u32,
user_agent: &str,
client: &Client,
safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError> {
let url = format!("https://search.brave.com/search?q={query}&offset={page}");
let safe_search_level = match safe_search {
0 => "off",
1 => "moderate",
_ => "strict",
};
let header_map = HeaderMap::try_from(&HashMap::from([
("User-Agent".to_string(), user_agent.to_string()),
(
@@ -60,10 +53,7 @@ impl SearchEngine for Brave {
"application/x-www-form-urlencoded".to_string(),
),
("Referer".to_string(), "https://google.com/".to_string()),
(
"Cookie".to_string(),
format!("safe_search={safe_search_level}"),
),
("Cookie".to_string(), "safe_search=off".into()),
]))
.change_context(EngineError::UnexpectedError)?;

src/engines/duckduckgo.rs

@@ -46,7 +46,6 @@ impl SearchEngine for DuckDuckGo {
page: u32,
user_agent: &str,
client: &Client,
_safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError> {
// Page number can be missing or empty string and so appropriate handling is required
// so that upstream server receives valid page number.

src/engines/librex.rs

@@ -61,7 +61,6 @@ impl SearchEngine for LibreX {
page: u32,
user_agent: &str,
client: &Client,
_safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError> {
// Page number can be missing or empty string and so appropriate handling is required
// so that upstream server receives valid page number.

src/engines/mojeek.rs

@@ -46,7 +46,6 @@ impl SearchEngine for Mojeek {
page: u32,
user_agent: &str,
client: &Client,
safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError> {
// Mojeek uses `start results from this number` convention
// So, for 10 results per page, page 0 starts at 1, page 1
@@ -75,21 +74,6 @@
let qss = search_engines.join("%2C");
// A branchless condition to check whether the `safe_search` parameter has the
// value 0 or not. If it is zero then it sets the value 0 otherwise it sets
// the value to 1 for all other values of `safe_search`
//
// Moreover, the below branchless code is equivalent to the following code below:
//
// ```rust
// let safe = if safe_search == 0 { 0 } else { 1 }.to_string();
// ```
//
// For more information on branchless programming. See:
//
// * https://piped.video/watch?v=bVJ-mWWL7cE
let safe = u8::from(safe_search != 0).to_string();
// Mojeek detects automated requests; these are preferences that are
// able to circumvent the countermeasure. Some of these are
// not documented in their Search API
@@ -104,7 +88,6 @@
("hp", "minimal"),
("lb", "en"),
("qss", &qss),
("safe", &safe),
];
let mut query_params_string = String::new();

src/engines/searx.rs

@@ -42,27 +42,8 @@ impl SearchEngine for Searx {
page: u32,
user_agent: &str,
client: &Client,
mut safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError> {
// A branchless condition to check whether the `safe_search` parameter has the
// value greater than or equal to three or not. If it is, then it modifies the
// `safesearch` parameter's value to 2.
//
// Moreover, the below branchless code is equivalent to the following code below:
//
// ```rust
// safe_search = u8::from(safe_search >= 3) * 2;
// ```
//
// For more information on branchless programming. See:
//
// * https://piped.video/watch?v=bVJ-mWWL7cE
safe_search = u8::from(safe_search >= 3) * 2;
let url: String = format!(
"https://searx.be/search?q={query}&pageno={}&safesearch={safe_search}",
page + 1
);
let url: String = format!("https://searx.be/search?q={query}&pageno={}", page + 1);
// initializing headers and adding appropriate headers.
let header_map = HeaderMap::try_from(&HashMap::from([

src/engines/startpage.rs

@@ -46,7 +46,6 @@ impl SearchEngine for Startpage {
page: u32,
user_agent: &str,
client: &Client,
_safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError> {
// Page number can be missing or empty string and so appropriate handling is required
// so that upstream server receives valid page number.

src/lib.rs

@@ -1,124 +0,0 @@
//! This main library module provides the functionality to provide and handle the Tcp server
//! and register all the routes for the `websurfx` meta search engine website.
#![forbid(unsafe_code, clippy::panic)]
#![deny(missing_docs, clippy::missing_docs_in_private_items, clippy::perf)]
#![warn(clippy::cognitive_complexity, rust_2018_idioms)]
pub mod cache;
pub mod config;
pub mod engines;
pub mod handler;
pub mod models;
pub mod results;
pub mod server;
pub mod templates;
use std::{net::TcpListener, sync::OnceLock, time::Duration};
use crate::server::router;
use actix_cors::Cors;
use actix_files as fs;
use actix_governor::{Governor, GovernorConfigBuilder};
use actix_web::{
dev::Server,
http::header,
middleware::{Compress, Logger},
web, App, HttpServer,
};
use cache::cacher::{Cacher, SharedCache};
use config::parser::Config;
use handler::{file_path, FileType};
/// A static constant for holding the cache struct.
static SHARED_CACHE: OnceLock<SharedCache> = OnceLock::new();
/// Runs the web server on the provided TCP listener and returns a `Server` instance.
///
/// # Arguments
///
/// * `listener` - A `TcpListener` instance representing the address and port to listen on.
///
/// # Returns
///
/// Returns a `Result` containing a `Server` instance on success, or an `std::io::Error` on failure.
///
/// # Example
///
/// ```rust
/// use std::{net::TcpListener, sync::OnceLock};
/// use websurfx::{config::parser::Config, run, cache::cacher::create_cache};
///
/// /// A static constant for holding the parsed config.
/// static CONFIG: OnceLock<Config> = OnceLock::new();
///
/// #[tokio::main]
/// async fn main(){
/// // Initialize the parsed config globally.
/// let config = CONFIG.get_or_init(|| Config::parse(true).unwrap());
/// let listener = TcpListener::bind("127.0.0.1:8080").expect("Failed to bind address");
/// let cache = create_cache(config).await;
/// let server = run(listener,&config,cache).expect("Failed to start server");
/// }
/// ```
pub fn run(
listener: TcpListener,
config: &'static Config,
cache: impl Cacher + 'static,
) -> std::io::Result<Server> {
let public_folder_path: &str = file_path(FileType::Theme)?;
let cache = SHARED_CACHE.get_or_init(|| SharedCache::new(cache));
let server = HttpServer::new(move || {
let cors: Cors = Cors::default()
.allow_any_origin()
.allowed_methods(vec!["GET"])
.allowed_headers(vec![
header::ORIGIN,
header::CONTENT_TYPE,
header::REFERER,
header::COOKIE,
]);
App::new()
// Compress the responses provided by the server for the client requests.
.wrap(Compress::default())
.wrap(Logger::default()) // added logging middleware for logging.
.app_data(web::Data::new(config))
.app_data(web::Data::new(cache))
.wrap(cors)
.wrap(Governor::new(
&GovernorConfigBuilder::default()
.per_second(config.rate_limiter.time_limit as u64)
.burst_size(config.rate_limiter.number_of_requests as u32)
.finish()
.unwrap(),
))
// Serve images and static files (css and js files).
.service(
fs::Files::new("/static", format!("{}/static", public_folder_path))
.show_files_listing(),
)
.service(
fs::Files::new("/images", format!("{}/images", public_folder_path))
.show_files_listing(),
)
.service(router::robots_data) // robots.txt
.service(router::index) // index page
.service(server::routes::search::search) // search page
.service(router::about) // about page
.service(router::settings) // settings page
.default_service(web::route().to(router::not_found)) // error page
})
.workers(config.threads as usize)
// Set the keep-alive timer for client connections
.keep_alive(Duration::from_secs(
config.client_connection_keep_alive as u64,
))
// Start server on 127.0.0.1 with the user provided port number, for example 127.0.0.1:8080.
.listen(listener)?
.run();
Ok(server)
}

src/models/aggregation_models.rs

@@ -35,7 +35,7 @@ impl SearchResult {
///
/// * `title` - The title of the search result.
/// * `url` - The url which is accessed when clicked on it
/// (href url in html in simple words).
/// (href url in html in simple words).
/// * `description` - The description of the search result.
/// * `engine` - The names of the upstream engines from which these results were provided.
pub fn new(title: &str, url: &str, description: &str, engine: &[&str]) -> Self {
@@ -125,7 +125,7 @@ impl EngineErrorInfo {
/// # Arguments
///
/// * `error` - It takes the error type which occurred while fetching the result from a particular
/// search engine.
/// search engine.
/// * `engine` - It takes the name of the engine that failed to provide the requested search results.
pub fn new(error: &EngineError, engine: &str) -> Self {
Self {
@@ -178,11 +178,11 @@ impl SearchResults {
/// # Arguments
///
/// * `results` - Takes an argument of individual serializable `SearchResult` struct
/// and stores it into a vector of `SearchResult` structs.
/// and stores it into a vector of `SearchResult` structs.
/// * `page_query` - Takes an argument of current page's search query `q` provided in
/// the search url.
/// the search url.
/// * `engine_errors_info` - Takes an array of structs which contains information regarding
/// which engines failed with their names, reason and their severity color name.
/// which engines failed with their names, reason and their severity color name.
pub fn new(results: Vec<SearchResult>, engine_errors_info: &[EngineErrorInfo]) -> Self {
Self {
results,

src/models/engine_models.rs

@@ -146,7 +146,6 @@ pub trait SearchEngine: Sync + Send {
page: u32,
user_agent: &str,
client: &Client,
safe_search: u8,
) -> Result<Vec<(String, SearchResult)>, EngineError>;
}

src/models/parser_models.rs

@@ -10,7 +10,7 @@
/// order to allow deserializing the json back to a struct in the aggregate function in
/// aggregator.rs and create a new struct out of it and then serialize it back to json and pass
/// it to the template files.
#[derive(Default)]
#[derive(Default, Debug, Clone)]
pub struct Style {
/// It stores the parsed theme option used to set a theme for the website.
pub theme: String,
@@ -29,7 +29,7 @@ impl Style {
///
/// * `theme` - It takes the parsed theme option used to set a theme for the website.
/// * `colorscheme` - It takes the parsed colorscheme option used to set a colorscheme
/// for the theme being used.
/// for the theme being used.
pub fn new(theme: String, colorscheme: String, animation: Option<String>) -> Self {
Style {
theme,

src/models/server_models.rs

@@ -30,19 +30,16 @@ pub struct Cookie<'a> {
pub colorscheme: Cow<'a, str>,
/// It stores the user selected upstream search engines selected from the UI.
pub engines: Cow<'a, Vec<Cow<'a, str>>>,
/// It stores the user selected safe search level from the UI.
pub safe_search_level: u8,
}
impl<'a> Cookie<'a> {
/// server_models::Cookie constructor function
pub fn build(style: &'a Style, mut engines: Vec<Cow<'a, str>>, safe_search_level: u8) -> Self {
pub fn build(style: &'a Style, mut engines: Vec<Cow<'a, str>>) -> Self {
engines.sort();
Self {
theme: Cow::Borrowed(&style.theme),
colorscheme: Cow::Borrowed(&style.colorscheme),
engines: Cow::Owned(engines),
safe_search_level,
}
}
}

src/results/aggregator.rs

@@ -3,7 +3,6 @@
use super::user_agent::random_user_agent;
use crate::config::parser::Config;
use crate::handler::{file_path, FileType};
use crate::models::{
aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
engine_models::{EngineError, EngineHandler},
@@ -14,7 +13,6 @@ use futures::stream::FuturesUnordered;
use regex::Regex;
use reqwest::{Client, ClientBuilder};
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
use tokio::{
fs::File,
io::{AsyncBufReadExt, BufReader},
@@ -61,7 +59,7 @@ type FutureVec =
/// * `debug` - Accepts a boolean value to enable or disable debug mode option.
/// * `upstream_search_engines` - Accepts a vector of search engine names which was selected by the
/// * `request_timeout` - Accepts a time (secs) as a value which controls the server request timeout.
/// user through the UI or the config file.
/// user through the UI or the config file.
///
/// # Error
///
@@ -71,9 +69,8 @@
pub async fn aggregate(
query: &str,
page: u32,
config: &Config,
config: actix_web::web::Data<crate::config::parser::Config>,
upstream_search_engines: &[EngineHandler],
safe_search: u8,
) -> Result<SearchResults, Box<dyn std::error::Error>> {
let client = CLIENT.get_or_init(|| {
ClientBuilder::new()
@@ -93,13 +90,6 @@ pub async fn aggregate(
let user_agent: &str = random_user_agent();
// Add a random delay before making the request.
if config.aggregator.random_delay || !config.debug {
let nanos = SystemTime::now().duration_since(UNIX_EPOCH)?.subsec_nanos() as f32;
let delay = ((nanos / 1_0000_0000 as f32).floor() as u64) + 1; // 1_0000_0000 groups as 10^8, so the delay lands in 1..=10 seconds
tokio::time::sleep(Duration::from_secs(delay)).await;
}
let mut names: Vec<&str> = Vec::with_capacity(0);
// create tasks for upstream result fetching
@@ -112,13 +102,7 @@
let query_partially_cloned = query.clone();
tasks.push(tokio::spawn(async move {
search_engine
.results(
&query_partially_cloned,
page,
user_agent,
client,
safe_search,
)
.results(&query_partially_cloned, page, user_agent, client)
.await
}));
}
@@ -169,25 +153,6 @@
};
}
if safe_search >= 3 {
let mut blacklist_map: Vec<(String, SearchResult)> = Vec::new();
filter_with_lists(
&mut result_map,
&mut blacklist_map,
file_path(FileType::BlockList)?,
)
.await?;
filter_with_lists(
&mut blacklist_map,
&mut result_map,
file_path(FileType::AllowList)?,
)
.await?;
drop(blacklist_map);
}
let mut results: Vec<SearchResult> = result_map
.iter()
.map(|(_, value)| {

src/server/router.rs

@@ -11,9 +11,7 @@ use tokio::fs::read_to_string;
/// Handles the route of index page or main page of the `websurfx` meta search engine website.
#[get("/")]
pub async fn index(
config: web::Data<&'static Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
pub async fn index(config: web::Data<Config>) -> Result<HttpResponse, Box<dyn std::error::Error>> {
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
crate::templates::views::index::index(
&config.style.colorscheme,
@@ -27,7 +25,7 @@ pub async fn index(
/// Handles the route of any other accessed route/page which is not provided by the
/// website essentially the 404 error page.
pub async fn not_found(
config: web::Data<&'static Config>,
config: web::Data<Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
crate::templates::views::not_found::not_found(
@@ -51,9 +49,7 @@ pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std:
/// Handles the route of about page of the `websurfx` meta search engine website.
#[get("/about")]
pub async fn about(
config: web::Data<&'static Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
pub async fn about(config: web::Data<Config>) -> Result<HttpResponse, Box<dyn std::error::Error>> {
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
crate::templates::views::about::about(
&config.style.colorscheme,
@@ -67,15 +63,14 @@ pub async fn about(
/// Handles the route of settings page of the `websurfx` meta search engine website.
#[get("/settings")]
pub async fn settings(
config: web::Data<&'static Config>,
config: web::Data<Config>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
crate::templates::views::settings::settings(
config.safe_search,
&config.style.colorscheme,
&config.style.theme,
&config.style.animation,
&config.upstream_search_engines,
//&config.upstream_search_engines,
)?
.0,
))

src/server/routes/search.rs

@@ -1,9 +1,8 @@
//! This module handles the search route of the search engine website.
use crate::{
cache::cacher::SharedCache,
cache::cacher::Cache,
config::parser::Config,
handler::{file_path, FileType},
models::{
aggregation_models::SearchResults,
engine_models::EngineHandler,
@@ -12,13 +11,8 @@ use crate::{
results::aggregator::aggregate,
};
use actix_web::{get, http::header::ContentType, web, HttpRequest, HttpResponse};
use regex::Regex;
use std::borrow::Cow;
use tokio::{
fs::File,
io::{AsyncBufReadExt, BufReader},
join,
};
use tokio::join;
/// Handles the route of search page of the `websurfx` meta search engine website and it takes
/// two search url parameters `q` and `page` where `page` parameter is optional.
@@ -37,8 +31,8 @@ use tokio::{
#[get("/search")]
pub async fn search(
req: HttpRequest,
config: web::Data<&'static Config>,
cache: web::Data<&'static SharedCache>,
config: web::Data<Config>,
cache: web::Data<Cache>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
use std::sync::Arc;
let params = web::Query::<SearchParams>::from_query(req.query_string())?;
@@ -53,7 +47,7 @@ pub async fn search(
let cookie = req.cookie("appCookie");
// Get search settings using the user's cookie or from the server's config
let mut search_settings: server_models::Cookie<'_> = cookie
let search_settings: server_models::Cookie<'_> = cookie
.and_then(|cookie_value| serde_json::from_str(cookie_value.value()).ok())
.unwrap_or_else(|| {
server_models::Cookie::build(
@@ -61,22 +55,14 @@
config
.upstream_search_engines
.iter()
.filter_map(|(engine, enabled)| {
enabled.then_some(Cow::Borrowed(engine.as_str()))
})
.map(|e| Cow::Borrowed(e.as_str()))
.collect(),
config.safe_search,
)
});
search_settings.safe_search_level = get_safesearch_level(
params.safesearch,
search_settings.safe_search_level,
config.safe_search,
);
// Closure wrapping the results function capturing local references
let get_results = |page| results(&config, &cache, query, page, &search_settings);
let get_results =
|page| results(config.clone(), cache.clone(), query, page, &search_settings);
// .max(1) makes sure that the page >= 0.
let page = params.page.unwrap_or(1).max(1) - 1;
@@ -108,7 +94,7 @@ pub async fn search(
results = Arc::new(current_results?);
tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys).await });
tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys) });
} else {
let (current_results, next_results) =
join!(get_results(page), get_results(page + 1));
@@ -122,7 +108,7 @@ pub async fn search(
[results.0.clone(), parsed_next_results.0],
);
tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys).await });
tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys) });
}
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
@@ -148,7 +134,7 @@ pub async fn search(
/// # Arguments
///
/// * `url` - It takes the url of the current page that requested the search results for a
/// particular search query.
/// particular search query.
/// * `config` - It takes a parsed config struct.
/// * `query` - It takes the page number as u32 value.
/// * `req` - It takes the `HttpRequest` struct as a value.
@@ -158,48 +144,30 @@ pub async fn search(
/// It returns the `SearchResults` struct if the search results could be successfully fetched from
/// the cache or from the upstream search engines otherwise it returns an appropriate error.
async fn results(
config: &'static Config,
cache: &'static SharedCache,
config: web::Data<crate::config::parser::Config>,
cache: web::Data<crate::cache::cacher::Cache>,
query: &str,
page: u32,
search_settings: &server_models::Cookie<'_>,
) -> Result<(SearchResults, String), Box<dyn std::error::Error>> {
// eagerly parse cookie value to evaluate safe search level
let safe_search_level = search_settings.safe_search_level;
let cache_key = format!(
"http://{}:{}/search?q={}&page={}&safesearch={}&engines={}",
"http://{}:{}/search?q={}&page={}&engines={}",
config.binding_ip,
config.port,
query,
page,
safe_search_level,
search_settings.engines.join(",")
);
// fetch the cached results json.
let cached_results = cache.cached_results(&cache_key).await;
let cached_results = cache.cached_results(&cache_key);
// check whether the results were actually fetched from the cache or an error occurred, and
// handle the data accordingly.
match cached_results {
Ok(results) => Ok((results, cache_key)),
Err(_) => {
if safe_search_level == 4 {
let mut results: SearchResults = SearchResults::default();
let flag: bool =
!is_match_from_filter_list(file_path(FileType::BlockList)?, query).await?;
// Return early when query contains disallowed words,
if flag {
results.set_disallowed();
cache
.cache_results(&[results.clone()], &[cache_key.clone()])
.await?;
results.set_safe_search_level(safe_search_level);
return Ok((results, cache_key));
}
}
Some(results) => Ok((results, cache_key)),
None => {
// check if the cookie value is empty or not; if it is empty then use the
// default selected upstream search engines from the config file, otherwise
// parse the non-empty cookie and grab the user selected engines from the
@ -215,7 +183,6 @@ async fn results(
.iter()
.filter_map(|engine| EngineHandler::new(engine).ok())
.collect::<Vec<EngineHandler>>(),
safe_search_level,
)
.await?
}
@ -231,69 +198,12 @@ async fn results(
results.no_engines_selected(),
);
results.set_filtered(engine_errors_info & results_empty_check & !no_engines_selected);
cache
.cache_results(&[results.clone()], &[cache_key.clone()])
.await?;
results.set_safe_search_level(safe_search_level);
cache.cache_results(&[results.clone()], &[cache_key.clone()]);
Ok((results, cache_key))
}
}
}
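// A minimal sketch (illustrative, not part of this diff) of the synchronous
// cache API that the now-awaitless calls above assume: an in-memory,
// thread-safe cache along the lines of mini-moka's sync API. The type and
// field names here are hypothetical.
use mini_moka::sync::Cache as MokaCache;
struct InMemorySharedCache {
    inner: MokaCache<String, SearchResults>,
}
impl InMemorySharedCache {
    /// Looks up previously cached results for a key; returns `None` on a miss.
    fn cached_results(&self, key: &str) -> Option<SearchResults> {
        self.inner.get(key)
    }
    /// Stores each result under its corresponding key. No `.await` is needed
    /// because the sync cache behaves like an ordinary thread-safe map.
    fn cache_results(&self, results: &[SearchResults], keys: &[String]) {
        for (result, key) in results.iter().zip(keys) {
            self.inner.insert(key.clone(), result.clone());
        }
    }
}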
/// A helper function which checks whether the search query contains any keywords which should be
/// disallowed/allowed based on the regex-based rules present in the blocklist and allowlist files.
///
/// # Arguments
///
/// * `file_path` - It takes the file path of the list as the argument.
/// * `query` - It takes the search query to be checked against the list as an argument.
///
/// # Error
///
/// Returns a bool indicating whether the query matched an entry in the list on success,
/// otherwise returns a standard error type on a failure.
async fn is_match_from_filter_list(
file_path: &str,
query: &str,
) -> Result<bool, Box<dyn std::error::Error>> {
let reader = BufReader::new(File::open(file_path).await?);
let mut lines = reader.lines();
while let Some(line) = lines.next_line().await? {
let re = Regex::new(&line)?;
if re.is_match(query) {
return Ok(true);
}
}
Ok(false)
}
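// A worked example (hypothetical values) of how the helper above treats each
// line of the list file as a standalone regex matched against the raw query:
//
//   line in blocklist:  (?i)\bcasino\b
//   query:              "best casino bonuses"
//   Regex::new(line)?.is_match(query) evaluates to true, so the query is
//   reported as a match and gets disallowed.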
/// A helper function to choose the safe search level value based on the URL parameters,
/// cookie value and config value.
///
/// # Arguments
///
/// * `safe_search_level_from_url` - Safe search level from the URL parameters.
/// * `cookie_safe_search_level` - Safe search level value from the cookie.
/// * `config_safe_search_level` - Safe search level value from the config file.
///
/// # Returns
///
/// Returns an appropriate safe search level value based on the safe search level values
/// from the URL parameters, cookie and the config file.
fn get_safesearch_level(
safe_search_level_from_url: Option<u8>,
cookie_safe_search_level: u8,
config_safe_search_level: u8,
) -> u8 {
(u8::from(safe_search_level_from_url.is_some())
* ((u8::from(config_safe_search_level >= 3) * config_safe_search_level)
+ (u8::from(config_safe_search_level < 3) * safe_search_level_from_url.unwrap_or(0))))
+ (u8::from(safe_search_level_from_url.is_none())
* ((u8::from(config_safe_search_level >= 3) * config_safe_search_level)
+ (u8::from(config_safe_search_level < 3) * cookie_safe_search_level)))
}
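// A readable equivalent of the branchless arithmetic above (a sketch for
// clarity, not part of the diff): a config level of 3 or higher always wins;
// otherwise the URL parameter takes precedence over the cookie value.
fn get_safesearch_level_unrolled(
    safe_search_level_from_url: Option<u8>,
    cookie_safe_search_level: u8,
    config_safe_search_level: u8,
) -> u8 {
    if config_safe_search_level >= 3 {
        config_safe_search_level
    } else {
        safe_search_level_from_url.unwrap_or(cookie_safe_search_level)
    }
}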
#[cfg(test)]
mod tests {
use std::time::{SystemTime, UNIX_EPOCH};

View file

@ -12,7 +12,7 @@ const SAFE_SEARCH_LEVELS_NAME: [&str; 3] = ["None", "Low", "Moderate"];
/// # Arguments
///
/// * `engine_errors_info` - It takes the engine errors list containing errors for each upstream
/// search engine which failed to provide results as an argument.
/// search engine which failed to provide results as an argument.
/// * `safe_search_level` - It takes the safe search level with values from 0-2 as an argument.
/// * `query` - It takes the current search query provided by user as an argument.
///

View file

@ -9,7 +9,7 @@ use maud::{html, Markup};
/// # Arguments
///
/// * `engine_names` - It takes the key value pair list of all available engine names and their corresponding
/// selected (enabled/disabled) value as an argument.
/// selected (enabled/disabled) value as an argument.
///
/// # Returns
///

View file

@ -1,42 +0,0 @@
//! A module that handles the general tab for setting page view in the `websurfx` frontend.
use maud::{html, Markup};
/// A constant holding the named safe search level options for the corresponding values 0, 1 and 2.
const SAFE_SEARCH_LEVELS: [(u8, &str); 3] = [(0, "None"), (1, "Low"), (2, "Moderate")];
/// A function that handles the html code for the general tab of the settings page.
///
/// # Arguments
///
/// * `safe_search_level` - It takes the safe search level as an argument.
///
/// # Returns
///
/// It returns the compiled html markup code for the general tab.
pub fn general(safe_search_level: u8) -> Markup {
html!(
div class="general tab active"{
h1{"General"}
h3{"Select a safe search level"}
p class="description"{
"Select a safe search level from the menu below to filter content based on the level."
}
@if safe_search_level < 3 {
select name="safe_search_levels" {
// Sets the user selected safe_search_level name from the config file as the first option in the selection list.
option value=(safe_search_level){(SAFE_SEARCH_LEVELS.iter().find(|level| level.0 == safe_search_level).unwrap().1)}
@for (k,v) in SAFE_SEARCH_LEVELS.iter().filter(|level| level.0 != safe_search_level){
option value=(k){(v)}
}
}
}
@else {
p class="admin_warning" {"⚠️ This setting is being managed by the server administrator."}
select name="safe_search_levels" disabled {
option value=(SAFE_SEARCH_LEVELS[2].0){(SAFE_SEARCH_LEVELS[2].1)}
}
}
}
)
}
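// Usage sketch (illustrative): maud's Markup renders to a plain HTML string,
// e.g. general(1).into_string() yields
// "<div class=\"general tab active\"><h1>General</h1>...".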

View file

@ -3,5 +3,4 @@
pub mod cookies;
pub mod engines;
pub mod general;
pub mod user_interface;

View file

@ -11,9 +11,9 @@ use std::fs::read_dir;
/// # Arguments
///
/// * `style_type` - It takes the style type of the values `theme` and `colorscheme` as an
/// argument.
/// argument.
/// * `selected_style` - It takes the currently selected style value provided via the config file
/// as an argument.
/// as an argument.
///
/// # Error
///

View file

@ -7,9 +7,7 @@ use maud::{html, Markup};
use crate::templates::partials::{
footer::footer,
header::header,
settings_tabs::{
cookies::cookies, engines::engines, general::general, user_interface::user_interface,
},
settings_tabs::{cookies::cookies, engines::engines, user_interface::user_interface},
};
/// A function that handles the html code for the settings page view in the search engine frontend.
@ -27,11 +25,10 @@ use crate::templates::partials::{
/// This function returns a compiled html markup code on success otherwise returns a standard error
/// message.
pub fn settings(
safe_search_level: u8,
colorscheme: &str,
theme: &str,
animation: &Option<String>,
engine_names: &HashMap<String, bool>,
//engine_names: &HashMap<String, bool>,
) -> Result<Markup, Box<dyn std::error::Error>> {
Ok(html!(
(header(colorscheme, theme, animation))
@ -46,9 +43,8 @@ pub fn settings(
.btn onclick="setActiveTab(this)"{"cookies"}
}
.main_container{
(general(safe_search_level))
(user_interface(theme, colorscheme, animation)?)
(engines(engine_names))
//(engines(engine_names))
(cookies())
p class="message"{}
button type="submit" onclick="setClientSettings()"{"Save"}

View file

@ -1,40 +0,0 @@
use std::{net::TcpListener, sync::OnceLock};
use websurfx::{config::parser::Config, run, templates::views};
/// A static constant for holding the parsed config.
static CONFIG: OnceLock<Config> = OnceLock::new();
// Starts a new instance of the HTTP server, bound to a random available port
async fn spawn_app() -> String {
// Binding to port 0 will trigger the OS to assign a port for us.
let listener = TcpListener::bind("127.0.0.1:0").expect("Failed to bind random port");
let port = listener.local_addr().unwrap().port();
let config = CONFIG.get_or_init(|| Config::parse(false).unwrap());
let cache = websurfx::cache::cacher::create_cache(config).await;
let server = run(listener, config, cache).expect("Failed to bind address");
tokio::spawn(server);
format!("http://127.0.0.1:{}/", port)
}
#[tokio::test]
async fn test_index() {
let address = spawn_app().await;
let client = reqwest::Client::new();
let res = client.get(address).send().await.unwrap();
assert_eq!(res.status(), 200);
let config = Config::parse(true).unwrap();
let template = views::index::index(
&config.style.colorscheme,
&config.style.theme,
&config.style.animation,
)
.0;
assert_eq!(res.text().await.unwrap(), template);
}
// TODO: Write tests for the search function's parameters: if provided with something other
// than a u32, like alphabets and special characters, then it should panic
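// A sketch of such a test (hypothetical: over HTTP a panic is not directly
// observable, so this instead assumes actix-web rejects a `page` value that
// fails to deserialize as u32 with a 400 Bad Request rather than crashing).
#[tokio::test]
async fn test_search_with_non_numeric_page() {
    let address = spawn_app().await;
    let client = reqwest::Client::new();
    let res = client
        .get(format!("{}search?q=rust&page=abc", address))
        .send()
        .await
        .unwrap();
    // The server should answer with a client error rather than crash.
    assert!(res.status().is_client_error());
}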

View file

View file

View file

@ -1,75 +0,0 @@
-- ### General ###
logging = true -- an option to enable or disable logs.
debug = false -- an option to enable or disable debug mode.
threads = 10 -- the number of threads the app will use to run (the value should be greater than 0).
-- ### Server ###
port = "8080" -- port on which server should be launched
binding_ip = "127.0.0.1" --ip address on the which server should be launched.
production_use = false -- whether to use production mode or not (in other words this option should be used if it is to be used to host it on the server to provide a service to a large number of users (more than one))
-- if production_use is set to true
-- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests.
request_timeout = 30 -- timeout for the search requests sent to the upstream search engines to be fetched (value in seconds).
tcp_connection_keep_alive = 30 -- the amount of time the tcp connection should remain alive to the upstream search engines (or connected to the server). (value in seconds).
pool_idle_connection_timeout = 30 -- timeout for the idle connections in the reqwest HTTP connection pool (value in seconds).
rate_limiter = {
number_of_requests = 20, -- The number of requests that are allowed within the provided time limit.
time_limit = 3, -- The time window (in seconds) within which that number of requests is accepted.
}
-- Set whether the server will use an adaptive/dynamic HTTP/2 flow-control window size, see https://httpwg.org/specs/rfc9113.html#fc-principles
https_adaptive_window_size = false
-- Set keep-alive timer in seconds; keeps clients connected to the HTTP server, different from the connection to upstream search engines
client_connection_keep_alive = 120
-- ### Search ###
-- Filter results based on different levels. The levels provided are:
-- {{
-- 0 - None
-- 1 - Low
-- 2 - Moderate
-- 3 - High
-- 4 - Aggressive
-- }}
safe_search = 2
-- ### Website ###
-- The different colorschemes provided are:
-- {{
-- catppuccin-mocha
-- dark-chocolate
-- dracula
-- gruvbox-dark
-- monokai
-- nord
-- oceanic-next
-- one-dark
-- solarized-dark
-- solarized-light
-- tokyo-night
-- tomorrow-night
-- }}
colorscheme = "catppuccin-mocha" -- the colorscheme name which should be used for the website theme
-- The different themes provided are:
-- {{
-- simple
-- }}
theme = "simple" -- the theme name which should be used for the website
-- The different animations provided are:
-- {{
-- simple-frosted-glow
-- }}
animation = "simple-frosted-glow" -- the animation name which should be used with the theme or `nil` if you don't want any animations.
-- ### Caching ###
redis_url = "redis://127.0.0.1:8082" -- redis connection URL that the client should connect to.
cache_expiry_time = 600 -- expiry time for cached search results (value in seconds; must be greater than or equal to 60).
-- ### Search Engines ###
upstream_search_engines = {
DuckDuckGo = true,
Searx = false,
Brave = false,
Startpage = false,
LibreX = false,
Mojeek = false,
Bing = false,
} -- select the upstream search engines from which the results should be fetched.