Merge branch 'rolling' into feat-error-box-for-engine-errors

This commit is contained in:
zhou fan 2023-08-24 08:16:32 +08:00 committed by GitHub
commit 2f1fa00f87
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 204 additions and 127 deletions

View file

@ -1,14 +1,12 @@
//! This module provides the functionality to parse the lua config and convert the config options
//! into rust readable form.
use crate::handler::paths::{file_path, FileType};
use super::parser_models::Style;
use log::LevelFilter;
use rlua::Lua;
use std::{collections::HashMap, format, fs, path::Path, thread::available_parallelism};
// ------- Constants --------
static COMMON_DIRECTORY_NAME: &str = "websurfx";
static CONFIG_FILE_NAME: &str = "config.lua";
use std::{collections::HashMap, fs, thread::available_parallelism};
/// A named struct which stores the parsed config file options.
///
@ -69,7 +67,7 @@ impl Config {
let globals = context.globals();
context
.load(&fs::read_to_string(Config::config_path()?)?)
.load(&fs::read_to_string(file_path(FileType::Config)?)?)
.exec()?;
let parsed_threads: u8 = globals.get::<_, u8>("threads")?;
@ -114,52 +112,6 @@ impl Config {
})
})
}
/// A helper function which returns an appropriate config file path checking if the config
/// file exists on that path.
///
/// # Error
///
/// Returns a `config file not found!!` error if the config file is not present under following
/// paths which are:
/// 1. `~/.config/websurfx/` if it not present here then it fallbacks to the next one (2)
/// 2. `/etc/xdg/websurfx/config.lua` if it is not present here then it fallbacks to the next
/// one (3).
/// 3. `websurfx/` (under project folder ( or codebase in other words)) if it is not present
/// here then it returns an error as mentioned above.
fn config_path() -> Result<String, Box<dyn std::error::Error>> {
// check user config
let path = format!(
"{}/.config/{}/config.lua",
std::env::var("HOME").unwrap(),
COMMON_DIRECTORY_NAME
);
if Path::new(path.as_str()).exists() {
return Ok(format!(
"{}/.config/{}/{}",
std::env::var("HOME").unwrap(),
COMMON_DIRECTORY_NAME,
CONFIG_FILE_NAME
));
}
// look for config in /etc/xdg
if Path::new(format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME).as_str())
.exists()
{
return Ok("/etc/xdg/websurfx/config.lua".to_string());
}
// use dev config
if Path::new(format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME).as_str()).exists()
{
return Ok("./websurfx/config.lua".to_string());
}
// if no of the configs above exist, return error
Err("Config file not found!!".to_string().into())
}
}
/// a helper function that sets the proper logging level

View file

@ -1 +1 @@
pub mod public_paths;
pub mod paths;

111
src/handler/paths.rs Normal file
View file

@ -0,0 +1,111 @@
//! This module provides the functionality to handle theme folder present on different paths and
//! provide one appropriate path on which it is present and can be used.
use std::collections::HashMap;
use std::io::Error;
use std::path::Path;
// ------- Constants --------
static PUBLIC_DIRECTORY_NAME: &str = "public";
static COMMON_DIRECTORY_NAME: &str = "websurfx";
static CONFIG_FILE_NAME: &str = "config.lua";
static ALLOWLIST_FILE_NAME: &str = "allowlist.txt";
static BLOCKLIST_FILE_NAME: &str = "blocklist.txt";
#[derive(Hash, PartialEq, Eq, Debug)]
pub enum FileType {
Config,
AllowList,
BlockList,
Theme,
}
static FILE_PATHS_FOR_DIFF_FILE_TYPES: once_cell::sync::Lazy<HashMap<FileType, Vec<String>>> =
once_cell::sync::Lazy::new(|| {
HashMap::from([
(
FileType::Config,
vec![
format!(
"{}/.config/{}/{}",
std::env::var("HOME").unwrap(),
COMMON_DIRECTORY_NAME,
CONFIG_FILE_NAME
),
format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
],
),
(
FileType::Theme,
vec![
format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME),
format!("./{}/", PUBLIC_DIRECTORY_NAME),
],
),
(
FileType::AllowList,
vec![
format!(
"{}/.config/{}/{}",
std::env::var("HOME").unwrap(),
COMMON_DIRECTORY_NAME,
ALLOWLIST_FILE_NAME
),
format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
format!("./{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
],
),
(
FileType::BlockList,
vec![
format!(
"{}/.config/{}/{}",
std::env::var("HOME").unwrap(),
COMMON_DIRECTORY_NAME,
BLOCKLIST_FILE_NAME
),
format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
format!("./{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
],
),
])
});
/// A helper function which returns an appropriate config file path checking if the config
/// file exists on that path.
///
/// # Error
///
/// Returns a `config file not found!!` error if the config file is not present under following
/// paths which are:
/// 1. `~/.config/websurfx/` if it not present here then it fallbacks to the next one (2)
/// 2. `/etc/xdg/websurfx/config.lua` if it is not present here then it fallbacks to the next
/// one (3).
/// 3. `websurfx/` (under project folder ( or codebase in other words)) if it is not present
/// here then it returns an error as mentioned above.
/// A function which returns an appropriate theme directory path checking if the theme
/// directory exists on that path.
///
/// # Error
///
/// Returns a `Theme (public) folder not found!!` error if the theme folder is not present under following
/// paths which are:
/// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2)
/// 2. Under project folder ( or codebase in other words) if it is not present
/// here then it returns an error as mentioned above.
pub fn file_path(file_type: FileType) -> Result<String, Error> {
let file_path = FILE_PATHS_FOR_DIFF_FILE_TYPES.get(&file_type).unwrap();
for (idx, _) in file_path.iter().enumerate() {
if Path::new(file_path[idx].as_str()).exists() {
return Ok(file_path[idx].clone());
}
}
// if no of the configs above exist, return error
Err(Error::new(
std::io::ErrorKind::NotFound,
format!("{:?} file not found!!", file_type),
))
}

View file

@ -1,33 +0,0 @@
//! This module provides the functionality to handle theme folder present on different paths and
//! provide one appropriate path on which it is present and can be used.
use std::io::Error;
use std::path::Path;
// ------- Constants --------
static PUBLIC_DIRECTORY_NAME: &str = "public";
/// A function which returns an appropriate theme directory path checking if the theme
/// directory exists on that path.
///
/// # Error
///
/// Returns a `Theme (public) folder not found!!` error if the theme folder is not present under following
/// paths which are:
/// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2)
/// 2. Under project folder ( or codebase in other words) if it is not present
/// here then it returns an error as mentioned above.
pub fn public_path() -> Result<String, Error> {
if Path::new(format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() {
return Ok(format!("/opt/websurfx/{}", PUBLIC_DIRECTORY_NAME));
}
if Path::new(format!("./{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() {
return Ok(format!("./{}", PUBLIC_DIRECTORY_NAME));
}
Err(Error::new(
std::io::ErrorKind::NotFound,
"Themes (public) folder not found!!",
))
}

View file

@ -17,7 +17,7 @@ use actix_files as fs;
use actix_web::{dev::Server, http::header, middleware::Logger, web, App, HttpServer};
use config::parser::Config;
use handlebars::Handlebars;
use handler::public_paths::public_path;
use handler::paths::{file_path, FileType};
/// Runs the web server on the provided TCP listener and returns a `Server` instance.
///
@ -42,7 +42,7 @@ use handler::public_paths::public_path;
pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
let mut handlebars: Handlebars = Handlebars::new();
let public_folder_path: String = public_path()?;
let public_folder_path: String = file_path(FileType::Theme)?;
handlebars
.register_templates_directory(".html", format!("{}/templates", public_folder_path))

View file

@ -1,18 +1,26 @@
//! This module provides the functionality to scrape and gathers all the results from the upstream
//! search engines and then removes duplicate results.
use std::{collections::HashMap, time::Duration};
use error_stack::Report;
use rand::Rng;
use tokio::task::JoinHandle;
use std::{
collections::HashMap,
io::{BufReader, Read},
time::Duration,
};
use super::{
aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
user_agent::random_user_agent,
};
use error_stack::Report;
use rand::Rng;
use regex::Regex;
use std::{fs::File, io::BufRead};
use tokio::task::JoinHandle;
use crate::engines::engine_models::{EngineError, EngineHandler};
use crate::{
engines::engine_models::{EngineError, EngineHandler},
handler::paths::{file_path, FileType},
};
/// Aliases for long type annotations
type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
@ -106,7 +114,7 @@ pub async fn aggregate(
log::error!("Engine Error: {:?}", error);
engine_errors_info.push(EngineErrorInfo::new(
error.downcast_ref::<EngineError>().unwrap(),
engine_name.to_string(),
engine_name,
));
};
@ -143,7 +151,22 @@ pub async fn aggregate(
}
}
let results = result_map.into_values().collect();
let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
filter_with_lists(
&mut result_map,
&mut blacklist_map,
&file_path(FileType::BlockList)?,
)?;
filter_with_lists(
&mut blacklist_map,
&mut result_map,
&file_path(FileType::AllowList)?,
)?;
drop(blacklist_map);
let results: Vec<SearchResult> = result_map.into_values().collect();
Ok(SearchResults::new(
results,
@ -151,3 +174,23 @@ pub async fn aggregate(
engine_errors_info,
))
}
fn filter_with_lists(
map_to_be_filtered: &mut HashMap<String, SearchResult>,
resultant_map: &mut HashMap<String, SearchResult>,
file_path: &str,
) -> Result<(), Box<dyn std::error::Error>> {
let mut reader = BufReader::new(File::open(file_path)?);
for line in reader.by_ref().lines() {
let re = Regex::new(&line?)?;
for (url, search_result) in map_to_be_filtered.clone().into_iter() {
if re.is_match(&url.to_lowercase())
|| re.is_match(&search_result.title.to_lowercase())
|| re.is_match(&search_result.description.to_lowercase())
{
resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
}
}
}
Ok(())
}

View file

@ -8,7 +8,7 @@ use crate::{
cache::cacher::RedisCache,
config::parser::Config,
engines::engine_models::EngineHandler,
handler::public_paths::public_path,
handler::paths::{file_path, FileType},
results::{aggregation_models::SearchResults, aggregator::aggregate},
};
use actix_web::{get, web, HttpRequest, HttpResponse};
@ -215,7 +215,8 @@ async fn results(
/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
#[get("/robots.txt")]
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
let page_content: String = read_to_string(format!("{}/robots.txt", public_path()?))?;
let page_content: String =
read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?;
Ok(HttpResponse::Ok()
.content_type("text/plain; charset=ascii")
.body(page_content))