Merge branch 'rolling' into change-document-style-with-linter-warnings

This commit is contained in:
neon_arch 2023-09-12 17:49:46 +03:00 committed by GitHub
commit fb231de416
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
26 changed files with 1116 additions and 486 deletions

View file

@ -2,7 +2,10 @@
//! meta search engine website and provide appropriate response to each route/page
//! when requested.
use std::fs::read_to_string;
use std::{
fs::{read_to_string, File},
io::{BufRead, BufReader, Read},
};
use crate::{
cache::cacher::RedisCache,
@ -13,9 +16,14 @@ use crate::{
};
use actix_web::{get, web, HttpRequest, HttpResponse};
use handlebars::Handlebars;
use regex::Regex;
use serde::Deserialize;
use tokio::join;
// ---- Constants ----
/// Initialize redis cache connection once and store it on the heap.
static REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
/// A named struct which deserializes all the user provided search parameters and stores them.
#[derive(Deserialize)]
struct SearchParams {
@ -25,6 +33,7 @@ struct SearchParams {
/// It stores the search parameter `page` (or pageno in simple words)
/// of the search url.
page: Option<u32>,
safesearch: Option<u8>,
}
/// Handles the route of index page or main page of the `websurfx` meta search engine website.
@ -53,13 +62,13 @@ pub async fn not_found(
/// A named struct which is used to deserialize the cookies fetched from the client side.
#[allow(dead_code)]
#[derive(Deserialize)]
struct Cookie {
struct Cookie<'a> {
/// It stores the theme name used in the website.
theme: String,
theme: &'a str,
/// It stores the colorscheme name used for the website theme.
colorscheme: String,
colorscheme: &'a str,
/// It stores the user selected upstream search engines selected from the UI.
engines: Vec<String>,
engines: Vec<&'a str>,
}
/// Handles the route of search page of the `websurfx` meta search engine website and it takes
@ -95,42 +104,58 @@ pub async fn search(
None => 1,
};
let safe_search: u8 = match config.safe_search {
3..=4 => config.safe_search,
_ => match &params.safesearch {
Some(safesearch) => match safesearch {
0..=2 => *safesearch,
_ => 1,
},
None => config.safe_search,
},
};
let (_, results, _) = join!(
results(
format!(
"http://{}:{}/search?q={}&page={}",
"http://{}:{}/search?q={}&page={}&safesearch={}",
config.binding_ip,
config.port,
query,
page - 1
page - 1,
safe_search
),
&config,
query.to_string(),
query,
page - 1,
req.clone(),
safe_search
),
results(
format!(
"http://{}:{}/search?q={}&page={}",
config.binding_ip, config.port, query, page
"http://{}:{}/search?q={}&page={}&safesearch={}",
config.binding_ip, config.port, query, page, safe_search
),
&config,
query.to_string(),
query,
page,
req.clone(),
safe_search
),
results(
format!(
"http://{}:{}/search?q={}&page={}",
"http://{}:{}/search?q={}&page={}&safesearch={}",
config.binding_ip,
config.port,
query,
page + 1
page + 1,
safe_search
),
&config,
query.to_string(),
query,
page + 1,
req.clone(),
safe_search
)
);
@ -161,30 +186,53 @@ pub async fn search(
async fn results(
url: String,
config: &Config,
query: String,
query: &str,
page: u32,
req: HttpRequest,
safe_search: u8,
) -> Result<SearchResults, Box<dyn std::error::Error>> {
// Initialize redis cache connection struct
let mut redis_cache = RedisCache::new(config.redis_url.clone())?;
let mut redis_cache: RedisCache = REDIS_CACHE
.get_or_init(async {
// Initialize redis cache connection pool only once and store it in the heap.
RedisCache::new(&config.redis_url, 5).await.unwrap()
})
.await
.clone();
// fetch the cached results json.
let cached_results_json = redis_cache.cached_json(&url);
let cached_results_json: Result<String, error_stack::Report<crate::cache::error::PoolError>> =
redis_cache.clone().cached_json(&url).await;
// check if fetched cache results was indeed fetched or it was an error and if so
// handle the data accordingly.
match cached_results_json {
Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results).unwrap()),
Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results)?),
Err(_) => {
if safe_search == 4 {
let mut results: SearchResults = SearchResults::default();
let mut _flag: bool =
is_match_from_filter_list(file_path(FileType::BlockList)?, query)?;
_flag = !is_match_from_filter_list(file_path(FileType::AllowList)?, query)?;
if _flag {
results.set_disallowed();
results.add_style(&config.style);
results.set_page_query(query);
redis_cache
.cache_results(&serde_json::to_string(&results)?, &url)
.await?;
return Ok(results);
}
}
// check if the cookie value is empty or not if it is empty then use the
// default selected upstream search engines from the config file otherwise
// parse the non-empty cookie and grab the user selected engines from the
// UI and use that.
let mut results: crate::results::aggregation_models::SearchResults = match req
.cookie("appCookie")
{
let mut results: SearchResults = match req.cookie("appCookie") {
Some(cookie_value) => {
let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
let engines = cookie_value
let engines: Vec<EngineHandler> = cookie_value
.engines
.iter()
.filter_map(|name| EngineHandler::new(name))
@ -195,8 +243,9 @@ async fn results(
page,
config.aggregator.random_delay,
config.debug,
engines,
&engines,
config.request_timeout,
safe_search,
)
.await?
}
@ -206,19 +255,41 @@ async fn results(
page,
config.aggregator.random_delay,
config.debug,
config.upstream_search_engines.clone(),
&config.upstream_search_engines,
config.request_timeout,
safe_search,
)
.await?
}
};
results.add_style(config.style.clone());
redis_cache.cache_results(serde_json::to_string(&results)?, &url)?;
if results.engine_errors_info().is_empty() && results.results().is_empty() {
results.set_filtered();
}
results.add_style(&config.style);
redis_cache
.cache_results(&serde_json::to_string(&results)?, &url)
.await?;
Ok(results)
}
}
}
/// Checks whether the search query matches any of the regex patterns listed in a filter list
/// file.
///
/// The file is read line by line and every non-blank line is compiled as a regular expression.
/// The scan stops at the first pattern that matches the query.
///
/// # Arguments
///
/// * `file_path` - It takes the path of the filter list file (one regex pattern per line).
/// * `query` - It takes the search query which is tested against each pattern.
///
/// # Error
///
/// Returns an error if the file cannot be opened, if a line cannot be read from it, or if a
/// non-blank line is not a valid regular expression. On success it returns `true` when at least
/// one pattern matches the query and `false` otherwise.
fn is_match_from_filter_list(
    file_path: &str,
    query: &str,
) -> Result<bool, Box<dyn std::error::Error>> {
    let mut reader = BufReader::new(File::open(file_path)?);
    for line in reader.by_ref().lines() {
        let pattern = line?;
        // An empty pattern compiles to a regex that matches every input, so a stray blank
        // line in the list would flag all queries. Skip such lines instead.
        if pattern.trim().is_empty() {
            continue;
        }
        if Regex::new(&pattern)?.is_match(query) {
            return Ok(true);
        }
    }
    Ok(false)
}
/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
#[get("/robots.txt")]
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {