improve search route

This commit is contained in:
Milim 2024-08-15 08:55:27 +02:00
parent 9a00f91ebc
commit 23799eeb9e
No known key found for this signature in database
4 changed files with 123 additions and 210 deletions

View file

@ -3,7 +3,7 @@
use figment::{providers::Serialized, Figment};
use serde::{Deserialize, Serialize};
/// A named struct which stores the parsed config file options.
/// Struct holding config Options
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Config {
/// It stores the parsed port number option on which the server should launch.
@ -32,15 +32,7 @@ pub struct Config {
pub pool_idle_connection_timeout: u8,
}
/// A named struct which stores,deserializes, serializes and groups the parsed config file options
/// of theme and colorscheme names into the Style struct which derives the `Clone`, `Serialize`
/// and Deserialize traits where the `Clone` trait is derived for allowing the struct to be
/// cloned and passed to the server as a shared data between all routes except `/robots.txt` and
/// the `Serialize` trait has been derived for allowing the object to be serialized so that it
/// can be passed to handlebars template files and the `Deserialize` trait has been derived in
/// order to allow the deserializing the json back to struct in aggregate function in
/// aggregator.rs and create a new struct out of it and then serialize it back to json and pass
/// it to the template files.
/// A struct holding style config
#[derive(Default, Debug, Clone, Deserialize, Serialize)]
pub struct Style {
/// It stores the parsed theme option used to set a theme for the website.

View file

@ -40,7 +40,7 @@ async fn main() {
.init();
log::info!(
"started server on port {} and IP {}",
"starting server on port {} and IP {}",
config.port,
config.binding_ip
);

View file

@ -11,7 +11,7 @@ use crate::config::Style;
pub struct SearchParams {
/// It stores the search parameter option `q` (or query in simple words)
/// of the search url.
pub q: Option<String>,
pub query: Option<String>,
/// It stores the search parameter `page` (or pageno in simple words)
/// of the search url.
pub page: Option<u32>,

View file

@ -20,13 +20,13 @@ use tokio::join;
/// # Example
///
/// ```bash
/// curl "http://127.0.0.1:8080/search?q=sweden&page=1"
/// wget "http://127.0.0.1:8080/search?q=sweden&page=1"
/// ```
///
/// Or
///
/// ```bash
/// curl "http://127.0.0.1:8080/search?q=sweden"
/// wget "http://127.0.0.1:8080/search?q=sweden"
/// ```
#[get("/search")]
pub async fn search(
@ -34,98 +34,90 @@ pub async fn search(
config: web::Data<Config>,
cache: web::Data<Cache>,
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
use std::sync::Arc;
let params = web::Query::<SearchParams>::from_query(req.query_string())?;
match &params.q {
Some(query) => {
if query.trim().is_empty() {
return Ok(HttpResponse::TemporaryRedirect()
.insert_header(("location", "/"))
.finish());
}
let cookie = req.cookie("appCookie");
// Get search settings using the user's cookie or from the server's config
let search_settings: server_models::Cookie<'_> = cookie
.and_then(|cookie_value| serde_json::from_str(cookie_value.value()).ok())
.unwrap_or_else(|| {
server_models::Cookie::build(
&config.style,
config
.upstream_search_engines
.iter()
.map(|e| Cow::Borrowed(e.as_str()))
.collect(),
)
});
// Closure wrapping the results function capturing local references
let get_results =
|page| results(config.clone(), cache.clone(), query, page, &search_settings);
// .max(1) makes sure that the page >= 0.
let page = params.page.unwrap_or(1).max(1) - 1;
let previous_page = page.saturating_sub(1);
let next_page = page + 1;
let mut results = Arc::new((SearchResults::default(), String::default()));
if page != previous_page {
let (previous_results, current_results, next_results) = join!(
get_results(previous_page),
get_results(page),
get_results(next_page)
);
let (parsed_previous_results, parsed_next_results) =
(previous_results?, next_results?);
let (cache_keys, results_list) = (
[
parsed_previous_results.1,
results.1.clone(),
parsed_next_results.1,
],
[
parsed_previous_results.0,
results.0.clone(),
parsed_next_results.0,
],
);
results = Arc::new(current_results?);
tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys) });
} else {
let (current_results, next_results) =
join!(get_results(page), get_results(page + 1));
let parsed_next_results = next_results?;
results = Arc::new(current_results?);
let (cache_keys, results_list) = (
[results.1.clone(), parsed_next_results.1.clone()],
[results.0.clone(), parsed_next_results.0],
);
tokio::spawn(async move { cache.cache_results(&results_list, &cache_keys) });
}
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
crate::templates::views::search::search(
&config.style.colorscheme,
&config.style.theme,
&config.style.animation,
query,
&results.0,
)
.0,
))
}
None => Ok(HttpResponse::TemporaryRedirect()
if !params.query.as_ref().is_some_and(|q| !q.trim().is_empty()) {
return Ok(HttpResponse::TemporaryRedirect()
.insert_header(("location", "/"))
.finish()),
.finish());
}
let query = params.query.as_ref().unwrap();
let cookie = req.cookie("appCookie");
// Get search settings using the user's cookie or from the server's config
let search_settings: server_models::Cookie<'_> = cookie
.and_then(|cookie_value| serde_json::from_str(cookie_value.value()).ok())
.unwrap_or_else(|| {
server_models::Cookie::build(
&config.style,
config
.upstream_search_engines
.iter()
.map(|e| Cow::Borrowed(e.as_str()))
.collect(),
)
});
// Closure wrapping the results function capturing local references
let get_results = |page| results(config.clone(), cache.clone(), query, page, &search_settings);
// .max(1) makes sure that the page >= 0.
let page = params.page.unwrap_or(1).max(1) - 1;
let previous_page = page.saturating_sub(1);
let next_page = page + 1;
let mut results = (SearchResults::default(), String::default());
if page != previous_page {
let (previous_results, current_results, next_results) = join!(
get_results(previous_page),
get_results(page),
get_results(next_page)
);
let (parsed_previous_results, parsed_next_results) = (previous_results?, next_results?);
let (cache_keys, results_list) = (
[
parsed_previous_results.1,
results.1.clone(),
parsed_next_results.1,
],
[
parsed_previous_results.0,
results.0.clone(),
parsed_next_results.0,
],
);
results = current_results?;
cache.cache_results(&results_list, &cache_keys);
} else {
let (current_results, next_results) = join!(get_results(page), get_results(page + 1));
let parsed_next_results = next_results?;
results = current_results?;
let (cache_keys, results_list) = (
[results.1.clone(), parsed_next_results.1.clone()],
[results.0.clone(), parsed_next_results.0],
);
cache.cache_results(&results_list, &cache_keys);
}
Ok(HttpResponse::Ok().content_type(ContentType::html()).body(
crate::templates::views::search::search(
&config.style.colorscheme,
&config.style.theme,
&config.style.animation,
query,
&results.0,
)
.0,
))
}
/// Fetches the results for a query and page. It First checks the redis cache, if that
@ -153,120 +145,49 @@ async fn results(
// eagerly parse cookie value to evaluate safe search level
let cache_key = format!(
"http://{}:{}/search?q={}&page={}&engines={}",
config.binding_ip,
config.port,
"search?q={}&page={}&engines={}",
query,
page,
search_settings.engines.join(",")
);
// fetch the cached results json.
let cached_results = cache.cached_results(&cache_key);
// check if fetched cache results was indeed fetched or it was an error and if so
// handle the data accordingly.
match cached_results {
Some(results) => Ok((results, cache_key)),
None => {
// check if the cookie value is empty or not if it is empty then use the
// default selected upstream search engines from the config file otherwise
// parse the non-empty cookie and grab the user selected engines from the
// UI and use that.
let mut results: SearchResults = match search_settings.engines.is_empty() {
false => {
aggregate(
query,
page,
config,
&search_settings
.engines
.iter()
.filter_map(|engine| EngineHandler::new(engine).ok())
.collect::<Vec<EngineHandler>>(),
)
.await?
}
true => {
let mut search_results = SearchResults::default();
search_results.set_no_engines_selected();
search_results
}
};
let (engine_errors_info, results_empty_check, no_engines_selected) = (
results.engine_errors_info().is_empty(),
results.results().is_empty(),
results.no_engines_selected(),
);
results.set_filtered(engine_errors_info & results_empty_check & !no_engines_selected);
cache.cache_results(&[results.clone()], &[cache_key.clone()]);
Ok((results, cache_key))
let response = cache.cached_results(&cache_key);
if let Some(results) = response {
return Ok((results, cache_key));
}
// check if the cookie value is empty or not if it is empty then use the
// default selected upstream search engines from the config file otherwise
// parse the non-empty cookie and grab the user selected engines from the
// UI and use that.
let mut results: SearchResults = match search_settings.engines.is_empty() {
false => {
aggregate(
query,
page,
config,
&search_settings
.engines
.iter()
.filter_map(|engine| EngineHandler::new(engine).ok())
.collect::<Vec<EngineHandler>>(),
)
.await?
}
}
}
#[cfg(test)]
mod tests {
use std::time::{SystemTime, UNIX_EPOCH};
/// A helper function which creates a random mock safe search level value.
///
/// # Returns
///
/// Returns an optional u8 value.
fn mock_safe_search_level_value() -> Option<u8> {
let nanos = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.subsec_nanos() as f32;
let delay = ((nanos / 1_0000_0000 as f32).floor() as i8) - 1;
match delay {
-1 => None,
some_num => Some(if some_num > 4 { some_num - 4 } else { some_num } as u8),
true => {
let mut search_results = SearchResults::default();
search_results.set_no_engines_selected();
search_results
}
}
#[test]
/// A test function to test whether the output of the branchless and branched code
/// for the code to choose the appropriate safe search level is same or not.
fn get_safesearch_level_branched_branchless_code_test() {
// Get mock values for the safe search level values for URL parameters, cookie
// and config.
let safe_search_level_from_url = mock_safe_search_level_value();
let cookie_safe_search_level = mock_safe_search_level_value().unwrap_or(0);
let config_safe_search_level = mock_safe_search_level_value().unwrap_or(0);
// Branched code
let safe_search_level_value_from_branched_code = match safe_search_level_from_url {
Some(safe_search_level_from_url_parsed) => {
if config_safe_search_level >= 3 {
config_safe_search_level
} else {
safe_search_level_from_url_parsed
}
}
None => {
if config_safe_search_level >= 3 {
config_safe_search_level
} else {
cookie_safe_search_level
}
}
};
// branchless code
let safe_search_level_value_from_branchless_code =
(u8::from(safe_search_level_from_url.is_some())
* ((u8::from(config_safe_search_level >= 3) * config_safe_search_level)
+ (u8::from(config_safe_search_level < 3)
* safe_search_level_from_url.unwrap_or(0))))
+ (u8::from(safe_search_level_from_url.is_none())
* ((u8::from(config_safe_search_level >= 3) * config_safe_search_level)
+ (u8::from(config_safe_search_level < 3) * cookie_safe_search_level)));
assert_eq!(
safe_search_level_value_from_branched_code,
safe_search_level_value_from_branchless_code
);
}
};
let (engine_errors_info, results_empty_check, no_engines_selected) = (
results.engine_errors_info().is_empty(),
results.results().is_empty(),
results.no_engines_selected(),
);
results.set_filtered(engine_errors_info & results_empty_check & !no_engines_selected);
cache.cache_results(&[results.clone()], &[cache_key.clone()]);
Ok((results, cache_key))
}