2023-09-03 20:50:50 +03:00
|
|
|
//! This module handles the search route of the search engine website.
|
2023-04-22 14:35:07 +03:00
|
|
|
|
2023-05-02 11:58:21 +03:00
|
|
|
use crate::{
|
|
|
|
cache::cacher::RedisCache,
|
2023-07-03 19:30:25 +02:00
|
|
|
config::parser::Config,
|
2023-09-03 20:50:50 +03:00
|
|
|
models::{
|
|
|
|
aggregation_models::SearchResults,
|
|
|
|
engine_models::EngineHandler,
|
|
|
|
server_models::{Cookie, SearchParams},
|
|
|
|
},
|
|
|
|
results::aggregator::aggregate,
|
2023-05-02 11:58:21 +03:00
|
|
|
};
|
2023-04-22 14:35:07 +03:00
|
|
|
use actix_web::{get, web, HttpRequest, HttpResponse};
|
|
|
|
use handlebars::Handlebars;
|
2023-08-06 20:31:30 +03:00
|
|
|
use tokio::join;
|
2023-04-22 14:35:07 +03:00
|
|
|
|
2023-04-27 17:53:28 +03:00
|
|
|
/// Handles the route of search page of the `websurfx` meta search engine website and it takes
|
|
|
|
/// two search url parameters `q` and `page` where `page` parameter is optional.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```bash
|
|
|
|
/// curl "http://127.0.0.1:8080/search?q=sweden&page=1"
|
|
|
|
/// ```
|
2023-04-30 18:16:08 +03:00
|
|
|
///
|
2023-04-27 17:53:28 +03:00
|
|
|
/// Or
|
|
|
|
///
|
|
|
|
/// ```bash
|
|
|
|
/// curl "http://127.0.0.1:8080/search?q=sweden"
|
|
|
|
/// ```
|
2023-04-22 14:35:07 +03:00
|
|
|
#[get("/search")]
|
|
|
|
pub async fn search(
|
|
|
|
hbs: web::Data<Handlebars<'_>>,
|
|
|
|
req: HttpRequest,
|
2023-04-30 18:16:08 +03:00
|
|
|
config: web::Data<Config>,
|
2023-04-22 14:35:07 +03:00
|
|
|
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
|
|
|
let params = web::Query::<SearchParams>::from_query(req.query_string())?;
|
|
|
|
match ¶ms.q {
|
|
|
|
Some(query) => {
|
|
|
|
if query.trim().is_empty() {
|
2023-07-03 19:30:25 +02:00
|
|
|
return Ok(HttpResponse::Found()
|
2023-04-22 14:35:07 +03:00
|
|
|
.insert_header(("location", "/"))
|
2023-07-03 19:30:25 +02:00
|
|
|
.finish());
|
2023-04-22 14:35:07 +03:00
|
|
|
}
|
2023-07-03 19:30:25 +02:00
|
|
|
let page = match ¶ms.page {
|
|
|
|
Some(page) => *page,
|
2023-08-06 20:31:30 +03:00
|
|
|
None => 1,
|
2023-07-03 19:30:25 +02:00
|
|
|
};
|
|
|
|
|
2023-08-06 20:31:30 +03:00
|
|
|
let (_, results, _) = join!(
|
|
|
|
results(
|
|
|
|
format!(
|
|
|
|
"http://{}:{}/search?q={}&page={}",
|
|
|
|
config.binding_ip,
|
|
|
|
config.port,
|
|
|
|
query,
|
|
|
|
page - 1
|
|
|
|
),
|
|
|
|
&config,
|
|
|
|
query.to_string(),
|
|
|
|
page - 1,
|
|
|
|
req.clone(),
|
|
|
|
),
|
|
|
|
results(
|
|
|
|
format!(
|
|
|
|
"http://{}:{}/search?q={}&page={}",
|
|
|
|
config.binding_ip, config.port, query, page
|
|
|
|
),
|
|
|
|
&config,
|
|
|
|
query.to_string(),
|
|
|
|
page,
|
|
|
|
req.clone(),
|
|
|
|
),
|
|
|
|
results(
|
|
|
|
format!(
|
|
|
|
"http://{}:{}/search?q={}&page={}",
|
|
|
|
config.binding_ip,
|
|
|
|
config.port,
|
|
|
|
query,
|
|
|
|
page + 1
|
|
|
|
),
|
|
|
|
&config,
|
|
|
|
query.to_string(),
|
|
|
|
page + 1,
|
|
|
|
req.clone(),
|
|
|
|
)
|
2023-07-03 19:30:25 +02:00
|
|
|
);
|
2023-08-06 20:31:30 +03:00
|
|
|
|
|
|
|
let page_content: String = hbs.render("search", &results?)?;
|
2023-07-03 19:30:25 +02:00
|
|
|
Ok(HttpResponse::Ok().body(page_content))
|
2023-04-22 14:35:07 +03:00
|
|
|
}
|
|
|
|
None => Ok(HttpResponse::Found()
|
|
|
|
.insert_header(("location", "/"))
|
|
|
|
.finish()),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-09-03 19:23:34 +03:00
|
|
|
/// Fetches the results for a query and page. It First checks the redis cache, if that
|
|
|
|
/// fails it gets proper results by requesting from the upstream search engines.
|
|
|
|
///
|
|
|
|
/// # Arguments
|
|
|
|
///
|
|
|
|
/// * `url` - It takes the url of the current page that requested the search results for a
|
|
|
|
/// particular search query.
|
|
|
|
/// * `config` - It takes a parsed config struct.
|
|
|
|
/// * `query` - It takes the page number as u32 value.
|
|
|
|
/// * `req` - It takes the `HttpRequest` struct as a value.
|
|
|
|
///
|
|
|
|
/// # Error
|
|
|
|
///
|
|
|
|
/// It returns the `SearchResults` struct if the search results could be successfully fetched from
|
|
|
|
/// the cache or from the upstream search engines otherwise it returns an appropriate error.
|
2023-07-04 15:11:30 -07:00
|
|
|
async fn results(
|
2023-07-03 19:30:25 +02:00
|
|
|
url: String,
|
|
|
|
config: &Config,
|
2023-07-15 19:50:31 +03:00
|
|
|
query: String,
|
2023-07-03 19:30:25 +02:00
|
|
|
page: u32,
|
2023-07-15 19:50:31 +03:00
|
|
|
req: HttpRequest,
|
2023-07-03 19:30:25 +02:00
|
|
|
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
2023-09-03 19:23:34 +03:00
|
|
|
// Initialize redis cache connection struct
|
2023-07-03 19:30:25 +02:00
|
|
|
let mut redis_cache = RedisCache::new(config.redis_url.clone())?;
|
|
|
|
// fetch the cached results json.
|
2023-07-04 15:11:30 -07:00
|
|
|
let cached_results_json = redis_cache.cached_json(&url);
|
2023-07-17 10:50:15 +03:00
|
|
|
// check if fetched cache results was indeed fetched or it was an error and if so
|
2023-07-03 19:30:25 +02:00
|
|
|
// handle the data accordingly.
|
|
|
|
match cached_results_json {
|
2023-07-17 13:17:24 +03:00
|
|
|
Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results).unwrap()),
|
2023-07-03 19:30:25 +02:00
|
|
|
Err(_) => {
|
2023-07-15 19:50:31 +03:00
|
|
|
// check if the cookie value is empty or not if it is empty then use the
|
|
|
|
// default selected upstream search engines from the config file otherwise
|
|
|
|
// parse the non-empty cookie and grab the user selected engines from the
|
|
|
|
// UI and use that.
|
2023-09-03 20:50:50 +03:00
|
|
|
let mut results: SearchResults = match req.cookie("appCookie") {
|
2023-07-15 19:50:31 +03:00
|
|
|
Some(cookie_value) => {
|
|
|
|
let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
|
2023-08-18 10:43:53 +02:00
|
|
|
|
|
|
|
let engines = cookie_value
|
|
|
|
.engines
|
|
|
|
.iter()
|
|
|
|
.filter_map(|name| EngineHandler::new(name))
|
|
|
|
.collect();
|
|
|
|
|
2023-07-15 19:50:31 +03:00
|
|
|
aggregate(
|
|
|
|
query,
|
|
|
|
page,
|
|
|
|
config.aggregator.random_delay,
|
|
|
|
config.debug,
|
2023-08-18 10:43:53 +02:00
|
|
|
engines,
|
2023-07-30 10:53:48 +03:00
|
|
|
config.request_timeout,
|
2023-07-15 19:50:31 +03:00
|
|
|
)
|
|
|
|
.await?
|
|
|
|
}
|
|
|
|
None => {
|
|
|
|
aggregate(
|
|
|
|
query,
|
|
|
|
page,
|
|
|
|
config.aggregator.random_delay,
|
|
|
|
config.debug,
|
|
|
|
config.upstream_search_engines.clone(),
|
2023-07-30 10:53:48 +03:00
|
|
|
config.request_timeout,
|
2023-07-15 19:50:31 +03:00
|
|
|
)
|
|
|
|
.await?
|
|
|
|
}
|
|
|
|
};
|
2023-07-17 13:17:24 +03:00
|
|
|
results.add_style(config.style.clone());
|
|
|
|
redis_cache.cache_results(serde_json::to_string(&results)?, &url)?;
|
|
|
|
Ok(results)
|
2023-07-03 19:30:25 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|