Merge branch 'rolling' into improve-async-multithreading
This commit is contained in:
commit
74e4fc6169
51 changed files with 548 additions and 339 deletions
|
@ -5,7 +5,7 @@
|
|||
|
||||
use std::net::TcpListener;
|
||||
|
||||
use websurfx::{config_parser::parser::Config, run};
|
||||
use websurfx::{config::parser::Config, run};
|
||||
|
||||
/// The function that launches the main server and registers all the routes of the website.
|
||||
///
|
||||
|
@ -26,7 +26,7 @@ async fn main() -> std::io::Result<()> {
|
|||
|
||||
log::info!("started server on port {}", config.port);
|
||||
|
||||
let listener = TcpListener::bind((config.binding_ip_addr.clone(), config.port))?;
|
||||
let listener = TcpListener::bind((config.binding_ip.clone(), config.port))?;
|
||||
|
||||
run(listener, config)?.await
|
||||
}
|
||||
|
|
8
src/cache/cacher.rs
vendored
8
src/cache/cacher.rs
vendored
|
@ -32,7 +32,7 @@ impl RedisCache {
|
|||
/// # Arguments
|
||||
///
|
||||
/// * `url` - It takes a url as a string.
|
||||
fn compute_url_hash(url: &str) -> String {
|
||||
fn hash_url(url: &str) -> String {
|
||||
format!("{:?}", compute(url))
|
||||
}
|
||||
|
||||
|
@ -41,8 +41,8 @@ impl RedisCache {
|
|||
/// # Arguments
|
||||
///
|
||||
/// * `url` - It takes a url as a string.
|
||||
pub fn cached_results_json(&mut self, url: &str) -> Result<String, Box<dyn std::error::Error>> {
|
||||
let hashed_url_string = Self::compute_url_hash(url);
|
||||
pub fn cached_json(&mut self, url: &str) -> Result<String, Box<dyn std::error::Error>> {
|
||||
let hashed_url_string = Self::hash_url(url);
|
||||
Ok(self.connection.get(hashed_url_string)?)
|
||||
}
|
||||
|
||||
|
@ -59,7 +59,7 @@ impl RedisCache {
|
|||
json_results: String,
|
||||
url: &str,
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let hashed_url_string = Self::compute_url_hash(url);
|
||||
let hashed_url_string = Self::hash_url(url);
|
||||
|
||||
// put results_json into cache
|
||||
self.connection.set(&hashed_url_string, json_results)?;
|
||||
|
|
|
@ -14,9 +14,9 @@ static CONFIG_FILE_NAME: &str = "config.lua";
|
|||
/// # Fields
|
||||
///
|
||||
/// * `port` - It stores the parsed port number option on which the server should launch.
|
||||
/// * `binding_ip_addr` - It stores the parsed ip address option on which the server should launch
|
||||
/// * `binding_ip` - It stores the parsed ip address option on which the server should launch
|
||||
/// * `style` - It stores the theming options for the website.
|
||||
/// * `redis_connection_url` - It stores the redis connection url address on which the redis
|
||||
/// * `redis_url` - It stores the redis connection url address on which the redis
|
||||
/// client should connect.
|
||||
/// * `aggregator` - It stores the option of whether to enable or disable production use.
|
||||
/// * `logging` - It stores the option of whether to enable or disable logs.
|
||||
|
@ -25,10 +25,10 @@ static CONFIG_FILE_NAME: &str = "config.lua";
|
|||
#[derive(Clone)]
|
||||
pub struct Config {
|
||||
pub port: u16,
|
||||
pub binding_ip_addr: String,
|
||||
pub binding_ip: String,
|
||||
pub style: Style,
|
||||
pub redis_connection_url: String,
|
||||
pub aggregator: AggreatorConfig,
|
||||
pub redis_url: String,
|
||||
pub aggregator: AggregatorConfig,
|
||||
pub logging: bool,
|
||||
pub debug: bool,
|
||||
pub upstream_search_engines: Vec<String>,
|
||||
|
@ -41,47 +41,38 @@ pub struct Config {
|
|||
/// * `random_delay` - It stores the option of whether to enable or disable random delays between
|
||||
/// requests.
|
||||
#[derive(Clone)]
|
||||
pub struct AggreatorConfig {
|
||||
pub struct AggregatorConfig {
|
||||
pub random_delay: bool,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
/// A function which parses the config.lua file and puts all the parsed options in the newly
|
||||
/// contructed Config struct and returns it.
|
||||
/// constructed Config struct and returns it.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns a lua parse error if parsing of the config.lua file fails or has a syntax error
|
||||
/// or io error if the config.lua file doesn't exists otherwise it returns a newly contructed
|
||||
/// or io error if the config.lua file doesn't exist, otherwise it returns a newly constructed
|
||||
/// Config struct with all the parsed config options from the parsed config file.
|
||||
pub fn parse() -> Result<Self, Box<dyn std::error::Error>> {
|
||||
Lua::new().context(|context| -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let globals = context.globals();
|
||||
|
||||
context
|
||||
.load(&fs::read_to_string(
|
||||
Config::handle_different_config_file_path()?,
|
||||
)?)
|
||||
.load(&fs::read_to_string(Config::config_path()?)?)
|
||||
.exec()?;
|
||||
|
||||
let production_use = globals.get::<_, bool>("production_use")?;
|
||||
let aggregator_config = if production_use {
|
||||
AggreatorConfig { random_delay: true }
|
||||
} else {
|
||||
AggreatorConfig {
|
||||
random_delay: false,
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Config {
|
||||
port: globals.get::<_, u16>("port")?,
|
||||
binding_ip_addr: globals.get::<_, String>("binding_ip_addr")?,
|
||||
binding_ip: globals.get::<_, String>("binding_ip")?,
|
||||
style: Style::new(
|
||||
globals.get::<_, String>("theme")?,
|
||||
globals.get::<_, String>("colorscheme")?,
|
||||
),
|
||||
redis_connection_url: globals.get::<_, String>("redis_connection_url")?,
|
||||
aggregator: aggregator_config,
|
||||
redis_url: globals.get::<_, String>("redis_url")?,
|
||||
aggregator: AggregatorConfig {
|
||||
random_delay: globals.get::<_, bool>("production_use")?,
|
||||
},
|
||||
logging: globals.get::<_, bool>("logging")?,
|
||||
debug: globals.get::<_, bool>("debug")?,
|
||||
upstream_search_engines: globals
|
||||
|
@ -104,35 +95,37 @@ impl Config {
|
|||
/// one (3).
|
||||
/// 3. `websurfx/` (under project folder ( or codebase in other words)) if it is not present
|
||||
/// here then it returns an error as mentioned above.
|
||||
fn handle_different_config_file_path() -> Result<String, Box<dyn std::error::Error>> {
|
||||
if Path::new(
|
||||
format!(
|
||||
"{}/.config/{}/config.lua",
|
||||
std::env::var("HOME").unwrap(),
|
||||
COMMON_DIRECTORY_NAME
|
||||
)
|
||||
.as_str(),
|
||||
)
|
||||
.exists()
|
||||
{
|
||||
Ok(format!(
|
||||
fn config_path() -> Result<String, Box<dyn std::error::Error>> {
|
||||
// check user config
|
||||
|
||||
let path = format!(
|
||||
"{}/.config/{}/config.lua",
|
||||
std::env::var("HOME").unwrap(),
|
||||
COMMON_DIRECTORY_NAME
|
||||
);
|
||||
if Path::new(path.as_str()).exists() {
|
||||
return Ok(format!(
|
||||
"{}/.config/{}/{}",
|
||||
std::env::var("HOME").unwrap(),
|
||||
COMMON_DIRECTORY_NAME,
|
||||
CONFIG_FILE_NAME
|
||||
))
|
||||
} else if Path::new(
|
||||
format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME).as_str(),
|
||||
)
|
||||
.exists()
|
||||
{
|
||||
Ok("/etc/xdg/websurfx/config.lua".to_string())
|
||||
} else if Path::new(format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME).as_str())
|
||||
));
|
||||
}
|
||||
|
||||
// look for config in /etc/xdg
|
||||
if Path::new(format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME).as_str())
|
||||
.exists()
|
||||
{
|
||||
Ok("./websurfx/config.lua".to_string())
|
||||
} else {
|
||||
Err("Config file not found!!".to_string().into())
|
||||
return Ok("/etc/xdg/websurfx/config.lua".to_string());
|
||||
}
|
||||
|
||||
// use dev config
|
||||
if Path::new(format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME).as_str()).exists()
|
||||
{
|
||||
return Ok("./websurfx/config.lua".to_string());
|
||||
}
|
||||
|
||||
// if none of the configs above exist, return an error
|
||||
Err("Config file not found!!".to_string().into())
|
||||
}
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
//! This module provides public models for handling, storing and serializing parsed config file
|
||||
//! options from config.lua by grouping them togather.
|
||||
//! options from config.lua by grouping them together.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
|
@ -7,7 +7,7 @@ use std::collections::HashMap;
|
|||
use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
use crate::search_results_handler::aggregation_models::RawSearchResult;
|
||||
use crate::results::aggregation_models::RawSearchResult;
|
||||
|
||||
use super::engine_models::{EngineError, SearchEngine};
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
//! This module provides the error enum to handle different errors associated with requesting data from
|
||||
//! the upstream search engines with the search query provided by the user.
|
||||
|
||||
use crate::search_results_handler::aggregation_models::RawSearchResult;
|
||||
use crate::results::aggregation_models::RawSearchResult;
|
||||
use error_stack::{IntoReport, Result, ResultExt};
|
||||
use std::{collections::HashMap, fmt, time::Duration};
|
||||
|
||||
|
@ -14,7 +14,7 @@ use std::{collections::HashMap, fmt, time::Duration};
|
|||
/// search engines.
|
||||
/// * `UnexpectedError` - This variant handles all the errors which are unexpected or occur rarely
|
||||
/// and are errors mostly related to failure in initialization of HeaderMap, Selector errors and
|
||||
/// all other errors occuring within the code handling the `upstream search engines`.
|
||||
/// all other errors occurring within the code handling the `upstream search engines`.
|
||||
#[derive(Debug)]
|
||||
pub enum EngineError {
|
||||
EmptyResultSet,
|
||||
|
|
|
@ -6,7 +6,7 @@ use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
|
|||
use scraper::{Html, Selector};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::search_results_handler::aggregation_models::RawSearchResult;
|
||||
use crate::results::aggregation_models::RawSearchResult;
|
||||
|
||||
use super::engine_models::{EngineError, SearchEngine};
|
||||
use error_stack::{IntoReport, Report, Result, ResultExt};
|
||||
|
|
|
@ -1 +1 @@
|
|||
pub mod public_path_handler;
|
||||
pub mod public_paths;
|
||||
|
|
|
@ -17,15 +17,17 @@ static PUBLIC_DIRECTORY_NAME: &str = "public";
|
|||
/// 1. `/opt/websurfx` if it is not present here then it falls back to the next one (2)
|
||||
/// 2. Under project folder ( or codebase in other words) if it is not present
|
||||
/// here then it returns an error as mentioned above.
|
||||
pub fn handle_different_public_path() -> Result<String, Error> {
|
||||
pub fn public_path() -> Result<String, Error> {
|
||||
if Path::new(format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() {
|
||||
Ok(format!("/opt/websurfx/{}", PUBLIC_DIRECTORY_NAME))
|
||||
} else if Path::new(format!("./{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() {
|
||||
Ok(format!("./{}", PUBLIC_DIRECTORY_NAME))
|
||||
} else {
|
||||
Err(Error::new(
|
||||
std::io::ErrorKind::NotFound,
|
||||
"Themes (public) folder not found!!",
|
||||
))
|
||||
return Ok(format!("/opt/websurfx/{}", PUBLIC_DIRECTORY_NAME));
|
||||
}
|
||||
|
||||
if Path::new(format!("./{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() {
|
||||
return Ok(format!("./{}", PUBLIC_DIRECTORY_NAME));
|
||||
}
|
||||
|
||||
Err(Error::new(
|
||||
std::io::ErrorKind::NotFound,
|
||||
"Themes (public) folder not found!!",
|
||||
))
|
||||
}
|
12
src/lib.rs
12
src/lib.rs
|
@ -2,10 +2,10 @@
|
|||
//! and register all the routes for the `websurfx` meta search engine website.
|
||||
|
||||
pub mod cache;
|
||||
pub mod config_parser;
|
||||
pub mod config;
|
||||
pub mod engines;
|
||||
pub mod handler;
|
||||
pub mod search_results_handler;
|
||||
pub mod results;
|
||||
pub mod server;
|
||||
|
||||
use std::net::TcpListener;
|
||||
|
@ -14,9 +14,9 @@ use crate::server::routes;
|
|||
|
||||
use actix_files as fs;
|
||||
use actix_web::{dev::Server, middleware::Logger, web, App, HttpServer};
|
||||
use config_parser::parser::Config;
|
||||
use config::parser::Config;
|
||||
use handlebars::Handlebars;
|
||||
use handler::public_path_handler::handle_different_public_path;
|
||||
use handler::public_paths::public_path;
|
||||
|
||||
/// Runs the web server on the provided TCP listener and returns a `Server` instance.
|
||||
///
|
||||
|
@ -32,7 +32,7 @@ use handler::public_path_handler::handle_different_public_path;
|
|||
///
|
||||
/// ```rust
|
||||
/// use std::net::TcpListener;
|
||||
/// use websurfx::{config_parser::parser::Config, run};
|
||||
/// use websurfx::{config::parser::Config, run};
|
||||
///
|
||||
/// let config = Config::parse().unwrap();
|
||||
/// let listener = TcpListener::bind("127.0.0.1:8080").expect("Failed to bind address");
|
||||
|
@ -41,7 +41,7 @@ use handler::public_path_handler::handle_different_public_path;
|
|||
pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
|
||||
let mut handlebars: Handlebars = Handlebars::new();
|
||||
|
||||
let public_folder_path: String = handle_different_public_path()?;
|
||||
let public_folder_path: String = public_path()?;
|
||||
|
||||
handlebars
|
||||
.register_templates_directory(".html", format!("{}/templates", public_folder_path))
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{config_parser::parser_models::Style, engines::engine_models::EngineError};
|
||||
use crate::{config::parser_models::Style, engines::engine_models::EngineError};
|
||||
|
||||
/// A named struct to store, serialize and deserialize the individual search result from all the
|
||||
/// scraped and aggregated search results from the upstream search engines.
|
|
@ -1,14 +1,14 @@
|
|||
//! This module provides the functionality to handle different routes of the `websurfx`
|
||||
//! meta search engine website and provide approriate response to each route/page
|
||||
//! meta search engine website and provide appropriate response to each route/page
|
||||
//! when requested.
|
||||
|
||||
use std::fs::read_to_string;
|
||||
|
||||
use crate::{
|
||||
cache::cacher::RedisCache,
|
||||
config_parser::parser::Config,
|
||||
handler::public_path_handler::handle_different_public_path,
|
||||
search_results_handler::{aggregation_models::SearchResults, aggregator::aggregate},
|
||||
config::parser::Config,
|
||||
handler::public_paths::public_path,
|
||||
results::{aggregation_models::SearchResults, aggregator::aggregate},
|
||||
};
|
||||
use actix_web::{get, web, HttpRequest, HttpResponse};
|
||||
use handlebars::Handlebars;
|
||||
|
@ -87,86 +87,25 @@ pub async fn search(
|
|||
config: web::Data<Config>,
|
||||
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||
let params = web::Query::<SearchParams>::from_query(req.query_string())?;
|
||||
|
||||
//Initialize redis cache connection struct
|
||||
let mut redis_cache = RedisCache::new(config.redis_connection_url.clone())?;
|
||||
match ¶ms.q {
|
||||
Some(query) => {
|
||||
if query.trim().is_empty() {
|
||||
Ok(HttpResponse::Found()
|
||||
return Ok(HttpResponse::Found()
|
||||
.insert_header(("location", "/"))
|
||||
.finish())
|
||||
} else {
|
||||
let page_url: String; // Declare the page_url variable without initializing it
|
||||
|
||||
// ...
|
||||
|
||||
let page = match params.page {
|
||||
Some(page_number) => {
|
||||
if page_number <= 1 {
|
||||
page_url = format!(
|
||||
"http://{}:{}/search?q={}&page={}",
|
||||
config.binding_ip_addr, config.port, query, 1
|
||||
);
|
||||
1
|
||||
} else {
|
||||
page_url = format!(
|
||||
"http://{}:{}/search?q={}&page={}",
|
||||
config.binding_ip_addr, config.port, query, page_number
|
||||
);
|
||||
|
||||
page_number
|
||||
}
|
||||
}
|
||||
None => {
|
||||
page_url = format!(
|
||||
"http://{}:{}{}&page={}",
|
||||
config.binding_ip_addr,
|
||||
config.port,
|
||||
req.uri(),
|
||||
1
|
||||
);
|
||||
|
||||
1
|
||||
}
|
||||
};
|
||||
|
||||
// fetch the cached results json.
|
||||
let cached_results_json = redis_cache.cached_results_json(&page_url);
|
||||
// check if fetched catch results was indeed fetched or it was an error and if so
|
||||
// handle the data accordingly.
|
||||
match cached_results_json {
|
||||
Ok(results_json) => {
|
||||
let new_results_json: SearchResults = serde_json::from_str(&results_json)?;
|
||||
let page_content: String = hbs.render("search", &new_results_json)?;
|
||||
Ok(HttpResponse::Ok().body(page_content))
|
||||
}
|
||||
Err(_) => {
|
||||
// check if the cookie value is empty or not if it is empty then use the
|
||||
// default selected upstream search engines from the config file otherwise
|
||||
// parse the non-empty cookie and grab the user selected engines from the
|
||||
// UI and use that.
|
||||
let mut results_json: crate::search_results_handler::aggregation_models::SearchResults = match req.cookie("appCookie") {
|
||||
Some(cookie_value) => {
|
||||
let cookie_value:Cookie = serde_json::from_str(cookie_value.name_value().1)?;
|
||||
aggregate(query.clone(), page, config.aggregator.random_delay, config.debug, cookie_value.engines).await?
|
||||
},
|
||||
None => aggregate(query.clone(), page, config.aggregator.random_delay, config.debug, config.upstream_search_engines.clone()).await?,
|
||||
};
|
||||
results_json.add_style(config.style.clone());
|
||||
// check whether the results grabbed from the upstream engines are empty or
|
||||
// not if they are empty then set the empty_result_set option to true in
|
||||
// the result json.
|
||||
if results_json.is_empty_result_set() {
|
||||
results_json.set_empty_result_set();
|
||||
}
|
||||
redis_cache
|
||||
.cache_results(serde_json::to_string(&results_json)?, &page_url)?;
|
||||
let page_content: String = hbs.render("search", &results_json)?;
|
||||
Ok(HttpResponse::Ok().body(page_content))
|
||||
}
|
||||
}
|
||||
.finish());
|
||||
}
|
||||
let page = match ¶ms.page {
|
||||
Some(page) => *page,
|
||||
None => 0,
|
||||
};
|
||||
|
||||
let url = format!(
|
||||
"http://{}:{}/search?q={}&page={}",
|
||||
config.binding_ip, config.port, query, page
|
||||
);
|
||||
let results_json = results(url, &config, query.to_string(), page, req).await?;
|
||||
let page_content: String = hbs.render("search", &results_json)?;
|
||||
Ok(HttpResponse::Ok().body(page_content))
|
||||
}
|
||||
None => Ok(HttpResponse::Found()
|
||||
.insert_header(("location", "/"))
|
||||
|
@ -174,11 +113,70 @@ pub async fn search(
|
|||
}
|
||||
}
|
||||
|
||||
/// Fetches the results for a query and page.
|
||||
/// First checks the redis cache; if that fails it aggregates fresh results from the upstream search engines and caches them.
|
||||
async fn results(
|
||||
url: String,
|
||||
config: &Config,
|
||||
query: String,
|
||||
page: u32,
|
||||
req: HttpRequest,
|
||||
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
||||
//Initialize redis cache connection struct
|
||||
let mut redis_cache = RedisCache::new(config.redis_url.clone())?;
|
||||
// fetch the cached results json.
|
||||
let cached_results_json = redis_cache.cached_json(&url);
|
||||
// check whether the cached results were actually fetched or an error occurred, and
|
||||
// handle the data accordingly.
|
||||
match cached_results_json {
|
||||
Ok(results_json) => Ok(serde_json::from_str::<SearchResults>(&results_json).unwrap()),
|
||||
Err(_) => {
|
||||
// check if the cookie value is empty or not if it is empty then use the
|
||||
// default selected upstream search engines from the config file otherwise
|
||||
// parse the non-empty cookie and grab the user selected engines from the
|
||||
// UI and use that.
|
||||
let mut results_json: crate::results::aggregation_models::SearchResults = match req
|
||||
.cookie("appCookie")
|
||||
{
|
||||
Some(cookie_value) => {
|
||||
let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
|
||||
aggregate(
|
||||
query,
|
||||
page,
|
||||
config.aggregator.random_delay,
|
||||
config.debug,
|
||||
cookie_value.engines,
|
||||
)
|
||||
.await?
|
||||
}
|
||||
None => {
|
||||
aggregate(
|
||||
query,
|
||||
page,
|
||||
config.aggregator.random_delay,
|
||||
config.debug,
|
||||
config.upstream_search_engines.clone(),
|
||||
)
|
||||
.await?
|
||||
}
|
||||
};
|
||||
results_json.add_style(config.style.clone());
|
||||
// check whether the results grabbed from the upstream engines are empty or
|
||||
// not if they are empty then set the empty_result_set option to true in
|
||||
// the result json.
|
||||
if results_json.is_empty_result_set() {
|
||||
results_json.set_empty_result_set();
|
||||
}
|
||||
redis_cache.cache_results(serde_json::to_string(&results_json)?, &url)?;
|
||||
Ok(results_json)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
|
||||
#[get("/robots.txt")]
|
||||
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
||||
let page_content: String =
|
||||
read_to_string(format!("{}/robots.txt", handle_different_public_path()?))?;
|
||||
let page_content: String = read_to_string(format!("{}/robots.txt", public_path()?))?;
|
||||
Ok(HttpResponse::Ok()
|
||||
.content_type("text/plain; charset=ascii")
|
||||
.body(page_content))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue