Merge branch 'rolling' into handle-reqwest-errors
commit 975e8a480b
40 changed files with 1250 additions and 206 deletions
@@ -5,7 +5,6 @@
 use std::net::TcpListener;

-use env_logger::Env;
 use websurfx::{config_parser::parser::Config, run};

 /// The function that launches the main server and registers all the routes of the website.
@@ -20,7 +19,10 @@ async fn main() -> std::io::Result<()> {
     let config = Config::parse().unwrap();

-    // Initializing logging middleware with level set to default or info.
-    env_logger::Builder::from_env(Env::default().default_filter_or("info")).init();
+    if config.logging || config.debug {
+        use env_logger::Env;
+        env_logger::Builder::from_env(Env::default().default_filter_or("info")).init();
+    }

     log::info!("started server on port {}", config.port);
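Read together, these two hunks gate logger initialization behind the new `logging`/`debug` config flags instead of running it unconditionally. A minimal sketch of the resulting `main`, assuming an `#[actix_web::main]` entry point and a bind address derived from the config (neither is shown in this hunk):

```rust
use std::net::TcpListener;

use websurfx::{config_parser::parser::Config, run};

#[actix_web::main]
async fn main() -> std::io::Result<()> {
    let config = Config::parse().unwrap();

    // Logging is now opt-in via config.lua rather than always on.
    if config.logging || config.debug {
        use env_logger::Env;
        env_logger::Builder::from_env(Env::default().default_filter_or("info")).init();
    }

    log::info!("started server on port {}", config.port);

    // Assumed wiring: bind to the configured address/port and hand off to run().
    let listener = TcpListener::bind((config.binding_ip_addr.clone(), config.port))?;
    run(listener, config)?.await
}
```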
36	src/cache/cacher.rs	vendored
@@ -10,9 +10,8 @@ use redis::{Client, Commands, Connection};
 /// # Fields
 ///
 /// * `redis_connection_url` - It stores the redis Connection url address.
-#[derive(Clone)]
 pub struct RedisCache {
-    redis_connection_url: String,
+    connection: Connection,
 }

 impl RedisCache {
@@ -21,10 +20,11 @@ impl RedisCache {
     /// # Arguments
     ///
     /// * `redis_connection_url` - It stores the redis Connection url address.
-    pub fn new(redis_connection_url: String) -> Self {
-        RedisCache {
-            redis_connection_url,
-        }
+    pub fn new(redis_connection_url: String) -> Result<Self, Box<dyn std::error::Error>> {
+        let client = Client::open(redis_connection_url)?;
+        let connection = client.get_connection()?;
+        let redis_cache = RedisCache { connection };
+        Ok(redis_cache)
     }

     /// A helper function which computes the hash of the url and formats and returns it as string.
@@ -32,7 +32,7 @@ impl RedisCache {
     /// # Arguments
     ///
     /// * `url` - It takes an url as string.
-    fn compute_url_hash(self, url: &str) -> String {
+    fn compute_url_hash(url: &str) -> String {
         format!("{:?}", compute(url))
     }

@@ -41,11 +41,9 @@ impl RedisCache {
     /// # Arguments
     ///
     /// * `url` - It takes an url as a string.
-    pub fn cached_results_json(self, url: String) -> Result<String, Box<dyn std::error::Error>> {
-        let hashed_url_string = self.clone().compute_url_hash(&url);
-        let mut redis_connection: Connection =
-            Client::open(self.redis_connection_url)?.get_connection()?;
-        Ok(redis_connection.get(hashed_url_string)?)
+    pub fn cached_results_json(&mut self, url: &str) -> Result<String, Box<dyn std::error::Error>> {
+        let hashed_url_string = Self::compute_url_hash(url);
+        Ok(self.connection.get(hashed_url_string)?)
     }

     /// A function which caches the results by using the hashed `url` as the key and
@@ -57,20 +55,18 @@ impl RedisCache {
     /// * `json_results` - It takes the json results string as an argument.
     /// * `url` - It takes the url as a String.
     pub fn cache_results(
-        self,
+        &mut self,
         json_results: String,
-        url: String,
+        url: &str,
     ) -> Result<(), Box<dyn std::error::Error>> {
-        let hashed_url_string = self.clone().compute_url_hash(&url);
-        let mut redis_connection: Connection =
-            Client::open(self.redis_connection_url)?.get_connection()?;
+        let hashed_url_string = Self::compute_url_hash(url);

         // put results_json into cache
-        redis_connection.set(hashed_url_string.clone(), json_results)?;
+        self.connection.set(&hashed_url_string, json_results)?;

         // Set the TTL for the key to 60 seconds
-        redis_connection
-            .expire::<String, u32>(hashed_url_string.clone(), 60)
+        self.connection
+            .expire::<String, u32>(hashed_url_string, 60)
             .unwrap();

         Ok(())
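The refactor makes construction fallible (the client and connection are opened once, up front) and switches both methods to `&mut self` over a stored `Connection`, dropping the per-call `Client::open` and the `Clone` dance. A usage sketch under those assumptions; the connection URL is illustrative:

```rust
use websurfx::cache::cacher::RedisCache;

fn demo() -> Result<(), Box<dyn std::error::Error>> {
    // `new` now returns Result: a bad URL or unreachable server fails here,
    // not on first use.
    let mut cache = RedisCache::new("redis://127.0.0.1:6379".to_string())?;

    // Both calls reuse the single stored connection via `&mut self`.
    cache.cache_results(r#"{"results":[]}"#.to_string(), "https://example.com/?q=rust")?;
    let json = cache.cached_results_json("https://example.com/?q=rust")?;
    println!("{json}");
    Ok(())
}
```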
2	src/cache/mod.rs	vendored
@@ -1 +1 @@
-pub mod cacher;
+pub mod cacher;
@@ -3,7 +3,11 @@

 use super::parser_models::Style;
 use rlua::Lua;
-use std::fs;
+use std::{format, fs, path::Path};

+// ------- Constants --------
+static COMMON_DIRECTORY_NAME: &str = "websurfx";
+static CONFIG_FILE_NAME: &str = "config.lua";
+
 /// A named struct which stores the parsed config file options.
 ///
@@ -20,6 +24,16 @@ pub struct Config {
     pub binding_ip_addr: String,
     pub style: Style,
     pub redis_connection_url: String,
+    pub aggregator: AggreatorConfig,
+    pub logging: bool,
+    pub debug: bool,
 }

+/// Configuration options for the aggregator.
+#[derive(Clone)]
+pub struct AggreatorConfig {
+    /// Whether to introduce a random delay before sending the request to the search engine.
+    pub random_delay: bool,
+}
+
 impl Config {
@@ -32,15 +46,24 @@ impl Config {
     /// or io error if the config.lua file doesn't exists otherwise it returns a newly contructed
     /// Config struct with all the parsed config options from the parsed config file.
     pub fn parse() -> Result<Self, Box<dyn std::error::Error>> {
-        let lua = Lua::new();
-
-        lua.context(|context| {
+        Lua::new().context(|context| -> Result<Self, Box<dyn std::error::Error>> {
             let globals = context.globals();

             context
-                .load(&fs::read_to_string("./websurfx/config.lua")?)
+                .load(&fs::read_to_string(
+                    Config::handle_different_config_file_path()?,
+                )?)
                 .exec()?;

+            let production_use = globals.get::<_, bool>("production_use")?;
+            let aggregator_config = if production_use {
+                AggreatorConfig { random_delay: true }
+            } else {
+                AggreatorConfig {
+                    random_delay: false,
+                }
+            };
+
             Ok(Config {
                 port: globals.get::<_, u16>("port")?,
                 binding_ip_addr: globals.get::<_, String>("binding_ip_addr")?,
@@ -49,7 +72,53 @@ impl Config {
                     globals.get::<_, String>("colorscheme")?,
                 ),
                 redis_connection_url: globals.get::<_, String>("redis_connection_url")?,
+                aggregator: aggregator_config,
+                logging: globals.get::<_, bool>("logging")?,
+                debug: globals.get::<_, bool>("debug")?,
             })
         })
     }
+
+    /// A helper function which returns an appropriate config file path checking if the config
+    /// file exists on that path.
+    ///
+    /// # Error
+    ///
+    /// Returns a `config file not found!!` error if the config file is not present under following
+    /// paths which are:
+    /// 1. `~/.config/websurfx/` if it not present here then it fallbacks to the next one (2)
+    /// 2. `/etc/xdg/websurfx/config.lua` if it is not present here then it fallbacks to the next
+    ///    one (3).
+    /// 3. `websurfx/` (under project folder ( or codebase in other words)) if it is not present
+    ///    here then it returns an error as mentioned above.
+    fn handle_different_config_file_path() -> Result<String, Box<dyn std::error::Error>> {
+        if Path::new(
+            format!(
+                "{}/.config/{}/config.lua",
+                std::env::var("HOME").unwrap(),
+                COMMON_DIRECTORY_NAME
+            )
+            .as_str(),
+        )
+        .exists()
+        {
+            Ok(format!(
+                "{}/.config/{}/{}",
+                std::env::var("HOME").unwrap(),
+                COMMON_DIRECTORY_NAME,
+                CONFIG_FILE_NAME
+            ))
+        } else if Path::new(
+            format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME).as_str(),
+        )
+        .exists()
+        {
+            Ok("/etc/xdg/websurfx/config.lua".to_string())
+        } else if Path::new(format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME).as_str())
+            .exists()
+        {
+            Ok("./websurfx/config.lua".to_string())
+        } else {
+            Err(format!("Config file not found!!").into())
+        }
+    }
 }
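The lookup order in `handle_different_config_file_path` is: per-user config, system-wide XDG directory, then the project checkout. The same logic reads naturally as a loop over candidates; a condensed, illustrative sketch (not the committed code):

```rust
use std::path::Path;

fn find_config_file(home: &str) -> Result<String, Box<dyn std::error::Error>> {
    // Checked in order; the first existing path wins.
    let candidates = [
        format!("{}/.config/websurfx/config.lua", home),
        "/etc/xdg/websurfx/config.lua".to_string(),
        "./websurfx/config.lua".to_string(),
    ];
    for path in candidates {
        if Path::new(&path).exists() {
            return Ok(path);
        }
    }
    Err("Config file not found!!".into())
}
```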
1	src/handler/mod.rs	Normal file
@@ -0,0 +1 @@
+pub mod public_path_handler;
31	src/handler/public_path_handler.rs	Normal file
@@ -0,0 +1,31 @@
+//! This module provides the functionality to handle theme folder present on different paths and
+//! provide one appropriate path on which it is present and can be used.
+
+use std::io::Error;
+use std::path::Path;
+
+// ------- Constants --------
+static PUBLIC_DIRECTORY_NAME: &str = "public";
+
+/// A function which returns an appropriate theme directory path checking if the theme
+/// directory exists on that path.
+///
+/// # Error
+///
+/// Returns a `Theme (public) folder not found!!` error if the theme folder is not present under following
+/// paths which are:
+/// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2)
+/// 2. Under project folder ( or codebase in other words) if it is not present
+///    here then it returns an error as mentioned above.
+pub fn handle_different_public_path() -> Result<String, Error> {
+    if Path::new(format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() {
+        Ok(format!("/opt/websurfx/{}", PUBLIC_DIRECTORY_NAME))
+    } else if Path::new(format!("./{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() {
+        Ok(format!("./{}", PUBLIC_DIRECTORY_NAME))
+    } else {
+        Err(Error::new(
+            std::io::ErrorKind::NotFound,
+            "Themes (public) folder not found!!",
+        ))
+    }
+}
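Because this helper returns `std::io::Error` rather than a boxed error, it composes directly with `std::io::Result` call sites such as `run` in `src/lib.rs` below. A small usage sketch:

```rust
use websurfx::handler::public_path_handler::handle_different_public_path;

fn main() -> std::io::Result<()> {
    // "/opt/websurfx/public" on installed systems, "./public" in a checkout.
    let public_folder_path = handle_different_public_path()?;
    println!("serving theme assets from {public_folder_path}");
    Ok(())
}
```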
16	src/lib.rs
@@ -4,6 +4,7 @@
 pub mod cache;
 pub mod config_parser;
 pub mod engines;
+pub mod handler;
 pub mod search_results_handler;
 pub mod server;

@@ -15,6 +16,7 @@ use actix_files as fs;
 use actix_web::{dev::Server, middleware::Logger, web, App, HttpServer};
 use config_parser::parser::Config;
 use handlebars::Handlebars;
+use handler::public_path_handler::handle_different_public_path;

 /// Runs the web server on the provided TCP listener and returns a `Server` instance.
 ///
@@ -39,8 +41,10 @@ use handlebars::Handlebars;
 pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
     let mut handlebars: Handlebars = Handlebars::new();

+    let public_folder_path: String = handle_different_public_path()?;
+
     handlebars
-        .register_templates_directory(".html", "./public/templates")
+        .register_templates_directory(".html", format!("{}/templates", public_folder_path))
         .unwrap();

     let handlebars_ref: web::Data<Handlebars> = web::Data::new(handlebars);
@@ -51,8 +55,14 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
             .app_data(web::Data::new(config.clone()))
             .wrap(Logger::default()) // added logging middleware for logging.
             // Serve images and static files (css and js files).
-            .service(fs::Files::new("/static", "./public/static").show_files_listing())
-            .service(fs::Files::new("/images", "./public/images").show_files_listing())
+            .service(
+                fs::Files::new("/static", format!("{}/static", public_folder_path))
+                    .show_files_listing(),
+            )
+            .service(
+                fs::Files::new("/images", format!("{}/images", public_folder_path))
+                    .show_files_listing(),
+            )
             .service(routes::robots_data) // robots.txt
             .service(routes::index) // index page
             .service(routes::search) // search page
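Resolving `public_folder_path` once at startup means `run` no longer assumes it is launched from the repository root. A sketch of exercising `run` from a test on an ephemeral port; the test harness shape is an assumption (a reachable config.lua and the actix runtime macros):

```rust
use std::net::TcpListener;

use websurfx::{config_parser::parser::Config, run};

#[actix_web::test]
async fn server_boots() -> std::io::Result<()> {
    let config = Config::parse().expect("failed to parse config.lua");
    // Port 0 lets the OS pick any free port, so tests don't collide.
    let listener = TcpListener::bind("127.0.0.1:0")?;
    let port = listener.local_addr()?.port();
    let server = run(listener, config)?;
    // Run the server in the background; a real test would now hit
    // http://127.0.0.1:{port}/ with an HTTP client.
    actix_web::rt::spawn(server);
    println!("listening on {port}");
    Ok(())
}
```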
@@ -116,7 +116,7 @@ impl RawSearchResult {
     }
 }

-/// A named struct to store, serialize, deserialize the all the search results scraped and
+/// A named struct to store, serialize, deserialize the all the search results scraped and
 /// aggregated from the upstream search engines.
 ///
 /// # Fields
@@ -29,6 +29,7 @@ use crate::engines::{duckduckgo, searx};
 ///
 /// * `query` - Accepts a string to query with the above upstream search engines.
 /// * `page` - Accepts an u32 page number.
+/// * `random_delay` - Accepts a boolean value to add a random delay before making the request.
 ///
 /// # Error
 ///
@@ -38,14 +39,18 @@
 pub async fn aggregate(
     query: &str,
     page: u32,
+    random_delay: bool,
+    debug: bool,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
     let user_agent: String = random_user_agent();
     let mut result_map: HashMap<String, RawSearchResult> = HashMap::new();

     // Add a random delay before making the request.
-    let mut rng = rand::thread_rng();
-    let delay_secs = rng.gen_range(1..10);
-    std::thread::sleep(Duration::from_secs(delay_secs));
+    if random_delay || !debug {
+        let mut rng = rand::thread_rng();
+        let delay_secs = rng.gen_range(1..10);
+        std::thread::sleep(Duration::from_secs(delay_secs));
+    }

     // fetch results from upstream search engines simultaneously/concurrently.
     let (ddg_map_results, searx_map_results) = join!(
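Note the gate: the sleep now fires when `random_delay` is set *or* `debug` is off, so production deployments keep the anti-rate-limit jitter while debug runs skip it. The condition isolated, as a sketch:

```rust
use std::time::Duration;

use rand::Rng;

// The delay gate from the hunk above, extracted for clarity.
fn maybe_delay(random_delay: bool, debug: bool) {
    // true  when a random delay is requested, or when not debugging;
    // false only for (random_delay: false, debug: true).
    if random_delay || !debug {
        let mut rng = rand::thread_rng();
        let delay_secs = rng.gen_range(1..10); // 1..=9 seconds
        std::thread::sleep(Duration::from_secs(delay_secs));
    }
}
```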
@@ -1,13 +1,8 @@
 //! This module provides the functionality to generate random user agent string.

-use fake_useragent::{Browsers, UserAgentsBuilder};
+use fake_useragent::{Browsers, UserAgents, UserAgentsBuilder};

-/// A function to generate random user agent to improve privacy of the user.
-///
-/// # Returns
-///
-/// A randomly generated user agent string.
-pub fn random_user_agent() -> String {
+static USER_AGENTS: once_cell::sync::Lazy<UserAgents> = once_cell::sync::Lazy::new(|| {
     UserAgentsBuilder::new()
         .cache(false)
         .dir("/tmp")
@@ -21,6 +16,13 @@ pub fn random_user_agent() -> String {
             .set_mozilla(),
         )
         .build()
-        .random()
-        .to_string()
+});
+
+/// A function to generate random user agent to improve privacy of the user.
+///
+/// # Returns
+///
+/// A randomly generated user agent string.
+pub fn random_user_agent() -> String {
+    USER_AGENTS.random().to_string()
 }
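The rewrite hoists the expensive `UserAgentsBuilder` pipeline into a `once_cell::sync::Lazy` static, so the pool is built on the first call and merely sampled afterwards. The pattern in isolation, assuming once_cell 1.x:

```rust
use once_cell::sync::Lazy;

// The closure runs exactly once, on first dereference; every later
// access returns the cached value.
static POOL: Lazy<Vec<String>> = Lazy::new(|| {
    println!("building the pool once");
    (0..3).map(|i| format!("agent-{i}")).collect()
});

fn main() {
    println!("{}", POOL[0]); // triggers the build
    println!("{}", POOL[1]); // reuses the cached Vec
}
```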
@@ -7,6 +7,7 @@ use std::fs::read_to_string;
 use crate::{
     cache::cacher::RedisCache,
     config_parser::parser::Config,
+    handler::public_path_handler::handle_different_public_path,
     search_results_handler::{aggregation_models::SearchResults, aggregator::aggregate},
 };
 use actix_web::{get, web, HttpRequest, HttpResponse};
@@ -73,7 +74,7 @@ pub async fn search(
     let params = web::Query::<SearchParams>::from_query(req.query_string())?;

     //Initialize redis cache connection struct
-    let redis_cache = RedisCache::new(config.redis_connection_url.clone());
+    let mut redis_cache = RedisCache::new(config.redis_connection_url.clone())?;
     match &params.q {
         Some(query) => {
             if query.trim().is_empty() {
@@ -81,11 +82,10 @@ pub async fn search(
                     .insert_header(("location", "/"))
                     .finish())
             } else {
-                // Initialize the page url as an empty string
-                let mut page_url = String::new();
+                let page_url: String; // Declare the page_url variable without initializing it

                 // Find whether the page is valid page number if not then return
                 // the first page number and also construct the page_url accordingly
                 let page = match params.page {
                     Some(page_number) => {
                         if page_number <= 1 {
@@ -117,7 +117,7 @@ pub async fn search(
                 };

                 // fetch the cached results json.
-                let cached_results_json = redis_cache.clone().cached_results_json(page_url.clone());
+                let cached_results_json = redis_cache.cached_results_json(&page_url);
                 // check if fetched results was indeed fetched or it was an error and if so
                 // handle the data accordingly.
                 match cached_results_json {
@@ -128,12 +128,10 @@ pub async fn search(
                     }
                     Err(_) => {
                         let mut results_json: crate::search_results_handler::aggregation_models::SearchResults =
-                            aggregate(query, page).await?;
+                            aggregate(query, page, config.aggregator.random_delay, config.debug).await?;
                         results_json.add_style(config.style.clone());
-                        redis_cache.clone().cache_results(
-                            serde_json::to_string(&results_json)?,
-                            page_url.clone(),
-                        )?;
+                        redis_cache
+                            .cache_results(serde_json::to_string(&results_json)?, &page_url)?;
                         let page_content: String = hbs.render("search", &results_json)?;
                         Ok(HttpResponse::Ok().body(page_content))
                     }
@@ -149,7 +147,8 @@ pub async fn search(
 /// Handles the route of robots.txt page of the `websurfx` meta search engine website.
 #[get("/robots.txt")]
 pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
-    let page_content: String = read_to_string("./public/robots.txt")?;
+    let page_content: String =
+        read_to_string(format!("{}/robots.txt", handle_different_public_path()?))?;
     Ok(HttpResponse::Ok()
         .content_type("text/plain; charset=ascii")
         .body(page_content))
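With the fallible cache in place, the search route is a cache-aside loop: look up the hashed `page_url`, and only on `Err` aggregate, cache, and render. A self-contained sketch of that control flow, with a `HashMap` standing in for Redis and the JSON faked inline:

```rust
use std::collections::HashMap;

// HashMap stands in for RedisCache; format! stands in for aggregate().
fn search(cache: &mut HashMap<String, String>, page_url: &str) -> String {
    match cache.get(page_url) {
        // Cache hit: serve the stored results JSON as-is.
        Some(json) => json.clone(),
        // Cache miss: aggregate fresh results, cache them, then serve.
        None => {
            let results_json = format!(r#"{{"url":"{page_url}"}}"#);
            cache.insert(page_url.to_string(), results_json.clone());
            results_json
        }
    }
}

fn main() {
    let mut cache = HashMap::new();
    let url = "https://example.com/search?q=rust&page=1";
    let first = search(&mut cache, url);
    let second = search(&mut cache, url); // served from cache
    assert_eq!(first, second);
}
```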