⚙️ refactor: reorganize code & restructure codebase for better maintainability (#207)
This commit is contained in:
parent
453dbdc47d
commit
493c56bd02
17 changed files with 141 additions and 122 deletions
146
src/models/aggregation_models.rs
Normal file
146
src/models/aggregation_models.rs
Normal file
|
@ -0,0 +1,146 @@
|
|||
//! This module provides public models for handling, storing and serializing of search results
|
||||
//! data scraped from the upstream search engines.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::{engine_models::EngineError, parser_models::Style};
|
||||
|
||||
/// A named struct to store the raw scraped search results scraped search results from the
|
||||
/// upstream search engines before aggregating it.It derives the Clone trait which is needed
|
||||
/// to write idiomatic rust using `Iterators`.
|
||||
/// (href url in html in simple words).
|
||||
#[derive(Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct SearchResult {
|
||||
/// The title of the search result.
|
||||
pub title: String,
|
||||
/// The url which is accessed when clicked on it
|
||||
pub url: String,
|
||||
/// The description of the search result.
|
||||
pub description: String,
|
||||
/// The names of the upstream engines from which this results were provided.
|
||||
pub engine: Vec<String>,
|
||||
}
|
||||
|
||||
impl SearchResult {
|
||||
/// Constructs a new `RawSearchResult` with the given arguments needed for the struct.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `title` - The title of the search result.
|
||||
/// * `url` - The url which is accessed when clicked on it
|
||||
/// (href url in html in simple words).
|
||||
/// * `description` - The description of the search result.
|
||||
/// * `engine` - The names of the upstream engines from which this results were provided.
|
||||
pub fn new(title: String, url: String, description: String, engine: Vec<String>) -> Self {
|
||||
SearchResult {
|
||||
title,
|
||||
url,
|
||||
description,
|
||||
engine,
|
||||
}
|
||||
}
|
||||
|
||||
/// A function which adds the engine name provided as a string into a vector of strings.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `engine` - Takes an engine name provided as a String.
|
||||
pub fn add_engines(&mut self, engine: String) {
|
||||
self.engine.push(engine)
|
||||
}
|
||||
|
||||
/// A function which returns the engine name stored from the struct as a string.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// An engine name stored as a string from the struct.
|
||||
pub fn engine(self) -> String {
|
||||
self.engine.get(0).unwrap().to_string()
|
||||
}
|
||||
}
|
||||
|
||||
/// A named struct that stores the error info related to the upstream search engines.
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct EngineErrorInfo {
|
||||
/// It stores the error type which occured while fetching the result from a particular search
|
||||
/// engine.
|
||||
pub error: String,
|
||||
/// It stores the name of the engine that failed to provide the requested search results.
|
||||
pub engine: String,
|
||||
/// It stores the name of the color to indicate whether how severe the particular error is (In
|
||||
/// other words it indicates the severity of the error/issue).
|
||||
pub severity_color: String,
|
||||
}
|
||||
|
||||
impl EngineErrorInfo {
|
||||
/// Constructs a new `SearchResult` with the given arguments needed for the struct.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `error` - It takes the error type which occured while fetching the result from a particular
|
||||
/// search engine.
|
||||
/// * `engine` - It takes the name of the engine that failed to provide the requested search results.
|
||||
pub fn new(error: &EngineError, engine: String) -> Self {
|
||||
Self {
|
||||
error: match error {
|
||||
EngineError::RequestError => String::from("RequestError"),
|
||||
EngineError::EmptyResultSet => String::from("EmptyResultSet"),
|
||||
EngineError::UnexpectedError => String::from("UnexpectedError"),
|
||||
},
|
||||
engine,
|
||||
severity_color: match error {
|
||||
EngineError::RequestError => String::from("green"),
|
||||
EngineError::EmptyResultSet => String::from("blue"),
|
||||
EngineError::UnexpectedError => String::from("red"),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A named struct to store, serialize, deserialize the all the search results scraped and
|
||||
/// aggregated from the upstream search engines.
|
||||
/// `SearchResult` structs.
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct SearchResults {
|
||||
/// Stores the individual serializable `SearchResult` struct into a vector of
|
||||
pub results: Vec<SearchResult>,
|
||||
/// Stores the current pages search query `q` provided in the search url.
|
||||
pub page_query: String,
|
||||
/// Stores the theming options for the website.
|
||||
pub style: Style,
|
||||
/// Stores the information on which engines failed with their engine name
|
||||
/// and the type of error that caused it.
|
||||
pub engine_errors_info: Vec<EngineErrorInfo>,
|
||||
}
|
||||
|
||||
impl SearchResults {
|
||||
/// Constructs a new `SearchResult` with the given arguments needed for the struct.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `results` - Takes an argument of individual serializable `SearchResult` struct
|
||||
/// and stores it into a vector of `SearchResult` structs.
|
||||
/// * `page_query` - Takes an argument of current page`s search query `q` provided in
|
||||
/// the search url.
|
||||
/// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the
|
||||
/// given search query.
|
||||
pub fn new(
|
||||
results: Vec<SearchResult>,
|
||||
page_query: String,
|
||||
engine_errors_info: Vec<EngineErrorInfo>,
|
||||
) -> Self {
|
||||
SearchResults {
|
||||
results,
|
||||
page_query,
|
||||
style: Style::new("".to_string(), "".to_string()),
|
||||
engine_errors_info,
|
||||
}
|
||||
}
|
||||
|
||||
/// A setter function to add website style to the return search results.
|
||||
pub fn add_style(&mut self, style: Style) {
|
||||
self.style = style;
|
||||
}
|
||||
}
|
160
src/models/engine_models.rs
Normal file
160
src/models/engine_models.rs
Normal file
|
@ -0,0 +1,160 @@
|
|||
//! This module provides the error enum to handle different errors associated while requesting data from
|
||||
//! the upstream search engines with the search query provided by the user.
|
||||
|
||||
use super::aggregation_models::SearchResult;
|
||||
use error_stack::{IntoReport, Result, ResultExt};
|
||||
use std::{collections::HashMap, fmt, time::Duration};
|
||||
|
||||
/// A custom error type used for handling engine associated errors.
#[derive(Debug)]
pub enum EngineError {
    /// This variant handles the no results found error provided by the upstream
    /// search engines.
    EmptyResultSet,
    /// This variant handles all request related errors like forbidden, not found,
    /// etc.
    RequestError,
    /// This variant handles all the errors which are unexpected or occur rarely
    /// and are errors mostly related to failure in initialization of HeaderMap,
    /// Selector errors and all other errors occurring within the code handling
    /// the `upstream search engines`.
    UnexpectedError,
}
|
||||
|
||||
impl fmt::Display for EngineError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
EngineError::EmptyResultSet => {
|
||||
write!(f, "The upstream search engine returned an empty result set")
|
||||
}
|
||||
EngineError::RequestError => {
|
||||
write!(
|
||||
f,
|
||||
"Error occurred while requesting data from upstream search engine"
|
||||
)
|
||||
}
|
||||
EngineError::UnexpectedError => {
|
||||
write!(f, "An unexpected error occurred while processing the data")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Marks `EngineError` as an `error_stack` context; it is the value handed to
// `change_context` when request errors are reported in this file.
impl error_stack::Context for EngineError {}
|
||||
|
||||
/// A trait to define common behavior for all search engines.
|
||||
#[async_trait::async_trait]
|
||||
pub trait SearchEngine: Sync + Send {
|
||||
/// This helper function fetches/requests the search results from the upstream search engine in
|
||||
/// an html form.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `url` - It takes the url of the upstream search engine with the user requested search
|
||||
/// query appended in the search parameters.
|
||||
/// * `header_map` - It takes the http request headers to be sent to the upstream engine in
|
||||
/// order to prevent being detected as a bot. It takes the header as a HeaderMap type.
|
||||
/// * `request_timeout` - It takes the request timeout value as seconds which is used to limit
|
||||
/// the amount of time for each request to remain connected when until the results can be provided
|
||||
/// by the upstream engine.
|
||||
///
|
||||
/// # Error
|
||||
///
|
||||
/// It returns the html data as a string if the upstream engine provides the data as expected
|
||||
/// otherwise it returns a custom `EngineError`.
|
||||
async fn fetch_html_from_upstream(
|
||||
&self,
|
||||
url: String,
|
||||
header_map: reqwest::header::HeaderMap,
|
||||
request_timeout: u8,
|
||||
) -> Result<String, EngineError> {
|
||||
// fetch the html from upstream search engine
|
||||
Ok(reqwest::Client::new()
|
||||
.get(url)
|
||||
.timeout(Duration::from_secs(request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
|
||||
.headers(header_map) // add spoofed headers to emulate human behavior
|
||||
.send()
|
||||
.await
|
||||
.into_report()
|
||||
.change_context(EngineError::RequestError)?
|
||||
.text()
|
||||
.await
|
||||
.into_report()
|
||||
.change_context(EngineError::RequestError)?)
|
||||
}
|
||||
|
||||
/// This function scrapes results from the upstream engine and puts all the scraped results like
|
||||
/// title, visiting_url (href in html),engine (from which engine it was fetched from) and description
|
||||
/// in a RawSearchResult and then adds that to HashMap whose keys are url and values are RawSearchResult
|
||||
/// struct and then returns it within a Result enum.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `query` - Takes the user provided query to query to the upstream search engine with.
|
||||
/// * `page` - Takes an u32 as an argument.
|
||||
/// * `user_agent` - Takes a random user agent string as an argument.
|
||||
/// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
|
||||
/// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
|
||||
/// provide results for the requested search query and also returns error if the scraping selector
|
||||
/// or HeaderMap fails to initialize.
|
||||
async fn results(
|
||||
&self,
|
||||
query: String,
|
||||
page: u32,
|
||||
user_agent: String,
|
||||
request_timeout: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError>;
|
||||
}
|
||||
|
||||
/// A named struct which stores the engine struct with the name of the associated engine.
|
||||
pub struct EngineHandler {
|
||||
/// It stores the engine struct wrapped in a box smart pointer as the engine struct implements
|
||||
/// the `SearchEngine` trait.
|
||||
engine: Box<dyn SearchEngine>,
|
||||
/// It stores the name of the engine to which the struct is associated to.
|
||||
name: &'static str,
|
||||
}
|
||||
|
||||
impl Clone for EngineHandler {
|
||||
fn clone(&self) -> Self {
|
||||
Self::new(self.name).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
impl EngineHandler {
|
||||
/// Parses an engine name into an engine handler.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `engine_name` - It takes the name of the engine to which the struct was associated to.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// It returns an option either containing the value or a none if the engine is unknown
|
||||
pub fn new(engine_name: &str) -> Option<Self> {
|
||||
let engine: (&'static str, Box<dyn SearchEngine>) =
|
||||
match engine_name.to_lowercase().as_str() {
|
||||
"duckduckgo" => (
|
||||
"duckduckgo",
|
||||
Box::new(crate::engines::duckduckgo::DuckDuckGo),
|
||||
),
|
||||
"searx" => ("searx", Box::new(crate::engines::searx::Searx)),
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
Some(Self {
|
||||
engine: engine.1,
|
||||
name: engine.0,
|
||||
})
|
||||
}
|
||||
|
||||
/// This function converts the EngineHandler type into a tuple containing the engine name and
|
||||
/// the associated engine struct.
|
||||
pub fn into_name_engine(self) -> (&'static str, Box<dyn SearchEngine>) {
|
||||
(self.name, self.engine)
|
||||
}
|
||||
}
|
8
src/models/mod.rs
Normal file
8
src/models/mod.rs
Normal file
|
@ -0,0 +1,8 @@
|
|||
//! This module provides modules which in turn provide various models for aggregating search
|
||||
//! results, parsing config file, providing trait to standardize search engine handling code,
|
||||
//! custom engine error for the search engine, etc.
|
||||
|
||||
pub mod aggregation_models;
|
||||
pub mod engine_models;
|
||||
pub mod parser_models;
|
||||
pub mod server_models;
|
35
src/models/parser_models.rs
Normal file
35
src/models/parser_models.rs
Normal file
|
@ -0,0 +1,35 @@
|
|||
//! This module provides public models for handling, storing and serializing parsed config file
|
||||
//! options from config.lua by grouping them together.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// A named struct which stores,deserializes, serializes and groups the parsed config file options
|
||||
/// of theme and colorscheme names into the Style struct which derives the `Clone`, `Serialize`
|
||||
/// and Deserialize traits where the `Clone` trait is derived for allowing the struct to be
|
||||
/// cloned and passed to the server as a shared data between all routes except `/robots.txt` and
|
||||
/// the `Serialize` trait has been derived for allowing the object to be serialized so that it
|
||||
/// can be passed to handlebars template files and the `Deserialize` trait has been derived in
|
||||
/// order to allow the deserializing the json back to struct in aggregate function in
|
||||
/// aggregator.rs and create a new struct out of it and then serialize it back to json and pass
|
||||
/// it to the template files.
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct Style {
|
||||
/// It stores the parsed theme option used to set a theme for the website.
|
||||
pub theme: String,
|
||||
/// It stores the parsed colorscheme option used to set a colorscheme for the
|
||||
/// theme being used.
|
||||
pub colorscheme: String,
|
||||
}
|
||||
|
||||
impl Style {
|
||||
/// Constructs a new `Style` with the given arguments needed for the struct.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `theme` - It takes the parsed theme option used to set a theme for the website.
|
||||
/// * `colorscheme` - It takes the parsed colorscheme option used to set a colorscheme
|
||||
/// for the theme being used.
|
||||
pub fn new(theme: String, colorscheme: String) -> Self {
|
||||
Style { theme, colorscheme }
|
||||
}
|
||||
}
|
26
src/models/server_models.rs
Normal file
26
src/models/server_models.rs
Normal file
|
@ -0,0 +1,26 @@
|
|||
//! This module provides the models to parse cookies and search parameters from the search
|
||||
//! engine website.
|
||||
use serde::Deserialize;
|
||||
|
||||
/// A named struct which deserializes all the user provided search parameters and stores them.
|
||||
#[derive(Deserialize)]
|
||||
pub struct SearchParams {
|
||||
/// It stores the search parameter option `q` (or query in simple words)
|
||||
/// of the search url.
|
||||
pub q: Option<String>,
|
||||
/// It stores the search parameter `page` (or pageno in simple words)
|
||||
/// of the search url.
|
||||
pub page: Option<u32>,
|
||||
}
|
||||
|
||||
/// A named struct which is used to deserialize the cookies fetched from the client side.
|
||||
#[allow(dead_code)]
|
||||
#[derive(Deserialize)]
|
||||
pub struct Cookie {
|
||||
/// It stores the theme name used in the website.
|
||||
pub theme: String,
|
||||
/// It stores the colorscheme name used for the website theme.
|
||||
pub colorscheme: String,
|
||||
/// It stores the user selected upstream search engines selected from the UI.
|
||||
pub engines: Vec<String>,
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue