⚙️ refactor: change & add documentation to the code based on the lints (#205)
This commit is contained in:
parent
0d2d449889
commit
049b1c1ddd
16 changed files with 177 additions and 132 deletions
|
@ -19,25 +19,7 @@ pub struct DuckDuckGo;
|
|||
|
||||
#[async_trait::async_trait]
|
||||
impl SearchEngine for DuckDuckGo {
|
||||
/// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
|
||||
/// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
|
||||
/// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
|
||||
/// values are RawSearchResult struct and then returns it within a Result enum.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `query` - Takes the user provided query to query to the upstream search engine with.
|
||||
/// * `page` - Takes an u32 as an argument.
|
||||
/// * `user_agent` - Takes a random user agent string as an argument.
|
||||
/// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns an `EngineError` if the user is not connected to the internet or if there is a failure to
|
||||
/// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
|
||||
/// provide results for the requested search query and also returns error if the scraping selector
|
||||
/// or HeaderMap fails to initialize.
|
||||
async fn results(
|
||||
async fn results(
|
||||
&self,
|
||||
query: String,
|
||||
page: u32,
|
||||
|
|
|
@ -6,19 +6,18 @@ use error_stack::{IntoReport, Result, ResultExt};
|
|||
use std::{collections::HashMap, fmt, time::Duration};
|
||||
|
||||
/// A custom error type used to handle engine-associated errors.
|
||||
///
|
||||
/// This enum provides variants for three different categories of errors:
|
||||
/// * `RequestError` - This variant handles all request related errors like forbidden, not found,
|
||||
/// etc.
|
||||
/// * `EmptyResultSet` - This variant handles the "no results found" error provided by the upstream
|
||||
/// search engines.
|
||||
/// * `UnexpectedError` - This variant handles all the errors which are unexpected or occur rarely
|
||||
/// and are errors mostly related to failure in initialization of HeaderMap, Selector errors and
|
||||
/// all other errors occurring within the code handling the `upstream search engines`.
|
||||
#[derive(Debug)]
|
||||
pub enum EngineError {
|
||||
/// This variant handles the "no results found" error provided by the upstream
|
||||
/// search engines.
|
||||
EmptyResultSet,
|
||||
/// This variant handles all request related errors like forbidden, not found,
|
||||
/// etc.
|
||||
RequestError,
|
||||
/// This variant handles all the errors which are unexpected or occur rarely
|
||||
/// and are errors mostly related to failure in initialization of HeaderMap,
|
||||
/// Selector errors and all other errors occurring within the code handling
|
||||
/// the `upstream search engines`.
|
||||
UnexpectedError,
|
||||
}
|
||||
|
||||
|
@ -46,6 +45,23 @@ impl error_stack::Context for EngineError {}
|
|||
/// A trait to define common behavior for all search engines.
|
||||
#[async_trait::async_trait]
|
||||
pub trait SearchEngine: Sync + Send {
|
||||
/// This helper function fetches/requests the search results from the upstream search engine in
|
||||
/// an html form.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `url` - It takes the url of the upstream search engine with the user requested search
|
||||
/// query appended in the search parameters.
|
||||
/// * `header_map` - It takes the http request headers to be sent to the upstream engine in
|
||||
/// order to prevent being detected as a bot. It takes the header as a HeaderMap type.
|
||||
/// * `request_timeout` - It takes the request timeout value as seconds which is used to limit
|
||||
/// the amount of time each request may remain connected while waiting for the results to be provided
|
||||
/// by the upstream engine.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// It returns the html data as a string if the upstream engine provides the data as expected
|
||||
/// otherwise it returns a custom `EngineError`.
|
||||
async fn fetch_html_from_upstream(
|
||||
&self,
|
||||
url: String,
|
||||
|
@ -67,6 +83,24 @@ pub trait SearchEngine: Sync + Send {
|
|||
.change_context(EngineError::RequestError)?)
|
||||
}
|
||||
|
||||
/// This function scrapes results from the upstream engine and puts all the scraped results like
|
||||
/// title, visiting_url (href in html),engine (from which engine it was fetched from) and description
|
||||
/// in a RawSearchResult and then adds that to HashMap whose keys are url and values are RawSearchResult
|
||||
/// struct and then returns it within a Result enum.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `query` - Takes the user provided query to query to the upstream search engine with.
|
||||
/// * `page` - Takes an u32 as an argument.
|
||||
/// * `user_agent` - Takes a random user agent string as an argument.
|
||||
/// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns an `EngineError` if the user is not connected to the internet or if there is a failure to
|
||||
/// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
|
||||
/// provide results for the requested search query and also returns error if the scraping selector
|
||||
/// or HeaderMap fails to initialize.
|
||||
async fn results(
|
||||
&self,
|
||||
query: String,
|
||||
|
@ -76,8 +110,12 @@ pub trait SearchEngine: Sync + Send {
|
|||
) -> Result<HashMap<String, SearchResult>, EngineError>;
|
||||
}
|
||||
|
||||
/// A named struct which stores the engine struct with the name of the associated engine.
|
||||
pub struct EngineHandler {
|
||||
/// It stores the engine struct wrapped in a box smart pointer as the engine struct implements
|
||||
/// the `SearchEngine` trait.
|
||||
engine: Box<dyn SearchEngine>,
|
||||
/// It stores the name of the engine with which the struct is associated.
|
||||
name: &'static str,
|
||||
}
|
||||
|
||||
|
@ -88,7 +126,15 @@ impl Clone for EngineHandler {
|
|||
}
|
||||
|
||||
impl EngineHandler {
|
||||
/// parses an engine name into an engine handler, returns none if the engine is unknown
|
||||
/// Parses an engine name into an engine handler.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `engine_name` - It takes the name of the engine with which the struct is associated.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// It returns an `Option` containing the engine handler, or `None` if the engine is unknown.
|
||||
pub fn new(engine_name: &str) -> Option<Self> {
|
||||
let engine: (&'static str, Box<dyn SearchEngine>) =
|
||||
match engine_name.to_lowercase().as_str() {
|
||||
|
@ -103,6 +149,8 @@ impl EngineHandler {
|
|||
})
|
||||
}
|
||||
|
||||
/// This function converts the EngineHandler type into a tuple containing the engine name and
|
||||
/// the associated engine struct.
|
||||
pub fn into_name_engine(self) -> (&'static str, Box<dyn SearchEngine>) {
|
||||
(self.name, self.engine)
|
||||
}
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
//! This module provides different modules which handle the functionality to fetch results from the
|
||||
//! upstream search engines based on user requested queries. Also provides different models to
|
||||
//! provide standard functions to be implemented for all the upstream search engine handling
|
||||
//! code. Moreover, it also provides a custom error for the upstream search engine handling code.
|
||||
|
||||
pub mod duckduckgo;
|
||||
pub mod engine_models;
|
||||
pub mod searx;
|
||||
|
|
|
@ -17,25 +17,6 @@ pub struct Searx;
|
|||
|
||||
#[async_trait::async_trait]
|
||||
impl SearchEngine for Searx {
|
||||
/// This function scrapes results from the upstream engine searx and puts all the scraped
|
||||
/// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
|
||||
/// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
|
||||
/// values are RawSearchResult struct and then returns it within a Result enum.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `query` - Takes the user provided query to query to the upstream search engine with.
|
||||
/// * `page` - Takes an u32 as an argument.
|
||||
/// * `user_agent` - Takes a random user agent string as an argument.
|
||||
/// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns an `EngineError` if the user is not connected to the internet or if there is a failure to
|
||||
/// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
|
||||
/// provide results for the requested search query and also returns error if the scraping selector
|
||||
/// or HeaderMap fails to initialize.
|
||||
|
||||
async fn results(
|
||||
&self,
|
||||
query: String,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue