updating and improving README.org

This commit is contained in:
neon_arch 2023-04-27 16:06:59 +03:00
parent e79100069b
commit ed13a16ec5
3 changed files with 42 additions and 10 deletions

View file

@ -1,3 +1,7 @@
//! The `duckduckgo` module handles the scraping of results from the duckduckgo search engine
//! by querying the upstream duckduckgo search engine with user provided query and with a page
//! number if provided.
use std::collections::HashMap;
use reqwest::header::USER_AGENT;
@ -5,10 +9,22 @@ use scraper::{Html, Selector};
use crate::search_results_handler::aggregation_models::RawSearchResult;
// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
// values are RawSearchResult struct and then returns it within a Result enum.
/// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
/// results like title, visiting_url (href in html), engine (the engine it was fetched from)
/// and description in a `RawSearchResult`, then adds that to a `HashMap` whose keys are urls and
/// whose values are `RawSearchResult` structs, and then returns it within a `Result` enum.
///
/// # Arguments
///
/// * `query` - Takes the user provided query with which to query the upstream search engine.
/// * `page` - Takes an `Option<u32>` as argument which can be either `None` or a valid page number.
/// * `user_agent` - Takes a random user agent string as an argument.
///
/// # Errors
///
/// Returns a reqwest error if the user is not connected to the internet or if there is a failure
/// to reach the above **upstream search engine** page, and also returns an error if the scraping
/// selector fails to initialize.
pub async fn results(
query: &str,
page: Option<u32>,

View file

@ -1,3 +1,7 @@
//! The `searx` module handles the scraping of results from the searx search engine instance
//! by querying the upstream searx search engine instance with user provided query and with a page
//! number if provided.
use std::collections::HashMap;
use reqwest::header::USER_AGENT;
@ -5,10 +9,22 @@ use scraper::{Html, Selector};
use crate::search_results_handler::aggregation_models::RawSearchResult;
// This function scrapes results from the upstream engine searx instance and puts all the scraped
// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
// values are RawSearchResult struct and then returns it within a Result enum.
/// This function scrapes results from the upstream engine searx instance and puts all the scraped
/// results like title, visiting_url (href in html), engine (the engine it was fetched from)
/// and description in a `RawSearchResult`, then adds that to a `HashMap` whose keys are urls and
/// whose values are `RawSearchResult` structs, and then returns it within a `Result` enum.
///
/// # Arguments
///
/// * `query` - Takes the user provided query with which to query the upstream search engine.
/// * `page` - Takes an `Option<u32>` as argument which can be either `None` or a valid page number.
/// * `user_agent` - Takes a random user agent string as an argument.
///
/// # Errors
///
/// Returns a reqwest error if the user is not connected to the internet or if there is a failure
/// to reach the above **upstream search engine** page, and also returns an error if the scraping
/// selector fails to initialize.
pub async fn results(
query: &str,
page: Option<u32>,