Merge branch 'rolling' into reorganize-code-and-restructure-the-codebase
This commit is contained in:
commit
ca4447fcdf
26 changed files with 1194 additions and 487 deletions
|
@ -60,14 +60,15 @@ type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<Eng
|
|||
/// function in either `searx` or `duckduckgo` or both, otherwise returns a `SearchResults` struct
|
||||
/// containing appropriate values.
|
||||
pub async fn aggregate(
|
||||
query: String,
|
||||
query: &str,
|
||||
page: u32,
|
||||
random_delay: bool,
|
||||
debug: bool,
|
||||
upstream_search_engines: Vec<EngineHandler>,
|
||||
upstream_search_engines: &[EngineHandler],
|
||||
request_timeout: u8,
|
||||
safe_search: u8,
|
||||
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
||||
let user_agent: String = random_user_agent();
|
||||
let user_agent: &str = random_user_agent();
|
||||
|
||||
// Add a random delay before making the request.
|
||||
if random_delay || !debug {
|
||||
|
@ -76,19 +77,24 @@ pub async fn aggregate(
|
|||
tokio::time::sleep(Duration::from_secs(delay_secs)).await;
|
||||
}
|
||||
|
||||
let mut names: Vec<&str> = vec![];
|
||||
let mut names: Vec<&str> = Vec::with_capacity(0);
|
||||
|
||||
// create tasks for upstream result fetching
|
||||
let mut tasks: FutureVec = FutureVec::new();
|
||||
|
||||
for engine_handler in upstream_search_engines {
|
||||
let (name, search_engine) = engine_handler.into_name_engine();
|
||||
let (name, search_engine) = engine_handler.to_owned().into_name_engine();
|
||||
names.push(name);
|
||||
let query: String = query.clone();
|
||||
let user_agent: String = user_agent.clone();
|
||||
let query: String = query.to_owned();
|
||||
tasks.push(tokio::spawn(async move {
|
||||
search_engine
|
||||
.results(query, page, user_agent.clone(), request_timeout)
|
||||
.results(
|
||||
&query,
|
||||
page,
|
||||
user_agent.clone(),
|
||||
request_timeout,
|
||||
safe_search,
|
||||
)
|
||||
.await
|
||||
}));
|
||||
}
|
||||
|
@ -106,7 +112,7 @@ pub async fn aggregate(
|
|||
let mut result_map: HashMap<String, SearchResult> = HashMap::new();
|
||||
let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
|
||||
|
||||
let mut handle_error = |error: Report<EngineError>, engine_name: String| {
|
||||
let mut handle_error = |error: &Report<EngineError>, engine_name: &'static str| {
|
||||
log::error!("Engine Error: {:?}", error);
|
||||
engine_errors_info.push(EngineErrorInfo::new(
|
||||
error.downcast_ref::<EngineError>().unwrap(),
|
||||
|
@ -116,7 +122,7 @@ pub async fn aggregate(
|
|||
|
||||
for _ in 0..responses.len() {
|
||||
let response = responses.pop().unwrap();
|
||||
let engine = names.pop().unwrap().to_string();
|
||||
let engine = names.pop().unwrap();
|
||||
|
||||
if result_map.is_empty() {
|
||||
match response {
|
||||
|
@ -124,7 +130,7 @@ pub async fn aggregate(
|
|||
result_map = results.clone();
|
||||
}
|
||||
Err(error) => {
|
||||
handle_error(error, engine);
|
||||
handle_error(&error, engine);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
|
@ -136,39 +142,37 @@ pub async fn aggregate(
|
|||
result_map
|
||||
.entry(key)
|
||||
.and_modify(|result| {
|
||||
result.add_engines(engine.clone());
|
||||
result.add_engines(engine);
|
||||
})
|
||||
.or_insert_with(|| -> SearchResult { value });
|
||||
});
|
||||
}
|
||||
Err(error) => {
|
||||
handle_error(error, engine);
|
||||
handle_error(&error, engine);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
|
||||
filter_with_lists(
|
||||
&mut result_map,
|
||||
&mut blacklist_map,
|
||||
&file_path(FileType::BlockList)?,
|
||||
)?;
|
||||
if safe_search >= 3 {
|
||||
let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
|
||||
filter_with_lists(
|
||||
&mut result_map,
|
||||
&mut blacklist_map,
|
||||
file_path(FileType::BlockList)?,
|
||||
)?;
|
||||
|
||||
filter_with_lists(
|
||||
&mut blacklist_map,
|
||||
&mut result_map,
|
||||
&file_path(FileType::AllowList)?,
|
||||
)?;
|
||||
filter_with_lists(
|
||||
&mut blacklist_map,
|
||||
&mut result_map,
|
||||
file_path(FileType::AllowList)?,
|
||||
)?;
|
||||
|
||||
drop(blacklist_map);
|
||||
drop(blacklist_map);
|
||||
}
|
||||
|
||||
let results: Vec<SearchResult> = result_map.into_values().collect();
|
||||
|
||||
Ok(SearchResults::new(
|
||||
results,
|
||||
query.to_string(),
|
||||
engine_errors_info,
|
||||
))
|
||||
Ok(SearchResults::new(results, query, &engine_errors_info))
|
||||
}
|
||||
|
||||
/// Filters a map of search results using a list of regex patterns.
|
||||
|
@ -190,7 +194,7 @@ pub fn filter_with_lists(
|
|||
let mut reader = BufReader::new(File::open(file_path)?);
|
||||
|
||||
for line in reader.by_ref().lines() {
|
||||
let re = Regex::new(&line?)?;
|
||||
let re = Regex::new(line?.trim())?;
|
||||
|
||||
// Iterate over each search result in the map and check if it matches the regex pattern
|
||||
for (url, search_result) in map_to_be_filtered.clone().into_iter() {
|
||||
|
@ -199,7 +203,10 @@ pub fn filter_with_lists(
|
|||
|| re.is_match(&search_result.description.to_lowercase())
|
||||
{
|
||||
// If the search result matches the regex pattern, move it from the original map to the resultant map
|
||||
resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
|
||||
resultant_map.insert(
|
||||
url.to_owned(),
|
||||
map_to_be_filtered.remove(&url.to_owned()).unwrap(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -210,6 +217,7 @@ pub fn filter_with_lists(
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use smallvec::smallvec;
|
||||
use std::collections::HashMap;
|
||||
use std::io::Write;
|
||||
use tempfile::NamedTempFile;
|
||||
|
@ -219,22 +227,22 @@ mod tests {
|
|||
// Create a map of search results to filter
|
||||
let mut map_to_be_filtered = HashMap::new();
|
||||
map_to_be_filtered.insert(
|
||||
"https://www.example.com".to_string(),
|
||||
"https://www.example.com".to_owned(),
|
||||
SearchResult {
|
||||
title: "Example Domain".to_string(),
|
||||
url: "https://www.example.com".to_string(),
|
||||
title: "Example Domain".to_owned(),
|
||||
url: "https://www.example.com".to_owned(),
|
||||
description: "This domain is for use in illustrative examples in documents."
|
||||
.to_string(),
|
||||
engine: vec!["Google".to_string(), "Bing".to_string()],
|
||||
.to_owned(),
|
||||
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||
},
|
||||
);
|
||||
map_to_be_filtered.insert(
|
||||
"https://www.rust-lang.org/".to_string(),
|
||||
"https://www.rust-lang.org/".to_owned(),
|
||||
SearchResult {
|
||||
title: "Rust Programming Language".to_string(),
|
||||
url: "https://www.rust-lang.org/".to_string(),
|
||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
|
||||
engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
|
||||
title: "Rust Programming Language".to_owned(),
|
||||
url: "https://www.rust-lang.org/".to_owned(),
|
||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
||||
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
||||
},
|
||||
);
|
||||
|
||||
|
@ -263,22 +271,22 @@ mod tests {
|
|||
fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut map_to_be_filtered = HashMap::new();
|
||||
map_to_be_filtered.insert(
|
||||
"https://www.example.com".to_string(),
|
||||
"https://www.example.com".to_owned(),
|
||||
SearchResult {
|
||||
title: "Example Domain".to_string(),
|
||||
url: "https://www.example.com".to_string(),
|
||||
title: "Example Domain".to_owned(),
|
||||
url: "https://www.example.com".to_owned(),
|
||||
description: "This domain is for use in illustrative examples in documents."
|
||||
.to_string(),
|
||||
engine: vec!["Google".to_string(), "Bing".to_string()],
|
||||
.to_owned(),
|
||||
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||
},
|
||||
);
|
||||
map_to_be_filtered.insert(
|
||||
"https://www.rust-lang.org/".to_string(),
|
||||
"https://www.rust-lang.org/".to_owned(),
|
||||
SearchResult {
|
||||
title: "Rust Programming Language".to_string(),
|
||||
url: "https://www.rust-lang.org/".to_string(),
|
||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
|
||||
engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
|
||||
title: "Rust Programming Language".to_owned(),
|
||||
url: "https://www.rust-lang.org/".to_owned(),
|
||||
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
||||
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
||||
},
|
||||
);
|
||||
|
||||
|
@ -323,13 +331,13 @@ mod tests {
|
|||
fn test_filter_with_lists_invalid_regex() {
|
||||
let mut map_to_be_filtered = HashMap::new();
|
||||
map_to_be_filtered.insert(
|
||||
"https://www.example.com".to_string(),
|
||||
"https://www.example.com".to_owned(),
|
||||
SearchResult {
|
||||
title: "Example Domain".to_string(),
|
||||
url: "https://www.example.com".to_string(),
|
||||
title: "Example Domain".to_owned(),
|
||||
url: "https://www.example.com".to_owned(),
|
||||
description: "This domain is for use in illustrative examples in documents."
|
||||
.to_string(),
|
||||
engine: vec!["Google".to_string(), "Bing".to_string()],
|
||||
.to_owned(),
|
||||
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
||||
},
|
||||
);
|
||||
|
||||
|
|
|
@ -1,30 +1,34 @@
|
|||
//! This module provides the functionality to generate a random user agent string.
|
||||
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use fake_useragent::{Browsers, UserAgents, UserAgentsBuilder};
|
||||
|
||||
/// A static variable which stores the initially built `UserAgents` struct so that it can be reused
|
||||
/// again and again without the need of reinitializing the `UserAgents` struct.
|
||||
static USER_AGENTS: once_cell::sync::Lazy<UserAgents> = once_cell::sync::Lazy::new(|| {
|
||||
UserAgentsBuilder::new()
|
||||
.cache(false)
|
||||
.dir("/tmp")
|
||||
.thread(1)
|
||||
.set_browsers(
|
||||
Browsers::new()
|
||||
.set_chrome()
|
||||
.set_safari()
|
||||
.set_edge()
|
||||
.set_firefox()
|
||||
.set_mozilla(),
|
||||
)
|
||||
.build()
|
||||
});
|
||||
static USER_AGENTS: OnceLock<UserAgents> = OnceLock::new();
|
||||
|
||||
/// A function to generate a random user agent to improve the privacy of the user.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// A randomly generated user agent string.
|
||||
pub fn random_user_agent() -> String {
|
||||
USER_AGENTS.random().to_string()
|
||||
pub fn random_user_agent() -> &'static str {
|
||||
USER_AGENTS
|
||||
.get_or_init(|| {
|
||||
UserAgentsBuilder::new()
|
||||
.cache(false)
|
||||
.dir("/tmp")
|
||||
.thread(1)
|
||||
.set_browsers(
|
||||
Browsers::new()
|
||||
.set_chrome()
|
||||
.set_safari()
|
||||
.set_edge()
|
||||
.set_firefox()
|
||||
.set_mozilla(),
|
||||
)
|
||||
.build()
|
||||
})
|
||||
.random()
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue