⚡ perf: several optimizations for improving the performance of the engine (#540)
* ♻️ refactor: initialize & store the config & cache structs as a constant (#486) - initializes & stores the config & cache structs as a static constant. - Pass the config & cache structs as a static reference to all the functions handling their respective route. * ⚡ perf: replace hashmaps with vectors for fetching & aggregating results (#486) - replace hashmaps with vectors for fetching, collecting & aggregating results as it tends to be contiguous & cache efficient data structure. - refactor & redesign algorithms for fetching & aggregating results centered around vectors in aggregate function. * ➕ build: add the future crate (#486) * ⚡ perf: use `futureunordered` for collecting results fetched from the tokio spawn tasks (#486) - using the `futureunordered` instead of vector for collecting results reduces the time it takes to fetch the results as the results do not need to come in specific order so any result that gets fetched first gets collected in the `futureunordered` type. Co-authored-by: Spencerjibz <spencernajib2@gmail.com> * ⚡ perf: initialize new async connections in parallel using tokio spawn tasks (#486) * ⚡ perf: initialize redis pipeline struct once with the default size of 3 (#486) * ⚡ perf: reduce branch predictions by reducing conditional code branches (#486) * ✅ test(unit): provide unit test for the `get_safesearch_level` function (#486) * ⚡ perf: reduce clones & use index based loop to improve search results filtering performance (#486) * 🚨 fix(clippy): make clippy/format checks happy (#486) * 🚨 fix(build): make the cargo build check happy (#486) * ⚡ perf: reduce the amount of clones, to_owneds & to_strings (#486) * ⚡ perf: use async crates & methods & make functions async (#486) * 🔖 chore(release): bump the app version (#486) --------- Co-authored-by: Spencerjibz <spencernajib2@gmail.com>
This commit is contained in:
parent
8d9b660eb1
commit
991f3f59de
25 changed files with 685 additions and 533 deletions
|
@ -48,7 +48,7 @@ impl SearchEngine for Bing {
|
|||
user_agent: &str,
|
||||
client: &Client,
|
||||
_safe_search: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||
// Bing uses `start results from this number` convention
|
||||
// So, for 10 results per page, page 0 starts at 1, page 1
|
||||
// starts at 11, and so on.
|
||||
|
|
|
@ -44,7 +44,7 @@ impl SearchEngine for Brave {
|
|||
user_agent: &str,
|
||||
client: &Client,
|
||||
safe_search: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||
let url = format!("https://search.brave.com/search?q={query}&offset={page}");
|
||||
|
||||
let safe_search_level = match safe_search {
|
||||
|
|
|
@ -47,7 +47,7 @@ impl SearchEngine for DuckDuckGo {
|
|||
user_agent: &str,
|
||||
client: &Client,
|
||||
_safe_search: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||
// Page number can be missing or empty string and so appropriate handling is required
|
||||
// so that upstream server recieves valid page number.
|
||||
let url: String = match page {
|
||||
|
|
|
@ -62,7 +62,7 @@ impl SearchEngine for LibreX {
|
|||
user_agent: &str,
|
||||
client: &Client,
|
||||
_safe_search: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||
// Page number can be missing or empty string and so appropriate handling is required
|
||||
// so that upstream server recieves valid page number.
|
||||
let url: String = format!(
|
||||
|
|
|
@ -47,7 +47,7 @@ impl SearchEngine for Mojeek {
|
|||
user_agent: &str,
|
||||
client: &Client,
|
||||
safe_search: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||
// Mojeek uses `start results from this number` convention
|
||||
// So, for 10 results per page, page 0 starts at 1, page 1
|
||||
// starts at 11, and so on.
|
||||
|
@ -72,8 +72,23 @@ impl SearchEngine for Mojeek {
|
|||
"Yep",
|
||||
"You",
|
||||
];
|
||||
|
||||
let qss = search_engines.join("%2C");
|
||||
let safe = if safe_search == 0 { "0" } else { "1" };
|
||||
|
||||
// A branchless condition to check whether the `safe_search` parameter has the
|
||||
// value 0 or not. If it is zero then it sets the value 0 otherwise it sets
|
||||
// the value to 1 for all other values of `safe_search`
|
||||
//
|
||||
// Moreover, the below branchless code is equivalent to the following code below:
|
||||
//
|
||||
// ```rust
|
||||
// let safe = if safe_search == 0 { 0 } else { 1 }.to_string();
|
||||
// ```
|
||||
//
|
||||
// For more information on branchless programming. See:
|
||||
//
|
||||
// * https://piped.video/watch?v=bVJ-mWWL7cE
|
||||
let safe = u8::from(safe_search != 0).to_string();
|
||||
|
||||
// Mojeek detects automated requests, these are preferences that are
|
||||
// able to circumvent the countermeasure. Some of these are
|
||||
|
@ -89,7 +104,7 @@ impl SearchEngine for Mojeek {
|
|||
("hp", "minimal"),
|
||||
("lb", "en"),
|
||||
("qss", &qss),
|
||||
("safe", safe),
|
||||
("safe", &safe),
|
||||
];
|
||||
|
||||
let mut query_params_string = String::new();
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
//! This modules provides helper functionalities for parsing a html document into internal SearchResult.
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::models::{aggregation_models::SearchResult, engine_models::EngineError};
|
||||
use error_stack::{Report, Result};
|
||||
|
@ -47,7 +46,7 @@ impl SearchResultParser {
|
|||
&self,
|
||||
document: &Html,
|
||||
builder: impl Fn(&ElementRef<'_>, &ElementRef<'_>, &ElementRef<'_>) -> Option<SearchResult>,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||
let res = document
|
||||
.select(&self.results)
|
||||
.filter_map(|result| {
|
||||
|
|
|
@ -43,12 +43,21 @@ impl SearchEngine for Searx {
|
|||
user_agent: &str,
|
||||
client: &Client,
|
||||
mut safe_search: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
// Page number can be missing or empty string and so appropriate handling is required
|
||||
// so that upstream server recieves valid page number.
|
||||
if safe_search == 3 {
|
||||
safe_search = 2;
|
||||
};
|
||||
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||
// A branchless condition to check whether the `safe_search` parameter has the
|
||||
// value greater than equal to three or not. If it is, then it modifies the
|
||||
// `safesearch` parameters value to 2.
|
||||
//
|
||||
// Moreover, the below branchless code is equivalent to the following code below:
|
||||
//
|
||||
// ```rust
|
||||
// safe_search = u8::from(safe_search == 3) * 2;
|
||||
// ```
|
||||
//
|
||||
// For more information on branchless programming. See:
|
||||
//
|
||||
// * https://piped.video/watch?v=bVJ-mWWL7cE
|
||||
safe_search = u8::from(safe_search >= 3) * 2;
|
||||
|
||||
let url: String = format!(
|
||||
"https://searx.be/search?q={query}&pageno={}&safesearch={safe_search}",
|
||||
|
|
|
@ -47,7 +47,7 @@ impl SearchEngine for Startpage {
|
|||
user_agent: &str,
|
||||
client: &Client,
|
||||
_safe_search: u8,
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
) -> Result<Vec<(String, SearchResult)>, EngineError> {
|
||||
// Page number can be missing or empty string and so appropriate handling is required
|
||||
// so that upstream server recieves valid page number.
|
||||
let url: String = format!(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue