perf: several optimizations for improving the performance of the engine (#540)

* ♻️ refactor: initialize & store the config & cache structs as a constant (#486)
- initializes & stores the config & cache structs as a static constant.
- Pass the config & cache structs as a static reference to all the
  functions handling their respective route.

*  perf: replace hashmaps with vectors for fetching & aggregating results (#486)
- replace hashmaps with vectors for fetching, collecting & aggregating results, as a vector is a contiguous & cache-efficient data structure.
- refactor & redesign algorithms for fetching & aggregating results
  centered around vectors in aggregate function.

*  build: add the `futures` crate (#486)

*  perf: use `FuturesUnordered` for collecting results fetched from the tokio spawn tasks (#486)
- using `FuturesUnordered` instead of a vector for collecting results
  reduces the time it takes to fetch the results: the results do not
  need to arrive in any specific order, so whichever result is fetched
  first is collected first by the `FuturesUnordered` type.

Co-authored-by: Spencerjibz <spencernajib2@gmail.com>

*  perf: initialize new async connections in parallel using tokio spawn tasks (#486)

*  perf: initialize redis pipeline struct once with the default size of 3 (#486)

*  perf: reduce branch predictions by reducing conditional code branches (#486)

*  test(unit): provide unit test for the `get_safesearch_level` function (#486)

*  perf: reduce clones & use index based loop to improve search results filtering performance (#486)

* 🚨 fix(clippy): make clippy/format checks happy (#486)

* 🚨 fix(build): make the cargo build check happy (#486)

*  perf: reduce the amount of clones, to_owneds & to_strings (#486)

*  perf: use async crates & methods & make functions async (#486)

* 🔖 chore(release): bump the app version (#486)

---------

Co-authored-by: Spencerjibz <spencernajib2@gmail.com>
This commit is contained in:
neon_arch 2024-03-11 12:01:30 +03:00 committed by GitHub
parent 8d9b660eb1
commit 991f3f59de
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 685 additions and 533 deletions

View file

@ -48,7 +48,7 @@ impl SearchEngine for Bing {
user_agent: &str,
client: &Client,
_safe_search: u8,
) -> Result<HashMap<String, SearchResult>, EngineError> {
) -> Result<Vec<(String, SearchResult)>, EngineError> {
// Bing uses `start results from this number` convention
// So, for 10 results per page, page 0 starts at 1, page 1
// starts at 11, and so on.

View file

@ -44,7 +44,7 @@ impl SearchEngine for Brave {
user_agent: &str,
client: &Client,
safe_search: u8,
) -> Result<HashMap<String, SearchResult>, EngineError> {
) -> Result<Vec<(String, SearchResult)>, EngineError> {
let url = format!("https://search.brave.com/search?q={query}&offset={page}");
let safe_search_level = match safe_search {

View file

@ -47,7 +47,7 @@ impl SearchEngine for DuckDuckGo {
user_agent: &str,
client: &Client,
_safe_search: u8,
) -> Result<HashMap<String, SearchResult>, EngineError> {
) -> Result<Vec<(String, SearchResult)>, EngineError> {
// Page number can be missing or empty string and so appropriate handling is required
// so that upstream server receives valid page number.
let url: String = match page {

View file

@ -62,7 +62,7 @@ impl SearchEngine for LibreX {
user_agent: &str,
client: &Client,
_safe_search: u8,
) -> Result<HashMap<String, SearchResult>, EngineError> {
) -> Result<Vec<(String, SearchResult)>, EngineError> {
// Page number can be missing or empty string and so appropriate handling is required
// so that upstream server receives valid page number.
let url: String = format!(

View file

@ -47,7 +47,7 @@ impl SearchEngine for Mojeek {
user_agent: &str,
client: &Client,
safe_search: u8,
) -> Result<HashMap<String, SearchResult>, EngineError> {
) -> Result<Vec<(String, SearchResult)>, EngineError> {
// Mojeek uses `start results from this number` convention
// So, for 10 results per page, page 0 starts at 1, page 1
// starts at 11, and so on.
@ -72,8 +72,23 @@ impl SearchEngine for Mojeek {
"Yep",
"You",
];
let qss = search_engines.join("%2C");
let safe = if safe_search == 0 { "0" } else { "1" };
// A branchless way to derive the value of the `safe` query parameter from
// `safe_search`: a value of 0 yields "0" and every other value yields "1".
//
// The branchless code below is equivalent to:
//
// ```rust
// let safe = if safe_search == 0 { 0 } else { 1 }.to_string();
// ```
//
// For more information on branchless programming, see:
//
// * https://piped.video/watch?v=bVJ-mWWL7cE
let safe = u8::from(safe_search != 0).to_string();
// Mojeek detects automated requests, these are preferences that are
// able to circumvent the countermeasure. Some of these are
@ -89,7 +104,7 @@ impl SearchEngine for Mojeek {
("hp", "minimal"),
("lb", "en"),
("qss", &qss),
("safe", safe),
("safe", &safe),
];
let mut query_params_string = String::new();

View file

@ -1,5 +1,4 @@
//! This module provides helper functionality for parsing an HTML document into internal `SearchResult`s.
use std::collections::HashMap;
use crate::models::{aggregation_models::SearchResult, engine_models::EngineError};
use error_stack::{Report, Result};
@ -47,7 +46,7 @@ impl SearchResultParser {
&self,
document: &Html,
builder: impl Fn(&ElementRef<'_>, &ElementRef<'_>, &ElementRef<'_>) -> Option<SearchResult>,
) -> Result<HashMap<String, SearchResult>, EngineError> {
) -> Result<Vec<(String, SearchResult)>, EngineError> {
let res = document
.select(&self.results)
.filter_map(|result| {

View file

@ -43,12 +43,21 @@ impl SearchEngine for Searx {
user_agent: &str,
client: &Client,
mut safe_search: u8,
) -> Result<HashMap<String, SearchResult>, EngineError> {
// Page number can be missing or empty string and so appropriate handling is required
// so that upstream server recieves valid page number.
if safe_search == 3 {
safe_search = 2;
};
) -> Result<Vec<(String, SearchResult)>, EngineError> {
// A branchless rewrite of the `safe_search` parameter: the comparison yields
// 1 when `safe_search` is greater than or equal to three and 0 otherwise, and
// multiplying by 2 turns that into 2 or 0 respectively.
//
// The branchless code below is equivalent to:
//
// ```rust
// safe_search = if safe_search >= 3 { 2 } else { 0 };
// ```
//
// NOTE(review): this is NOT the same as the earlier
// `if safe_search == 3 { safe_search = 2; }` -- levels 1 and 2 are now sent
// as 0 instead of being passed through unchanged. Confirm this is intended.
//
// For more information on branchless programming, see:
//
// * https://piped.video/watch?v=bVJ-mWWL7cE
safe_search = u8::from(safe_search >= 3) * 2;
let url: String = format!(
"https://searx.be/search?q={query}&pageno={}&safesearch={safe_search}",

View file

@ -47,7 +47,7 @@ impl SearchEngine for Startpage {
user_agent: &str,
client: &Client,
_safe_search: u8,
) -> Result<HashMap<String, SearchResult>, EngineError> {
) -> Result<Vec<(String, SearchResult)>, EngineError> {
// Page number can be missing or empty string and so appropriate handling is required
// so that upstream server receives valid page number.
let url: String = format!(