Ranking of aggregated search results based on relevancy of the search result to the user's search query (#549)

* add sorting by relevance and merge new changes

* fix conflicts

* Update src/models/aggregation_models.rs

Co-authored-by: neon_arch <mustafadhuleb53@gmail.com>

* Update src/models/aggregation_models.rs

Co-authored-by: neon_arch <mustafadhuleb53@gmail.com>

* Update Cargo.toml

Co-authored-by: neon_arch <mustafadhuleb53@gmail.com>

* Update Cargo.toml

Co-authored-by: neon_arch <mustafadhuleb53@gmail.com>

* Update Cargo.toml

Co-authored-by: neon_arch <mustafadhuleb53@gmail.com>

* enable non-static-synonyms features

---------

Co-authored-by: neon_arch <mustafadhuleb53@gmail.com>
This commit is contained in:
Spencer 2024-03-25 09:16:49 +00:00 committed by GitHub
parent c584a7d601
commit bb50e8bb25
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 296 additions and 41 deletions

View file

@ -8,6 +8,7 @@ use crate::models::{
aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
engine_models::{EngineError, EngineHandler},
};
use error_stack::Report;
use futures::stream::FuturesUnordered;
use regex::Regex;
@ -184,7 +185,17 @@ pub async fn aggregate(
drop(blacklist_map);
}
let results: Vec<SearchResult> = result_map.iter().map(|(_, value)| value.clone()).collect();
let mut results: Vec<SearchResult> = result_map
.iter()
.map(|(_, value)| {
let mut copy = value.clone();
if !copy.url.contains("temu.com") {
copy.calculate_relevance(query.as_str())
}
copy
})
.collect();
sort_search_results(&mut results);
Ok(SearchResults::new(results, &engine_errors_info))
}
@ -233,7 +244,21 @@ pub async fn filter_with_lists(
Ok(())
}
/// Sorts SearchResults by relevance score.
/// <br> sort_unstable is used as its faster,stability is not an issue on our side.
/// For reasons why, check out [`this`](https://rust-lang.github.io/rfcs/1884-unstable-sort.html)
/// # Arguments
/// * `results` - A mutable slice or Vec of SearchResults
///
fn sort_search_results(results: &mut [SearchResult]) {
results.sort_unstable_by(|a, b| {
use std::cmp::Ordering;
b.relevance_score
.partial_cmp(&a.relevance_score)
.unwrap_or(Ordering::Less)
})
}
#[cfg(test)]
mod tests {
use super::*;
@ -252,6 +277,7 @@ mod tests {
url: "https://www.example.com".to_owned(),
description: "This domain is for use in illustrative examples in documents."
.to_owned(),
relevance_score: 0.0,
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
},
));
@ -262,6 +288,7 @@ mod tests {
url: "https://www.rust-lang.org/".to_owned(),
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
relevance_score:0.0
},)
);
@ -302,6 +329,7 @@ mod tests {
description: "This domain is for use in illustrative examples in documents."
.to_owned(),
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
relevance_score: 0.0,
},
));
map_to_be_filtered.push((
@ -311,6 +339,7 @@ mod tests {
url: "https://www.rust-lang.org/".to_owned(),
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
relevance_score:0.0
},
));
@ -367,6 +396,7 @@ mod tests {
description: "This domain is for use in illustrative examples in documents."
.to_owned(),
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
relevance_score: 0.0,
},
));