Improve aggregation
Adds the EngineHandler struct. Removes a vulnerability where an attacker could send request cookies with fake engine names and crash the server. Merges RawSearchResult and SearchResult, as they were functionally identical.
This commit is contained in:
parent
15dfda6ea9
commit
5aca5c0d0d
7 changed files with 84 additions and 99 deletions
|
@ -7,7 +7,7 @@ use std::collections::HashMap;
|
|||
use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
use crate::results::aggregation_models::RawSearchResult;
|
||||
use crate::results::aggregation_models::SearchResult;
|
||||
|
||||
use super::engine_models::{EngineError, SearchEngine};
|
||||
|
||||
|
@ -43,7 +43,7 @@ impl SearchEngine for DuckDuckGo {
|
|||
page: u32,
|
||||
user_agent: String,
|
||||
request_timeout: u8,
|
||||
) -> Result<HashMap<String, RawSearchResult>, EngineError> {
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
// Page number can be missing or empty string and so appropriate handling is required
|
||||
// so that the upstream server receives a valid page number.
|
||||
let url: String = match page {
|
||||
|
@ -120,7 +120,7 @@ impl SearchEngine for DuckDuckGo {
|
|||
Ok(document
|
||||
.select(&results)
|
||||
.map(|result| {
|
||||
RawSearchResult::new(
|
||||
SearchResult::new(
|
||||
result
|
||||
.select(&result_title)
|
||||
.next()
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
//! This module provides the error enum to handle different errors associated while requesting data from
|
||||
//! the upstream search engines with the search query provided by the user.
|
||||
|
||||
use crate::results::aggregation_models::RawSearchResult;
|
||||
use crate::results::aggregation_models::SearchResult;
|
||||
use error_stack::{IntoReport, Result, ResultExt};
|
||||
use std::{collections::HashMap, fmt, time::Duration};
|
||||
|
||||
|
@ -45,7 +45,7 @@ impl error_stack::Context for EngineError {}
|
|||
|
||||
/// A trait to define common behavior for all search engines.
|
||||
#[async_trait::async_trait]
|
||||
pub trait SearchEngine {
|
||||
pub trait SearchEngine: Sync + Send {
|
||||
async fn fetch_html_from_upstream(
|
||||
&self,
|
||||
url: String,
|
||||
|
@ -73,5 +73,37 @@ pub trait SearchEngine {
|
|||
page: u32,
|
||||
user_agent: String,
|
||||
request_timeout: u8,
|
||||
) -> Result<HashMap<String, RawSearchResult>, EngineError>;
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError>;
|
||||
}
|
||||
|
||||
pub struct EngineHandler {
|
||||
engine: Box<dyn SearchEngine>,
|
||||
name: &'static str,
|
||||
}
|
||||
|
||||
impl Clone for EngineHandler {
|
||||
fn clone(&self) -> Self {
|
||||
Self::new(self.name).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
impl EngineHandler {
|
||||
/// parses an engine name into an engine handler, returns none if the engine is unknown
|
||||
pub fn new(engine_name: &str) -> Option<Self> {
|
||||
let engine: (&'static str, Box<dyn SearchEngine>) =
|
||||
match engine_name.to_lowercase().as_str() {
|
||||
"duckduckgo" => ("duckduckgo", Box::new(super::duckduckgo::DuckDuckGo)),
|
||||
"searx" => ("searx", Box::new(super::searx::Searx)),
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
Some(Self {
|
||||
engine: engine.1,
|
||||
name: engine.0,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn into_name_engine(self) -> (&'static str, Box<dyn SearchEngine>) {
|
||||
(self.name, self.engine)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,7 +6,7 @@ use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
|
|||
use scraper::{Html, Selector};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::results::aggregation_models::RawSearchResult;
|
||||
use crate::results::aggregation_models::SearchResult;
|
||||
|
||||
use super::engine_models::{EngineError, SearchEngine};
|
||||
use error_stack::{IntoReport, Report, Result, ResultExt};
|
||||
|
@ -42,7 +42,7 @@ impl SearchEngine for Searx {
|
|||
page: u32,
|
||||
user_agent: String,
|
||||
request_timeout: u8,
|
||||
) -> Result<HashMap<String, RawSearchResult>, EngineError> {
|
||||
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
||||
// Page number can be missing or empty string and so appropriate handling is required
|
||||
// so that the upstream server receives a valid page number.
|
||||
let url: String = match page {
|
||||
|
@ -111,7 +111,7 @@ impl SearchEngine for Searx {
|
|||
Ok(document
|
||||
.select(&results)
|
||||
.map(|result| {
|
||||
RawSearchResult::new(
|
||||
SearchResult::new(
|
||||
result
|
||||
.select(&result_title)
|
||||
.next()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue