Cache refactor - add an in-memory cache, so redis is not needed

This commit is contained in:
Zsombor Gegesy 2023-09-09 18:17:29 +02:00
parent d1d2d4e44d
commit 996ff84c5b
10 changed files with 263 additions and 146 deletions

185
src/cache/cacher.rs vendored
View file

@ -2,107 +2,53 @@
//! from the upstream search engines in a json format.
use error_stack::Report;
use futures::future::try_join_all;
use md5::compute;
use redis::{aio::ConnectionManager, AsyncCommands, Client, RedisError};
use mini_moka::sync::Cache as MokaCache;
use std::time::Duration;
use tokio::sync::Mutex;
use super::error::PoolError;
use super::{error::PoolError, redis_cacher::RedisCache};
/// A named struct which stores the redis Connection url address to which the client will
/// connect to.
/// Different implementations for caching, currently it is possible to cache in-memory or in Redis.
#[derive(Clone)]
pub struct RedisCache {
/// It stores a pool of connections ready to be used.
connection_pool: Vec<ConnectionManager>,
/// It stores the size of the connection pool (in other words the number of
/// connections that should be stored in the pool).
pool_size: u8,
/// It stores the index of which connection is being used at the moment.
current_connection: u8,
pub enum Cache {
/// Encapsulates the Redis based cache
Redis(RedisCache),
/// Contains the in-memory cache.
InMemory(MokaCache<String, String>),
}
impl RedisCache {
/// Constructs a new `SearchResult` with the given arguments needed for the struct.
///
/// # Arguments
///
/// * `redis_connection_url` - It takes the redis Connection url address.
/// * `pool_size` - It takes the size of the connection pool (in other words the number of
/// connections that should be stored in the pool).
pub async fn new(
redis_connection_url: &str,
pool_size: u8,
) -> Result<Self, Box<dyn std::error::Error>> {
let client = Client::open(redis_connection_url)?;
let mut tasks: Vec<_> = Vec::new();
for _ in 0..pool_size {
tasks.push(client.get_tokio_connection_manager());
}
let redis_cache = RedisCache {
connection_pool: try_join_all(tasks).await?,
pool_size,
current_connection: Default::default(),
};
Ok(redis_cache)
impl Cache {
/// Creates a new cache, which wraps the given RedisCache.
pub fn new(redis_cache: RedisCache) -> Self {
Cache::Redis(redis_cache)
}
/// A helper function which computes the hash of the url and formats and returns it as string.
///
/// # Arguments
///
/// * `url` - It takes an url as string.
fn hash_url(&self, url: &str) -> String {
format!("{:?}", compute(url))
/// Creates an in-memory cache
pub fn new_in_memory() -> Self {
let cache = MokaCache::builder()
.max_capacity(1000)
.time_to_live(Duration::from_secs(60))
.build();
Cache::InMemory(cache)
}
/// A function which fetches the cached json results as json string from the redis server.
/// A function which fetches the cached json results as json string.
///
/// # Arguments
///
/// * `url` - It takes an url as a string.
pub async fn cached_json(&mut self, url: &str) -> Result<String, Report<PoolError>> {
self.current_connection = Default::default();
let hashed_url_string: &str = &self.hash_url(url);
let mut result: Result<String, RedisError> = self.connection_pool
[self.current_connection as usize]
.get(hashed_url_string)
.await;
// Code to check whether the current connection being used is dropped with connection error
// or not. if it drops with the connection error then the current connection is replaced
// with a new connection from the pool which is then used to run the redis command then
// that connection is also checked whether it is dropped or not if it is not then the
// result is passed as a `Result` or else the same process repeats again and if all of the
// connections in the pool result in connection drop error then a custom pool error is
// returned.
loop {
match result {
Err(error) => match error.is_connection_dropped() {
true => {
self.current_connection += 1;
if self.current_connection == self.pool_size {
return Err(Report::new(
PoolError::PoolExhaustionWithConnectionDropError,
));
}
result = self.connection_pool[self.current_connection as usize]
.get(hashed_url_string)
.await;
continue;
}
false => return Err(Report::new(PoolError::RedisError(error))),
},
Ok(res) => return Ok(res),
}
match self {
Cache::Redis(redis_cache) => redis_cache.cached_json(url).await,
Cache::InMemory(in_memory) => match in_memory.get(&url.to_string()) {
Some(res) => Ok(res),
None => Err(Report::new(PoolError::MissingValue)),
},
}
}
/// A function which caches the results by using the hashed `url` as the key and
/// `json results` as the value and stores it in redis server with ttl(time to live)
/// set to 60 seconds.
/// A function which caches the results by using the `url` as the key and
/// `json results` as the value and stores it in the cache
///
/// # Arguments
///
@ -110,43 +56,46 @@ impl RedisCache {
/// * `url` - It takes the url as a String.
pub async fn cache_results(
&mut self,
json_results: &str,
json_results: String,
url: &str,
) -> Result<(), Report<PoolError>> {
self.current_connection = Default::default();
let hashed_url_string: &str = &self.hash_url(url);
let mut result: Result<(), RedisError> = self.connection_pool
[self.current_connection as usize]
.set_ex(hashed_url_string, json_results, 60)
.await;
// Code to check whether the current connection being used is dropped with connection error
// or not. if it drops with the connection error then the current connection is replaced
// with a new connection from the pool which is then used to run the redis command then
// that connection is also checked whether it is dropped or not if it is not then the
// result is passed as a `Result` or else the same process repeats again and if all of the
// connections in the pool result in connection drop error then a custom pool error is
// returned.
loop {
match result {
Err(error) => match error.is_connection_dropped() {
true => {
self.current_connection += 1;
if self.current_connection == self.pool_size {
return Err(Report::new(
PoolError::PoolExhaustionWithConnectionDropError,
));
}
result = self.connection_pool[self.current_connection as usize]
.set_ex(hashed_url_string, json_results, 60)
.await;
continue;
}
false => return Err(Report::new(PoolError::RedisError(error))),
},
Ok(_) => return Ok(()),
match self {
Cache::Redis(redis_cache) => redis_cache.cache_results(&json_results, url).await,
Cache::InMemory(cache) => {
cache.insert(url.to_string(), json_results);
Ok(())
}
}
}
}
/// A structure to efficiently share the cache between threads - as it is protected by a Mutex.
pub struct SharedCache {
cache: Mutex<Cache>,
}
impl SharedCache {
/// Creates a new SharedCache from a Cache implementation
pub fn new(cache: Cache) -> Self {
Self {
cache: Mutex::new(cache),
}
}
/// A function which fetches the cached json results as json string.
pub async fn cached_json(&self, url: &str) -> Result<String, Report<PoolError>> {
let mut mut_cache = self.cache.lock().await;
mut_cache.cached_json(url).await
}
/// A function which caches the results by using the `url` as the key and
/// `json results` as the value and stores it in the cache
pub async fn cache_results(
&self,
json_results: String,
url: &str,
) -> Result<(), Report<PoolError>> {
let mut mut_cache = self.cache.lock().await;
mut_cache.cache_results(json_results, url).await
}
}

4
src/cache/error.rs vendored
View file

@ -12,6 +12,7 @@ pub enum PoolError {
/// This variant handles the errors which occurs when all the connections
/// in the connection pool return a connection dropped redis error.
PoolExhaustionWithConnectionDropError,
MissingValue,
}
impl fmt::Display for PoolError {
@ -30,6 +31,9 @@ impl fmt::Display for PoolError {
"Error all connections from the pool dropped with connection error"
)
}
PoolError::MissingValue => {
write!(f, "The value is missing from the cache")
}
}
}
}

1
src/cache/mod.rs vendored
View file

@ -3,3 +3,4 @@
pub mod cacher;
pub mod error;
pub mod redis_cacher;

152
src/cache/redis_cacher.rs vendored Normal file
View file

@ -0,0 +1,152 @@
//! This module provides the functionality to cache the aggregated results fetched and aggregated
//! from the upstream search engines in a json format.
use error_stack::Report;
use futures::future::try_join_all;
use md5::compute;
use redis::{aio::ConnectionManager, AsyncCommands, Client, RedisError};
use super::error::PoolError;
/// A named struct which stores the redis Connection url address to which the client will
/// connect to.
#[derive(Clone)]
pub struct RedisCache {
/// It stores a pool of connections ready to be used.
connection_pool: Vec<ConnectionManager>,
/// It stores the size of the connection pool (in other words the number of
/// connections that should be stored in the pool).
pool_size: u8,
/// It stores the index of which connection is being used at the moment.
current_connection: u8,
}
impl RedisCache {
/// A function which fetches the cached json results as json string.
///
/// # Arguments
///
/// * `redis_connection_url` - It takes the redis Connection url address.
/// * `pool_size` - It takes the size of the connection pool (in other words the number of
/// connections that should be stored in the pool).
pub async fn new(
redis_connection_url: &str,
pool_size: u8,
) -> Result<Self, Box<dyn std::error::Error>> {
let client = Client::open(redis_connection_url)?;
let mut tasks: Vec<_> = Vec::new();
for _ in 0..pool_size {
tasks.push(client.get_tokio_connection_manager());
}
let redis_cache = RedisCache {
connection_pool: try_join_all(tasks).await?,
pool_size,
current_connection: Default::default(),
};
Ok(redis_cache)
}
/// A helper function which computes the hash of the url and formats and returns it as string.
///
/// # Arguments
///
/// * `url` - It takes an url as string.
fn hash_url(&self, url: &str) -> String {
format!("{:?}", compute(url))
}
/// A function which fetches the cached json results as json string from the redis server.
///
/// # Arguments
///
/// * `url` - It takes an url as a string.
pub async fn cached_json(&mut self, url: &str) -> Result<String, Report<PoolError>> {
self.current_connection = Default::default();
let hashed_url_string: &str = &self.hash_url(url);
let mut result: Result<String, RedisError> = self.connection_pool
[self.current_connection as usize]
.get(hashed_url_string)
.await;
// Code to check whether the current connection being used is dropped with connection error
// or not. if it drops with the connection error then the current connection is replaced
// with a new connection from the pool which is then used to run the redis command then
// that connection is also checked whether it is dropped or not if it is not then the
// result is passed as a `Result` or else the same process repeats again and if all of the
// connections in the pool result in connection drop error then a custom pool error is
// returned.
loop {
match result {
Err(error) => match error.is_connection_dropped() {
true => {
self.current_connection += 1;
if self.current_connection == self.pool_size {
return Err(Report::new(
PoolError::PoolExhaustionWithConnectionDropError,
));
}
result = self.connection_pool[self.current_connection as usize]
.get(hashed_url_string)
.await;
continue;
}
false => return Err(Report::new(PoolError::RedisError(error))),
},
Ok(res) => return Ok(res),
}
}
}
/// A function which caches the results by using the hashed `url` as the key and
/// `json results` as the value and stores it in redis server with ttl(time to live)
/// set to 60 seconds.
///
/// # Arguments
///
/// * `json_results` - It takes the json results string as an argument.
/// * `url` - It takes the url as a String.
pub async fn cache_results(
&mut self,
json_results: &str,
url: &str,
) -> Result<(), Report<PoolError>> {
self.current_connection = Default::default();
let hashed_url_string: &str = &self.hash_url(url);
let mut result: Result<(), RedisError> = self.connection_pool
[self.current_connection as usize]
.set_ex(hashed_url_string, json_results, 60)
.await;
// Code to check whether the current connection being used is dropped with connection error
// or not. if it drops with the connection error then the current connection is replaced
// with a new connection from the pool which is then used to run the redis command then
// that connection is also checked whether it is dropped or not if it is not then the
// result is passed as a `Result` or else the same process repeats again and if all of the
// connections in the pool result in connection drop error then a custom pool error is
// returned.
loop {
match result {
Err(error) => match error.is_connection_dropped() {
true => {
self.current_connection += 1;
if self.current_connection == self.pool_size {
return Err(Report::new(
PoolError::PoolExhaustionWithConnectionDropError,
));
}
result = self.connection_pool[self.current_connection as usize]
.set_ex(hashed_url_string, json_results, 60)
.await;
continue;
}
false => return Err(Report::new(PoolError::RedisError(error))),
},
Ok(_) => return Ok(()),
}
}
}
}