-
Notifications
You must be signed in to change notification settings - Fork 466
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
persist: introduce a very small in-mem blob cache
A one-time (skunkworks) experiment showed that an environment running our demo "auction" source + mv got 90%+ cache hits with a 1 MiB cache. This doesn't scale up to prod data sizes and doesn't help with multi-process replicas, but the memory usage seems unobjectionable enough to have it for the cases that it does help. Possibly, a decent chunk of why this is true is pubsub. With the low pubsub latencies, we might write some blob to s3, then within milliseconds notify everyone in-process interested in that blob, waking them up and fetching it. This means even a very small cache is useful because things stay in it just long enough for them to get fetched by everyone that immediately needs them. 1 MiB is enough to fit things like state rollups, remap shard writes, and likely many MVs (probably less so for sources, but atm those still happen in another cluster). Touches #19225
- Loading branch information
Showing
7 changed files
with
180 additions
and
0 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
// Copyright Materialize, Inc. and contributors. All rights reserved. | ||
// | ||
// Use of this software is governed by the Business Source License | ||
// included in the LICENSE file. | ||
// | ||
// As of the Change Date specified in that file, in accordance with | ||
// the Business Source License, use of this software will be governed | ||
// by the Apache License, Version 2.0. | ||
|
||
//! In-process caches of [Blob]. | ||
use std::sync::Arc; | ||
|
||
use async_trait::async_trait; | ||
use bytes::Bytes; | ||
use moka::sync::Cache; | ||
use mz_ore::bytes::SegmentedBytes; | ||
use mz_ore::cast::CastFrom; | ||
use mz_persist::location::{Atomicity, Blob, BlobMetadata, ExternalError}; | ||
use tracing::error; | ||
|
||
use crate::cfg::PersistConfig; | ||
use crate::internal::metrics::Metrics; | ||
|
||
// In-memory cache for [Blob]. | ||
#[derive(Debug)] | ||
pub struct BlobMemCache { | ||
metrics: Arc<Metrics>, | ||
cache: Cache<String, SegmentedBytes>, | ||
blob: Arc<dyn Blob + Send + Sync>, | ||
} | ||
|
||
impl BlobMemCache { | ||
pub fn new( | ||
cfg: &PersistConfig, | ||
metrics: Arc<Metrics>, | ||
blob: Arc<dyn Blob + Send + Sync>, | ||
) -> Arc<dyn Blob + Send + Sync> { | ||
let cache = Cache::<String, SegmentedBytes>::builder() | ||
.max_capacity(u64::cast_from(cfg.blob_cache_mem_limit_bytes)) | ||
.weigher(|k, v| { | ||
u32::try_from(v.len()).unwrap_or_else(|_| { | ||
// We chunk off blobs at 128MiB, so the length should easily | ||
// fit in a u32. | ||
error!( | ||
"unexpectedly large blob in persist cache {} bytes: {}", | ||
v.len(), | ||
k | ||
); | ||
u32::MAX | ||
}) | ||
}) | ||
.build(); | ||
let blob = BlobMemCache { | ||
metrics, | ||
cache, | ||
blob, | ||
}; | ||
Arc::new(blob) | ||
} | ||
|
||
fn update_size_metrics(&self) { | ||
self.metrics | ||
.blob_cache_mem | ||
.size_blobs | ||
.set(self.cache.entry_count()); | ||
self.metrics | ||
.blob_cache_mem | ||
.size_bytes | ||
.set(self.cache.weighted_size()); | ||
} | ||
} | ||
|
||
#[async_trait] | ||
impl Blob for BlobMemCache { | ||
async fn get(&self, key: &str) -> Result<Option<SegmentedBytes>, ExternalError> { | ||
// First check if the blob is in the cache. If it is, return it. If not, | ||
// fetch it and put it in the cache. | ||
// | ||
// Blobs are write-once modify-never, so we don't have to worry about | ||
// any races or cache invalidations here. If the value is in the cache, | ||
// it's also what's in s3 (if not, then there's a horrible bug somewhere | ||
// else). | ||
if let Some(cached_value) = self.cache.get(key) { | ||
self.metrics.blob_cache_mem.hits_blobs.inc(); | ||
self.metrics | ||
.blob_cache_mem | ||
.hits_bytes | ||
.inc_by(u64::cast_from(cached_value.len())); | ||
return Ok(Some(cached_value)); | ||
} | ||
|
||
// This could maybe use moka's async cache to unify any concurrent | ||
// fetches for the same key? That's not particularly expected in | ||
// persist's workload, so punt for now. | ||
let res = self.blob.get(key).await?; | ||
if let Some(blob) = res.as_ref() { | ||
self.cache.insert(key.to_owned(), blob.clone()); | ||
self.update_size_metrics(); | ||
} | ||
Ok(res) | ||
} | ||
|
||
async fn list_keys_and_metadata( | ||
&self, | ||
key_prefix: &str, | ||
f: &mut (dyn FnMut(BlobMetadata) + Send + Sync), | ||
) -> Result<(), ExternalError> { | ||
self.blob.list_keys_and_metadata(key_prefix, f).await | ||
} | ||
|
||
async fn set(&self, key: &str, value: Bytes, atomic: Atomicity) -> Result<(), ExternalError> { | ||
let () = self.blob.set(key, value.clone(), atomic).await?; | ||
self.cache | ||
.insert(key.to_owned(), SegmentedBytes::from(value)); | ||
self.update_size_metrics(); | ||
Ok(()) | ||
} | ||
|
||
async fn delete(&self, key: &str) -> Result<Option<usize>, ExternalError> { | ||
let res = self.blob.delete(key).await; | ||
self.cache.invalidate(key); | ||
self.update_size_metrics(); | ||
res | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters