Skip to content

Commit

Permalink
[ENH] Allow cache eviction for HNSW provider (#2484)
Browse files Browse the repository at this point in the history
## Description of changes

*Summarize the changes made by this PR.*
 - Improvements & Bug fixes
	 - This PR introduces a cache with an eviction policy to the HNSW provider, allowing HNSW index files to be evicted.
 - New functionality
	 - ...

## Test plan
*How are these changes tested?*

- [ ] Tests pass locally with `pytest` for python, `yarn test` for js, `cargo test` for rust

## Documentation Changes
*Are all docstrings for user-facing APIs updated if required? Do we need to make documentation changes in the [docs repository](https://github.com/chroma-core/docs)?*
  • Loading branch information
Ishiihara committed Jul 16, 2024
1 parent 2e5bff4 commit 49686be
Show file tree
Hide file tree
Showing 8 changed files with 124 additions and 60 deletions.
10 changes: 10 additions & 0 deletions rust/worker/chroma_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ query_service:
sparse_index_cache_config:
lru:
capacity: 1000
hnsw_provider:
hnsw_temporary_path: "~/tmp"
hnsw_cache_config:
lru:
capacity: 1000

compaction_service:
service_name: "compaction-service"
Expand Down Expand Up @@ -101,3 +106,8 @@ compaction_service:
sparse_index_cache_config:
lru:
capacity: 1000
hnsw_provider:
hnsw_temporary_path: "~/tmp"
hnsw_cache_config:
lru:
capacity: 1000
19 changes: 13 additions & 6 deletions rust/worker/src/compactor/compaction_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ use futures::stream::FuturesUnordered;
use futures::StreamExt;
use std::fmt::Debug;
use std::fmt::Formatter;
use std::path::PathBuf;
use std::sync::atomic::AtomicU32;
use std::sync::Arc;
use std::time::Duration;
Expand Down Expand Up @@ -222,21 +221,23 @@ impl Configurable<CompactionServiceConfig> for CompactionManager {
assignment_policy,
);

// TODO: real path
let path = PathBuf::from("~/tmp");
// TODO: hnsw index provider should be injected somehow
let blockfile_provider = BlockfileProvider::try_from_config(&(
config.blockfile_provider.clone(),
storage.clone(),
))
.await?;

let hnsw_index_provider =
HnswIndexProvider::try_from_config(&(config.hnsw_provider.clone(), storage.clone()))
.await?;

Ok(CompactionManager::new(
scheduler,
log,
sysdb,
storage.clone(),
blockfile_provider,
HnswIndexProvider::new(storage.clone(), path),
hnsw_index_provider,
compaction_manager_queue_size,
Duration::from_secs(compaction_interval_sec),
min_compaction_size,
Expand Down Expand Up @@ -315,6 +316,7 @@ mod tests {
use crate::types::OperationRecord;
use crate::types::Segment;
use std::collections::HashMap;
use std::path::PathBuf;
use std::str::FromStr;
use uuid::Uuid;

Expand Down Expand Up @@ -497,6 +499,7 @@ mod tests {

let block_cache = Cache::new(&CacheConfig::Unbounded(UnboundedCacheConfig {}));
let sparse_index_cache = Cache::new(&CacheConfig::Unbounded(UnboundedCacheConfig {}));
let hnsw_cache = Cache::new(&CacheConfig::Unbounded(UnboundedCacheConfig {}));
let mut manager = CompactionManager::new(
scheduler,
log,
Expand All @@ -508,7 +511,11 @@ mod tests {
block_cache,
sparse_index_cache,
),
HnswIndexProvider::new(storage, PathBuf::from(tmpdir.path().to_str().unwrap())),
HnswIndexProvider::new(
storage,
PathBuf::from(tmpdir.path().to_str().unwrap()),
hnsw_cache,
),
compaction_manager_queue_size,
compaction_interval,
min_compaction_size,
Expand Down
42 changes: 42 additions & 0 deletions rust/worker/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ pub(crate) struct QueryServiceConfig {
pub(crate) log: crate::log::config::LogConfig,
pub(crate) dispatcher: crate::execution::config::DispatcherConfig,
pub(crate) blockfile_provider: crate::blockstore::config::BlockfileProviderConfig,
pub(crate) hnsw_provider: crate::index::config::HnswProviderConfig,
}

#[derive(Deserialize)]
Expand All @@ -130,6 +131,7 @@ pub(crate) struct CompactionServiceConfig {
pub(crate) dispatcher: crate::execution::config::DispatcherConfig,
pub(crate) compactor: crate::compactor::config::CompactorConfig,
pub(crate) blockfile_provider: crate::blockstore::config::BlockfileProviderConfig,
pub(crate) hnsw_provider: crate::index::config::HnswProviderConfig,
}

/// # Description
Expand Down Expand Up @@ -203,6 +205,11 @@ mod tests {
sparse_index_cache_config:
lru:
capacity: 1000
hnsw_provider:
hnsw_temporary_path: "~/tmp"
hnsw_cache_config:
lru:
capacity: 1000
compaction_service:
service_name: "compaction-service"
Expand Down Expand Up @@ -255,6 +262,11 @@ mod tests {
sparse_index_cache_config:
lru:
capacity: 1000
hnsw_provider:
hnsw_temporary_path: "~/tmp"
hnsw_cache_config:
lru:
capacity: 1000
"#,
);
let config = RootConfig::load();
Expand Down Expand Up @@ -323,6 +335,11 @@ mod tests {
sparse_index_cache_config:
lru:
capacity: 1000
hnsw_provider:
hnsw_temporary_path: "~/tmp"
hnsw_cache_config:
lru:
capacity: 1000
compaction_service:
service_name: "compaction-service"
Expand Down Expand Up @@ -375,6 +392,11 @@ mod tests {
sparse_index_cache_config:
lru:
capacity: 1000
hnsw_provider:
hnsw_temporary_path: "~/tmp"
hnsw_cache_config:
lru:
capacity: 1000
"#,
);
let config = RootConfig::load_from_path("random_path.yaml");
Expand Down Expand Up @@ -461,6 +483,11 @@ mod tests {
sparse_index_cache_config:
lru:
capacity: 1000
hnsw_provider:
hnsw_temporary_path: "~/tmp"
hnsw_cache_config:
lru:
capacity: 1000
compaction_service:
service_name: "compaction-service"
Expand Down Expand Up @@ -513,6 +540,11 @@ mod tests {
sparse_index_cache_config:
lru:
capacity: 1000
hnsw_provider:
hnsw_temporary_path: "~/tmp"
hnsw_cache_config:
lru:
capacity: 1000
"#,
);
let config = RootConfig::load();
Expand Down Expand Up @@ -593,6 +625,11 @@ mod tests {
sparse_index_cache_config:
lru:
capacity: 1000
hnsw_provider:
hnsw_temporary_path: "~/tmp"
hnsw_cache_config:
lru:
capacity: 1000
compaction_service:
service_name: "compaction-service"
Expand Down Expand Up @@ -637,6 +674,11 @@ mod tests {
sparse_index_cache_config:
lru:
capacity: 1000
hnsw_provider:
hnsw_temporary_path: "~/tmp"
hnsw_cache_config:
lru:
capacity: 1000
"#,
);
let config = RootConfig::load();
Expand Down
8 changes: 8 additions & 0 deletions rust/worker/src/index/config.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
use crate::cache::config::CacheConfig;
use serde::Deserialize;

/// Configuration for the HNSW index provider.
///
/// Deserialized (via serde) from the `hnsw_provider` section of the service
/// configuration; both the query service and the compaction service configs
/// carry one of these.
#[derive(Deserialize, Debug, Clone)]
pub(crate) struct HnswProviderConfig {
/// Filesystem path, given as a string, handed to the HNSW provider.
/// Presumably the local directory where HNSW index files are staged — confirm
/// against `HnswIndexProvider`.
/// NOTE(review): the sample configs use "~/tmp"; verify that the consumer
/// expands `~`, since a plain `PathBuf` conversion would treat it literally.
pub(crate) hnsw_temporary_path: String,
/// Cache configuration for the provider's HNSW cache (the sample configs use
/// an `lru` policy with a `capacity`).
pub(crate) hnsw_cache_config: CacheConfig,
}
Loading

0 comments on commit 49686be

Please sign in to comment.