diff --git a/Cargo.toml b/Cargo.toml index 3fe6c1a0ad..9c2121ce02 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ exclude = ["python"] resolver = "2" [workspace.package] -version = "0.16.2" +version = "0.17.0" edition = "2021" authors = ["Lance Devs "] license = "Apache-2.0" @@ -44,20 +44,20 @@ categories = [ rust-version = "1.78" [workspace.dependencies] -lance = { version = "=0.16.2", path = "./rust/lance" } -lance-arrow = { version = "=0.16.2", path = "./rust/lance-arrow" } -lance-core = { version = "=0.16.2", path = "./rust/lance-core" } -lance-datafusion = { version = "=0.16.2", path = "./rust/lance-datafusion" } -lance-datagen = { version = "=0.16.2", path = "./rust/lance-datagen" } -lance-encoding = { version = "=0.16.2", path = "./rust/lance-encoding" } -lance-encoding-datafusion = { version = "=0.16.2", path = "./rust/lance-encoding-datafusion" } -lance-file = { version = "=0.16.2", path = "./rust/lance-file" } -lance-index = { version = "=0.16.2", path = "./rust/lance-index" } -lance-io = { version = "=0.16.2", path = "./rust/lance-io" } -lance-linalg = { version = "=0.16.2", path = "./rust/lance-linalg" } -lance-table = { version = "=0.16.2", path = "./rust/lance-table" } -lance-test-macros = { version = "=0.16.2", path = "./rust/lance-test-macros" } -lance-testing = { version = "=0.16.2", path = "./rust/lance-testing" } +lance = { version = "=0.17.0", path = "./rust/lance" } +lance-arrow = { version = "=0.17.0", path = "./rust/lance-arrow" } +lance-core = { version = "=0.17.0", path = "./rust/lance-core" } +lance-datafusion = { version = "=0.17.0", path = "./rust/lance-datafusion" } +lance-datagen = { version = "=0.17.0", path = "./rust/lance-datagen" } +lance-encoding = { version = "=0.17.0", path = "./rust/lance-encoding" } +lance-encoding-datafusion = { version = "=0.17.0", path = "./rust/lance-encoding-datafusion" } +lance-file = { version = "=0.17.0", path = "./rust/lance-file" } +lance-index = { version = "=0.17.0", path = 
"./rust/lance-index" } +lance-io = { version = "=0.17.0", path = "./rust/lance-io" } +lance-linalg = { version = "=0.17.0", path = "./rust/lance-linalg" } +lance-table = { version = "=0.17.0", path = "./rust/lance-table" } +lance-test-macros = { version = "=0.17.0", path = "./rust/lance-test-macros" } +lance-testing = { version = "=0.17.0", path = "./rust/lance-testing" } approx = "0.5.1" # Note that this one does not include pyarrow arrow = { version = "52.2", optional = false, features = ["prettyprint"] } @@ -110,7 +110,7 @@ datafusion-physical-expr = { version = "40.0", features = [ ] } deepsize = "0.2.0" either = "1.0" -fsst = { version = "=0.16.2", path = "./rust/lance-encoding/compression-algo/fsst" } +fsst = { version = "=0.17.0", path = "./rust/lance-encoding/compression-algo/fsst" } futures = "0.3" http = "0.2.9" hyperloglogplus = { version = "0.4.1", features = ["const-loop"] } diff --git a/python/Cargo.toml b/python/Cargo.toml index a0d83a2be9..34a8669169 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pylance" -version = "0.16.2" +version = "0.17.0" edition = "2021" authors = ["Lance Devs "] rust-version = "1.65" diff --git a/python/python/tests/test_scalar_index.py b/python/python/tests/test_scalar_index.py index 2b2be90c8d..7a02883765 100644 --- a/python/python/tests/test_scalar_index.py +++ b/python/python/tests/test_scalar_index.py @@ -60,7 +60,7 @@ def data_table(indexed_dataset: lance.LanceDataset): def test_load_indices(indexed_dataset: lance.LanceDataset): indices = indexed_dataset.list_indices() - vec_idx = next(idx for idx in indices if idx["type"] == "Vector") + vec_idx = next(idx for idx in indices if idx["type"] == "IVF_PQ") scalar_idx = next(idx for idx in indices if idx["type"] == "BTree") assert vec_idx is not None assert scalar_idx is not None diff --git a/python/python/tests/test_vector_index.py b/python/python/tests/test_vector_index.py index 8fa1fbbffb..3a3c42196b 100644 --- 
a/python/python/tests/test_vector_index.py +++ b/python/python/tests/test_vector_index.py @@ -383,7 +383,7 @@ def test_pre_populated_ivf_centroids(dataset, tmp_path: Path): if platform.system() == "Windows": expected_filepath = expected_filepath.replace("\\", "/") expected_statistics = { - "index_type": "IVF", + "index_type": "IVF_PQ", "uuid": index_uuid, "uri": expected_filepath, "metric_type": "l2", diff --git a/rust/lance-encoding/compression-algo/fsst/Cargo.toml b/rust/lance-encoding/compression-algo/fsst/Cargo.toml index dc013fd629..044304dbfd 100644 --- a/rust/lance-encoding/compression-algo/fsst/Cargo.toml +++ b/rust/lance-encoding/compression-algo/fsst/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "fsst" -version = "0.16.2" +version.workspace = true edition.workspace = true authors.workspace = true license.workspace = true diff --git a/rust/lance-index/src/lib.rs b/rust/lance-index/src/lib.rs index 27b4835f40..9b33d00913 100644 --- a/rust/lance-index/src/lib.rs +++ b/rust/lance-index/src/lib.rs @@ -79,7 +79,12 @@ pub enum IndexType { // 100+ and up for vector index. /// Flat vector index. 
- Vector = 100, + Vector = 100, // Legacy vector index, alias to IvfPq + IvfFlat = 101, + IvfSq = 102, + IvfPq = 103, + IvfHnswSq = 104, + IvfHnswPq = 105, } impl std::fmt::Display for IndexType { @@ -89,7 +94,11 @@ impl std::fmt::Display for IndexType { Self::Bitmap => write!(f, "Bitmap"), Self::LabelList => write!(f, "LabelList"), Self::Inverted => write!(f, "Inverted"), - Self::Vector => write!(f, "Vector"), + Self::Vector | Self::IvfPq => write!(f, "IVF_PQ"), + Self::IvfFlat => write!(f, "IVF_FLAT"), + Self::IvfSq => write!(f, "IVF_SQ"), + Self::IvfHnswSq => write!(f, "IVF_HNSW_SQ"), + Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"), } } } @@ -103,7 +112,15 @@ impl IndexType { } pub fn is_vector(&self) -> bool { - matches!(self, Self::Vector) + matches!( + self, + Self::Vector + | Self::IvfFlat + | Self::IvfSq + | Self::IvfPq + | Self::IvfHnswSq + | Self::IvfHnswPq + ) } } diff --git a/rust/lance/src/dataset.rs b/rust/lance/src/dataset.rs index 6f7554c92f..e782f29a6c 100644 --- a/rust/lance/src/dataset.rs +++ b/rust/lance/src/dataset.rs @@ -2489,7 +2489,7 @@ mod tests { serde_json::from_str(&dataset.index_statistics("embeddings_idx").await.unwrap()) .unwrap(); let actual_statistics = actual_statistics.as_object().unwrap(); - assert_eq!(actual_statistics["index_type"].as_str().unwrap(), "IVF"); + assert_eq!(actual_statistics["index_type"].as_str().unwrap(), "IVF_PQ"); let deltas = actual_statistics["indices"].as_array().unwrap(); assert_eq!(deltas.len(), 1); diff --git a/rust/lance/src/index.rs b/rust/lance/src/index.rs index ccc920fc18..e2eef0201b 100644 --- a/rust/lance/src/index.rs +++ b/rust/lance/src/index.rs @@ -502,6 +502,7 @@ impl DatasetIndexExt for Dataset { .map(|idx| idx.statistics()) .collect::>>()?; + let index_type = indices[0].index_type().to_string(); let unindexed_fragments = self.unindexed_fragments(index_name).await?; let mut num_unindexed_rows = 0; for f in unindexed_fragments.iter() { @@ -515,7 +516,7 @@ impl DatasetIndexExt for Dataset { let num_indexed_rows =
self.count_rows(None).await? - num_unindexed_rows; let stats = json!({ - "index_type": indices_stats[0]["index_type"], + "index_type": index_type, "name": index_name, "num_indices": metadatas.len(), "indices": indices_stats, diff --git a/rust/lance/src/index/vector/ivf.rs b/rust/lance/src/index/vector/ivf.rs index 55e7257ef7..9591839122 100644 --- a/rust/lance/src/index/vector/ivf.rs +++ b/rust/lance/src/index/vector/ivf.rs @@ -715,7 +715,25 @@ impl Index for IVFIndex { } fn index_type(&self) -> IndexType { - IndexType::Vector + if self.sub_index.as_any().downcast_ref::().is_some() { + IndexType::IvfPq + } else if self + .sub_index + .as_any() + .downcast_ref::>() + .is_some() + { + IndexType::IvfHnswSq + } else if self + .sub_index + .as_any() + .downcast_ref::>() + .is_some() + { + IndexType::IvfHnswPq + } else { + IndexType::Vector + } } fn statistics(&self) -> Result { @@ -728,7 +746,7 @@ impl Index for IVFIndex { let centroid_vecs = centroids_to_vectors(self.ivf.centroids.as_ref().unwrap())?; Ok(serde_json::to_value(IvfIndexStatistics { - index_type: "IVF".to_string(), + index_type: self.index_type().to_string(), uuid: self.uuid.clone(), uri: to_local_path(self.reader.path()), metric_type: self.metric_type.to_string(), diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs index 2eb10510ce..e2df2b13a3 100644 --- a/rust/lance/src/index/vector/ivf/v2.rs +++ b/rust/lance/src/index/vector/ivf/v2.rs @@ -302,7 +302,14 @@ impl Index for IVFIndex IndexType { - IndexType::Vector + match self.sub_index_type() { + (SubIndexType::Flat, QuantizationType::Flat) => IndexType::IvfFlat, + (SubIndexType::Flat, QuantizationType::Product) => IndexType::IvfPq, + (SubIndexType::Flat, QuantizationType::Scalar) => IndexType::IvfSq, + (SubIndexType::Hnsw, QuantizationType::Product) => IndexType::IvfHnswPq, + (SubIndexType::Hnsw, QuantizationType::Scalar) => IndexType::IvfHnswSq, + _ => IndexType::Vector, + } } fn statistics(&self) -> Result { @@ 
-314,18 +321,7 @@ impl Index for IVFIndex format!("IVF_{}", sub_index_type), // ignore FLAT quantization - (sub_index_type, quantization_type) => { - if sub_index_type.to_string() == quantization_type.to_string() { - // ignore redundant quantization type - // e.g. IVF_PQ_PQ should be IVF_PQ - format!("IVF_{}", sub_index_type) - } else { - format!("IVF_{}_{}", sub_index_type, quantization_type) - } - } - }; + let index_type = self.index_type().to_string(); let mut sub_index_stats: serde_json::Value = if let Some(metadata) = self.sub_index_metadata.iter().find(|m| !m.is_empty()) { serde_json::from_str(metadata)?