Skip to content

Commit

Permalink
fix: be compatible with v1 index format (#2582)
Browse files Browse the repository at this point in the history
Signed-off-by: BubbleCal <bubble-cal@outlook.com>
  • Loading branch information
BubbleCal authored and eddyxu committed Jul 11, 2024
1 parent cf6b2d4 commit 85bab4d
Showing 1 changed file with 33 additions and 1 deletion.
34 changes: 33 additions & 1 deletion rust/lance-index/src/vector/ivf/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@

use std::ops::Range;

use arrow_array::{Array, ArrayRef, FixedSizeListArray, UInt32Array};
use arrow_array::{Array, ArrayRef, FixedSizeListArray, Float32Array, UInt32Array};
use deepsize::DeepSizeOf;
use itertools::Itertools;
use lance_arrow::FixedSizeListArrayExt;
use lance_core::{Error, Result};
use lance_file::{reader::FileReader, writer::FileWriter};
use lance_io::{traits::WriteExt, utils::read_message};
Expand Down Expand Up @@ -176,9 +177,21 @@ impl TryFrom<PbIvf> for IvfModel {

fn try_from(proto: PbIvf) -> Result<Self> {
let centroids = if let Some(tensor) = proto.centroids_tensor.as_ref() {
// For new index format and IVFIndex
debug!("Ivf: loading IVF centroids from index format v2");
Some(FixedSizeListArray::try_from(tensor)?)
} else if !proto.centroids.is_empty() {
// For backward-compatibility
debug!("Ivf: loading IVF centroids from index format v1");
let f32_centroids = Float32Array::from(proto.centroids.clone());
let dimension = f32_centroids.len() / proto.lengths.len();
Some(FixedSizeListArray::try_new_from_values(
f32_centroids,
dimension as i32,
)?)
} else {
// We also use IvfModel to track the offsets/lengths of sub-index like HNSW
// which does not have centroids.
None
};
// We are not using offsets from the protobuf, which was the file offset in the
Expand Down Expand Up @@ -224,6 +237,8 @@ mod tests {
use lance_table::format::SelfDescribingFileReader;
use object_store::path::Path;

use crate::pb;

use super::*;

#[test]
Expand Down Expand Up @@ -272,4 +287,21 @@ mod tests {
assert_eq!(ivf, ivf2);
assert_eq!(ivf2.num_partitions(), 2);
}

#[test]
fn test_load_v1_format_ivf() {
    // The v1 index format keeps every centroid value in the flat `centroids`
    // field and leaves `centroids_tensor` unset.
    let flat_centroids = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
    let legacy_proto = pb::Ivf {
        centroids: flat_centroids,
        lengths: vec![2, 2],
        offsets: vec![0, 2],
        centroids_tensor: None,
    };

    // Six flat values spread over two partitions should yield 3-dimensional centroids.
    let model = IvfModel::try_from(legacy_proto).unwrap();
    assert_eq!(model.num_partitions(), 2);
    assert_eq!(model.dimension(), 3);

    let centroids = model.centroids.as_ref().unwrap();
    assert_eq!(centroids.len(), 2);
    assert_eq!(centroids.value_length(), 3);
}
}

0 comments on commit 85bab4d

Please sign in to comment.