Skip to content

Commit

Permalink
Address Julie's comments
Browse files Browse the repository at this point in the history
  • Loading branch information
mayya-sharipova committed Oct 15, 2021
1 parent 5004872 commit d5cc59f
Show file tree
Hide file tree
Showing 8 changed files with 196 additions and 165 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import org.elasticsearch.index.mapper.MapperService;

/**
* {@link PerFieldMappingCodec This postings format} is the default
* {@link PerFieldMappingCodec This Lucene codec} provides the default
* {@link PostingsFormat} and {@link KnnVectorsFormat} for Elasticsearch. It utilizes the
* {@link MapperService} to look up a {@link PostingsFormat} and {@link KnnVectorsFormat} per field. This
* allows users to change the low level postings format and vectors format for individual fields
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,8 @@ public PostingsFormat getPostingsFormat(String field) {
*/
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
Mapper fieldMapper = fieldMappers.get(field);
if (fieldMapper instanceof VectorFieldMapper) {
return ((VectorFieldMapper) fieldMapper).getKnnVectorsFormatForField();
if (fieldMapper instanceof PerFieldKnnVectorsFormatFieldMapper) {
return ((PerFieldKnnVectorsFormatFieldMapper) fieldMapper).getKnnVectorsFormatForField();
} else {
return null;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.index.mapper;

import org.apache.lucene.codecs.KnnVectorsFormat;

/**
 * Interface for field mappers that can provide a custom knn vectors format for their field.
 * For internal use only.
 */

public interface PerFieldKnnVectorsFormatFieldMapper {

/**
 * Returns the custom knn vectors format configured for this field, or {@code null} if
 * no format is configured or if the configured format matches the default format.
 * @return the knn vectors format for the field, or {@code null} if the default format should be used
 */
KnnVectorsFormat getKnnVectorsFormatForField();
}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,7 @@ setup:
dims: 5
index: true
similarity: dot_product
index_options:
type: hnsw
m: 15
ef_construction: 80

- do:
index:
index: test-index
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ setup:
index_options:
type: hnsw
m: 15
ef_construction: 50

---
"Indexing of Dense vectors should error when dims don't match defined in the mapping":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

package org.elasticsearch.xpack.vectors.mapper;

import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.KnnVectorField;
Expand All @@ -16,7 +18,10 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.index.mapper.VectorFieldMapper;
import org.elasticsearch.index.mapper.MappingParser;
import org.elasticsearch.index.mapper.PerFieldKnnVectorsFormatFieldMapper;
import org.elasticsearch.xcontent.ToXContent;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser.Token;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.fielddata.IndexFieldData;
Expand All @@ -41,17 +46,21 @@
import java.time.ZoneId;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.function.Supplier;

import static org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
import static org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_MAX_CONN;
import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken;

/**
* A {@link FieldMapper} for indexing a dense vector of floats.
*/
public class DenseVectorFieldMapper extends VectorFieldMapper {
public class DenseVectorFieldMapper extends FieldMapper implements PerFieldKnnVectorsFormatFieldMapper {

public static final String CONTENT_TYPE = "dense_vector";
public static short MAX_DIMS_COUNT = 2048; //maximum allowed number of dimensions
public static final IndexOptions DEFAULT_INDEX_OPTIONS = new HnswIndexOptions(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH);
private static final byte INT_BYTES = 4;

private static DenseVectorFieldMapper toType(FieldMapper in) {
Expand All @@ -75,7 +84,7 @@ public static class Builder extends FieldMapper.Builder {
private final Parameter<VectorSimilarity> similarity = Parameter.enumParam(
"similarity", false, m -> toType(m).similarity, null, VectorSimilarity.class);
private final Parameter<IndexOptions> indexOptions = new Parameter<>("index_options", false, () -> null,
(n, c, o) -> VectorFieldMapper.parseVectorIndexOptions(n, o), m -> toType(m).indexOptions);
(n, c, o) -> o == null ? null : parseIndexOptions(n, o), m -> toType(m).indexOptions);
private final Parameter<Map<String, String>> meta = Parameter.metaParam();

final Version indexVersionCreated;
Expand Down Expand Up @@ -105,10 +114,10 @@ public DenseVectorFieldMapper build(MapperBuilderContext context) {
dims.getValue(),
indexed.getValue(),
similarity.getValue(),
indexOptions.getValue(),
indexVersionCreated,
multiFieldsBuilder.build(this, context),
copyTo.build(),
indexOptions.getValue());
copyTo.build());
}
}

Expand All @@ -122,6 +131,71 @@ enum VectorSimilarity {
}
}

/**
 * Base class for the {@code index_options} values of a dense vector field. Each instance
 * records the name of its options type and renders itself through {@link ToXContent}.
 */
abstract static class IndexOptions implements ToXContent {
// Name identifying the kind of index options (HnswIndexOptions passes "hnsw").
final String type;
IndexOptions(String type) {
this.type = type;
}
}

/**
 * Index options of type {@code hnsw}, holding the two configured integers {@code m} and
 * {@code ef_construction}. Instances are only created through
 * {@link #parseIndexOptions(String, Map)} or for the shared {@code DEFAULT_INDEX_OPTIONS}.
 */
static class HnswIndexOptions extends IndexOptions {
    private final int m;
    private final int efConstruction;

    private HnswIndexOptions(int m, int efConstruction) {
        super("hnsw");
        this.m = m;
        this.efConstruction = efConstruction;
    }

    /**
     * Builds hnsw index options from the remaining entries of an {@code index_options} map.
     * Both {@code m} and {@code ef_construction} are removed from the map and are mandatory.
     *
     * @param fieldName name of the field being parsed, passed on to the remaining-fields check
     * @param indexOptionsMap mutable map of options; the consumed keys are removed from it
     * @return the shared {@code DEFAULT_INDEX_OPTIONS} instance when both values equal the
     *         defaults, otherwise a new {@link HnswIndexOptions}
     * @throws MapperParsingException if {@code m} or {@code ef_construction} is missing
     */
    static IndexOptions parseIndexOptions(String fieldName, Map<String, ?> indexOptionsMap) {
        final Object rawM = indexOptionsMap.remove("m");
        final Object rawEfConstruction = indexOptionsMap.remove("ef_construction");
        if (rawM == null) {
            throw new MapperParsingException("[index_options] of type [hnsw] requires field [m] to be configured");
        }
        if (rawEfConstruction == null) {
            throw new MapperParsingException("[index_options] of type [hnsw] requires field [ef_construction] to be configured");
        }
        final int parsedM = XContentMapValues.nodeIntegerValue(rawM);
        final int parsedEfConstruction = XContentMapValues.nodeIntegerValue(rawEfConstruction);
        // Presumably rejects any keys still left in the map after the known ones were removed.
        MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap);

        // Hand back the canonical default instance so callers can detect defaults by identity.
        final boolean matchesDefaults = parsedM == DEFAULT_MAX_CONN && parsedEfConstruction == DEFAULT_BEAM_WIDTH;
        return matchesDefaults ? DEFAULT_INDEX_OPTIONS : new HnswIndexOptions(parsedM, parsedEfConstruction);
    }

    /**
     * Writes these options as an object with the fields {@code type}, {@code m} and
     * {@code ef_construction}, in that order.
     */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        return builder.startObject()
            .field("type", type)
            .field("m", m)
            .field("ef_construction", efConstruction)
            .endObject();
    }

    /** Two instances are equal when they share the same class, {@code m} and {@code efConstruction}. */
    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (o == null || o.getClass() != getClass()) {
            return false;
        }
        final HnswIndexOptions other = (HnswIndexOptions) o;
        return other.m == m && other.efConstruction == efConstruction;
    }

    @Override
    public int hashCode() {
        return Objects.hash(type, m, efConstruction);
    }

    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder();
        text.append("{type=").append(type);
        text.append(", m=").append(m);
        text.append(", ef_construction=").append(efConstruction);
        text.append(" }");
        return text.toString();
    }
}

public static final TypeParser PARSER
= new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated()), notInMultiFields(CONTENT_TYPE));

Expand Down Expand Up @@ -191,15 +265,17 @@ public Query termQuery(Object value, SearchExecutionContext context) {
private final int dims;
private final boolean indexed;
private final VectorSimilarity similarity;
private final IndexOptions indexOptions;
private final Version indexCreatedVersion;

private DenseVectorFieldMapper(String simpleName, MappedFieldType mappedFieldType, int dims, boolean indexed,
VectorSimilarity similarity, Version indexCreatedVersion, MultiFields multiFields,
CopyTo copyTo, VectorFieldMapper.IndexOptions indexOptions) {
super(simpleName, mappedFieldType, multiFields, copyTo, indexOptions);
VectorSimilarity similarity, IndexOptions indexOptions,
Version indexCreatedVersion, MultiFields multiFields, CopyTo copyTo) {
super(simpleName, mappedFieldType, multiFields, copyTo);
this.dims = dims;
this.indexed = indexed;
this.similarity = similarity;
this.indexOptions = indexOptions;
this.indexCreatedVersion = indexCreatedVersion;
}

Expand Down Expand Up @@ -295,4 +371,29 @@ protected String contentType() {
public FieldMapper.Builder getMergeBuilder() {
// Create a fresh Builder from this mapper's simple name and index-created version,
// then initialize it from this mapper via init(this).
return new Builder(simpleName(), indexCreatedVersion).init(this);
}

/**
 * Parses the raw {@code index_options} mapping value of a dense vector field.
 * The {@code type} entry is removed from the map and selects the concrete parser;
 * only {@code hnsw} is supported.
 *
 * @param fieldName name of the field being parsed, used in error messages
 * @param propNode raw mapping value; must be a map, which is modified as entries are consumed
 * @return the parsed index options
 * @throws MapperParsingException if {@code propNode} is not a map, if {@code type} is
 *         missing, or if the type is not recognized
 */
public static IndexOptions parseIndexOptions(String fieldName, Object propNode) {
    // Reject non-object values (including null) up front so that a malformed mapping is
    // reported as a parsing error instead of a ClassCastException / NullPointerException.
    if (propNode instanceof Map == false) {
        throw new MapperParsingException(
            "[index_options] for field [" + fieldName + "] must be an object, but found [" + propNode + "]");
    }
    @SuppressWarnings("unchecked")
    Map<String, ?> indexOptionsMap = (Map<String, ?>) propNode;
    Object typeNode = indexOptionsMap.remove("type");
    if (typeNode == null) {
        throw new MapperParsingException("[index_options] requires field [type] to be configured");
    }
    String type = XContentMapValues.nodeStringValue(typeNode);
    if ("hnsw".equals(type)) {
        return HnswIndexOptions.parseIndexOptions(fieldName, indexOptionsMap);
    }
    throw new MapperParsingException("Unknown vector index options type [" + type + "] for field [" + fieldName + "]");
}

/**
 * Returns the knn vectors format for this field, or {@code null} when no index options are
 * set or when they are the shared {@code DEFAULT_INDEX_OPTIONS} instance.
 */
@Override
public KnnVectorsFormat getKnnVectorsFormatForField() {
    // The default check is by identity against the shared DEFAULT_INDEX_OPTIONS instance.
    final boolean useDefaultFormat = indexOptions == null || indexOptions == DEFAULT_INDEX_OPTIONS;
    if (useDefaultFormat) {
        return null; // use default format
    }
    final HnswIndexOptions hnsw = (HnswIndexOptions) indexOptions;
    return new Lucene90HnswVectorsFormat(hnsw.m, hnsw.efConstruction);
}
}
Loading

0 comments on commit d5cc59f

Please sign in to comment.