LUCENE-9905: rename Lucene90VectorFormat and its reader and writer

Michael Sokolov 2021-04-25 15:25:32 -04:00
parent 6d4b5eaba3
commit 45bd06c804
12 changed files with 55 additions and 93 deletions

View File

@@ -84,7 +84,7 @@ public class Lucene90Codec extends Codec {
}
};
private final VectorFormat vectorFormat = new Lucene90VectorFormat();
private final VectorFormat vectorFormat = new Lucene90HnswVectorFormat();
private final StoredFieldsFormat storedFieldsFormat;
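
The hunk above swaps the codec's vector format field over to the renamed class. As a quick illustration, not part of the diff: after this commit the default codec hands out the Hnsw-prefixed format through its vector-format accessor. The sketch below assumes Codec exposes a `vectorFormat()` accessor in this 9.0 development snapshot.

```java
// Minimal sketch, not from this commit: asking the default codec for its vector
// format now yields Lucene90HnswVectorFormat. Assumes Codec.vectorFormat() exists
// in this development snapshot.
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.lucene90.Lucene90Codec;
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat;

public class VectorFormatCheck {
  public static void main(String[] args) {
    Codec codec = new Lucene90Codec();
    VectorFormat format = codec.vectorFormat();
    System.out.println(format instanceof Lucene90HnswVectorFormat); // prints "true"
  }
}
```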

View File

@@ -64,11 +64,11 @@ import org.apache.lucene.index.SegmentWriteState;
*
* @lucene.experimental
*/
public final class Lucene90VectorFormat extends VectorFormat {
public final class Lucene90HnswVectorFormat extends VectorFormat {
static final String META_CODEC_NAME = "Lucene90VectorFormatMeta";
static final String VECTOR_DATA_CODEC_NAME = "Lucene90VectorFormatData";
static final String VECTOR_INDEX_CODEC_NAME = "Lucene90VectorFormatIndex";
static final String META_CODEC_NAME = "Lucene90HnswVectorFormatMeta";
static final String VECTOR_DATA_CODEC_NAME = "Lucene90HnswVectorFormatData";
static final String VECTOR_INDEX_CODEC_NAME = "Lucene90HnswVectorFormatIndex";
static final String META_EXTENSION = "vem";
static final String VECTOR_DATA_EXTENSION = "vec";
static final String VECTOR_INDEX_EXTENSION = "vex";
@@ -77,15 +77,15 @@ public final class Lucene90VectorFormat extends VectorFormat {
static final int VERSION_CURRENT = VERSION_START;
/** Sole constructor */
public Lucene90VectorFormat() {}
public Lucene90HnswVectorFormat() {}
@Override
public VectorWriter fieldsWriter(SegmentWriteState state) throws IOException {
return new Lucene90VectorWriter(state);
return new Lucene90HnswVectorWriter(state);
}
@Override
public VectorReader fieldsReader(SegmentReadState state) throws IOException {
return new Lucene90VectorReader(state);
return new Lucene90HnswVectorReader(state);
}
}

View File

@@ -53,7 +53,7 @@ import org.apache.lucene.util.hnsw.NeighborQueue;
*
* @lucene.experimental
*/
public final class Lucene90VectorReader extends VectorReader {
public final class Lucene90HnswVectorReader extends VectorReader {
private final FieldInfos fieldInfos;
private final Map<String, FieldEntry> fields = new HashMap<>();
@@ -61,10 +61,10 @@ public final class Lucene90VectorReader extends VectorReader {
private final IndexInput vectorIndex;
private final long checksumSeed;
Lucene90VectorReader(SegmentReadState state) throws IOException {
Lucene90HnswVectorReader(SegmentReadState state) throws IOException {
this.fieldInfos = state.fieldInfos;
int versionMeta = readMetadata(state, Lucene90VectorFormat.META_EXTENSION);
int versionMeta = readMetadata(state, Lucene90HnswVectorFormat.META_EXTENSION);
long[] checksumRef = new long[1];
boolean success = false;
try {
@@ -72,15 +72,15 @@ public final class Lucene90VectorReader extends VectorReader {
openDataInput(
state,
versionMeta,
Lucene90VectorFormat.VECTOR_DATA_EXTENSION,
Lucene90VectorFormat.VECTOR_DATA_CODEC_NAME,
Lucene90HnswVectorFormat.VECTOR_DATA_EXTENSION,
Lucene90HnswVectorFormat.VECTOR_DATA_CODEC_NAME,
checksumRef);
vectorIndex =
openDataInput(
state,
versionMeta,
Lucene90VectorFormat.VECTOR_INDEX_EXTENSION,
Lucene90VectorFormat.VECTOR_INDEX_CODEC_NAME,
Lucene90HnswVectorFormat.VECTOR_INDEX_EXTENSION,
Lucene90HnswVectorFormat.VECTOR_INDEX_CODEC_NAME,
checksumRef);
success = true;
} finally {
@@ -101,9 +101,9 @@ public final class Lucene90VectorReader extends VectorReader {
versionMeta =
CodecUtil.checkIndexHeader(
meta,
Lucene90VectorFormat.META_CODEC_NAME,
Lucene90VectorFormat.VERSION_START,
Lucene90VectorFormat.VERSION_CURRENT,
Lucene90HnswVectorFormat.META_CODEC_NAME,
Lucene90HnswVectorFormat.VERSION_START,
Lucene90HnswVectorFormat.VERSION_CURRENT,
state.segmentInfo.getId(),
state.segmentSuffix);
readFields(meta, state.fieldInfos);
@@ -130,8 +130,8 @@ public final class Lucene90VectorReader extends VectorReader {
CodecUtil.checkIndexHeader(
in,
codecName,
Lucene90VectorFormat.VERSION_START,
Lucene90VectorFormat.VERSION_CURRENT,
Lucene90HnswVectorFormat.VERSION_START,
Lucene90HnswVectorFormat.VERSION_CURRENT,
state.segmentInfo.getId(),
state.segmentSuffix);
if (versionMeta != versionVectorData) {
@@ -214,7 +214,7 @@ public final class Lucene90VectorReader extends VectorReader {
@Override
public long ramBytesUsed() {
long totalBytes = RamUsageEstimator.shallowSizeOfInstance(Lucene90VectorReader.class);
long totalBytes = RamUsageEstimator.shallowSizeOfInstance(Lucene90HnswVectorReader.class);
totalBytes +=
RamUsageEstimator.sizeOfMap(
fields, RamUsageEstimator.shallowSizeOfInstance(FieldEntry.class));
@@ -255,7 +255,7 @@ public final class Lucene90VectorReader extends VectorReader {
HnswGraph.search(target, k, k + fanout, vectorValues, getGraphValues(fieldEntry), random);
int i = 0;
ScoreDoc[] scoreDocs = new ScoreDoc[Math.min(results.size(), k)];
boolean reversed = fieldEntry.searchStrategy.reversed;
boolean reversed = fieldEntry.similarityFunction.reversed;
while (results.size() > 0) {
int node = results.topNode();
float score = results.topScore();
@@ -292,7 +292,7 @@ public final class Lucene90VectorReader extends VectorReader {
}
private KnnGraphValues getGraphValues(FieldEntry entry) throws IOException {
if (entry.similarityFunction.isHnsw()) {
if (entry.similarityFunction != VectorValues.SimilarityFunction.NONE) {
HnswGraphFieldEntry graphEntry = (HnswGraphFieldEntry) entry;
IndexInput bytesSlice =
vectorIndex.slice("graph-data", entry.indexDataOffset, entry.indexDataLength);
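
The reader above owns the per-segment HNSW search path: it opens the .vec/.vem/.vex data and drives HnswGraph.search. A hedged usage sketch, not part of the diff, of running a nearest-neighbor search through the codec-level reader; it assumes the VectorReader#search(String, float[], int, int) signature referenced in the VectorValues javadoc later in this commit and assumes a TopDocs return type, with field name, k, and fanout values chosen only for illustration.

```java
// Hedged sketch: per-segment kNN search via the codec-level VectorReader.
import java.io.IOException;
import org.apache.lucene.codecs.VectorReader;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.TopDocs;

public class SegmentKnn {
  static TopDocs searchSegment(LeafReaderContext ctx, String field, float[] target, int k)
      throws IOException {
    VectorReader vectorReader = ((CodecReader) ctx.reader()).getVectorReader();
    int fanout = 10; // extra candidates the HNSW search may visit beyond k
    return vectorReader.search(field, target, k, fanout);
  }
}
```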

View File

@@ -40,32 +40,32 @@ import org.apache.lucene.util.hnsw.NeighborArray;
*
* @lucene.experimental
*/
public final class Lucene90VectorWriter extends VectorWriter {
public final class Lucene90HnswVectorWriter extends VectorWriter {
private final SegmentWriteState segmentWriteState;
private final IndexOutput meta, vectorData, vectorIndex;
private boolean finished;
Lucene90VectorWriter(SegmentWriteState state) throws IOException {
Lucene90HnswVectorWriter(SegmentWriteState state) throws IOException {
assert state.fieldInfos.hasVectorValues();
segmentWriteState = state;
String metaFileName =
IndexFileNames.segmentFileName(
state.segmentInfo.name, state.segmentSuffix, Lucene90VectorFormat.META_EXTENSION);
state.segmentInfo.name, state.segmentSuffix, Lucene90HnswVectorFormat.META_EXTENSION);
String vectorDataFileName =
IndexFileNames.segmentFileName(
state.segmentInfo.name,
state.segmentSuffix,
Lucene90VectorFormat.VECTOR_DATA_EXTENSION);
Lucene90HnswVectorFormat.VECTOR_DATA_EXTENSION);
String indexDataFileName =
IndexFileNames.segmentFileName(
state.segmentInfo.name,
state.segmentSuffix,
Lucene90VectorFormat.VECTOR_INDEX_EXTENSION);
Lucene90HnswVectorFormat.VECTOR_INDEX_EXTENSION);
boolean success = false;
try {
@@ -75,20 +75,20 @@ public final class Lucene90VectorWriter extends VectorWriter {
CodecUtil.writeIndexHeader(
meta,
Lucene90VectorFormat.META_CODEC_NAME,
Lucene90VectorFormat.VERSION_CURRENT,
Lucene90HnswVectorFormat.META_CODEC_NAME,
Lucene90HnswVectorFormat.VERSION_CURRENT,
state.segmentInfo.getId(),
state.segmentSuffix);
CodecUtil.writeIndexHeader(
vectorData,
Lucene90VectorFormat.VECTOR_DATA_CODEC_NAME,
Lucene90VectorFormat.VERSION_CURRENT,
Lucene90HnswVectorFormat.VECTOR_DATA_CODEC_NAME,
Lucene90HnswVectorFormat.VERSION_CURRENT,
state.segmentInfo.getId(),
state.segmentSuffix);
CodecUtil.writeIndexHeader(
vectorIndex,
Lucene90VectorFormat.VECTOR_INDEX_CODEC_NAME,
Lucene90VectorFormat.VERSION_CURRENT,
Lucene90HnswVectorFormat.VECTOR_INDEX_CODEC_NAME,
Lucene90HnswVectorFormat.VERSION_CURRENT,
state.segmentInfo.getId(),
state.segmentSuffix);
success = true;
@@ -121,7 +121,7 @@ public final class Lucene90VectorWriter extends VectorWriter {
long[] offsets = new long[count];
long vectorDataLength = vectorData.getFilePointer() - vectorDataOffset;
long vectorIndexOffset = vectorIndex.getFilePointer();
if (vectors.similarityFunction().isHnsw()) {
if (vectors.similarityFunction() != VectorValues.SimilarityFunction.NONE) {
if (vectors instanceof RandomAccessVectorValuesProducer) {
writeGraph(
vectorIndex,
@@ -146,7 +146,7 @@ public final class Lucene90VectorWriter extends VectorWriter {
vectorIndexLength,
count,
docIds);
if (vectors.similarityFunction().isHnsw()) {
if (vectors.similarityFunction() != VectorValues.SimilarityFunction.NONE) {
writeGraphOffsets(meta, offsets);
}
}

View File

@@ -180,9 +180,9 @@
* of files, recording dimensionally indexed fields, to enable fast numeric range filtering
* and large numeric values like BigInteger and BigDecimal (1D) and geographic shape
* intersection (2D, 3D).
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90VectorFormat Vector values}. The vector
* format stores numeric vectors in a format optimized for random access and computation,
* supporting high-dimensional nearest-neighbor search.
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat Vector values}. The
* vector format stores numeric vectors in a format optimized for random access and
* computation, supporting high-dimensional nearest-neighbor search.
* </ul>
*
* <p>Details on each of these are provided in their linked pages. </div> <a id="File_Naming"></a>
@@ -310,7 +310,7 @@
* <td>Holds indexed points</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90VectorFormat Vector values}</td>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat Vector values}</td>
* <td>.vec, .vem</td>
* <td>Holds indexed vectors; <code>.vec</code> files contain the raw vector data, and
* <code>.vem</code> the vector metadata</td>
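
Since the package documentation above describes what the vector format stores (raw vectors in .vec, metadata in .vem, plus the HNSW graph in .vex), here is a hedged end-to-end sketch, not taken from the commit, of indexing a vector field so that data ends up in those files; the field name and values are illustrative only.

```java
// Hedged sketch: indexing a small vector field with the default codec.
import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.VectorField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.VectorValues;
import org.apache.lucene.store.FSDirectory;

public class IndexVectors {
  public static void main(String[] args) throws IOException {
    try (FSDirectory dir = FSDirectory.open(Paths.get("vector-index"));
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
      Document doc = new Document();
      // a 4-dimensional vector, compared with dot product at search time
      doc.add(
          new VectorField(
              "vector",
              new float[] {0.1f, 0.2f, 0.3f, 0.4f},
              VectorValues.SimilarityFunction.DOT_PRODUCT));
      writer.addDocument(doc);
    } // closing the writer flushes the segment, writing the .vec/.vem/.vex files
  }
}
```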

View File

@@ -78,9 +78,8 @@ public class VectorField extends Field {
throw new IllegalArgumentException(
"cannot index vectors with dimension greater than " + VectorValues.MAX_DIMENSIONS);
}
if (similarityFunction == null || !similarityFunction.isHnsw()) {
throw new IllegalArgumentException(
"similarity function must not be null, received: " + similarityFunction);
if (similarityFunction == null || similarityFunction == VectorValues.SimilarityFunction.NONE) {
throw new IllegalArgumentException("similarity function must not be: " + similarityFunction);
}
FieldType type = new FieldType();
type.setVectorDimensionsAndSimilarityFunction(dimension, similarityFunction);
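
With the isHnsw() helper gone, the constructor above rejects SimilarityFunction.NONE directly, alongside null. A small hedged illustration of the new guard (test-style, not from the commit):

```java
// Hedged illustration: a VectorField must declare a real similarity function.
import org.apache.lucene.document.VectorField;
import org.apache.lucene.index.VectorValues;

public class VectorFieldGuard {
  public static void main(String[] args) {
    float[] v = {1f, 2f, 3f};
    new VectorField("v", v, VectorValues.SimilarityFunction.EUCLIDEAN); // accepted
    try {
      new VectorField("v", v, VectorValues.SimilarityFunction.NONE); // rejected after this change
    } catch (IllegalArgumentException expected) {
      System.out.println(expected.getMessage()); // "similarity function must not be: NONE"
    }
  }
}
```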

View File

@@ -38,8 +38,6 @@ import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.VectorReader;
import org.apache.lucene.codecs.lucene90.Lucene90VectorReader;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
@@ -2338,29 +2336,6 @@ public final class CheckIndex implements Closeable {
+ docCount
+ " docs with values");
}
VectorReader vectorReader = reader.getVectorReader();
if (vectorReader instanceof Lucene90VectorReader) {
KnnGraphValues graphValues =
((Lucene90VectorReader) vectorReader).getGraphValues(fieldInfo.name);
int size = graphValues.size();
for (int i = 0; i < size; i++) {
graphValues.seek(i);
for (int neighbor = graphValues.nextNeighbor();
neighbor != NO_MORE_DOCS;
neighbor = graphValues.nextNeighbor()) {
if (neighbor < 0 || neighbor >= size) {
throw new RuntimeException(
"Field \""
+ fieldInfo.name
+ "\" has an invalid neighbor ordinal: "
+ neighbor
+ " which should be in [0,"
+ size
+ ")");
}
}
}
}
status.totalVectorValues += docCount;
}
}

View File

@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import static org.apache.lucene.util.VectorUtil.dotProduct;
@@ -83,8 +82,8 @@ public abstract class VectorValues extends DocIdSetIterator {
public enum SimilarityFunction {
/**
* No similarity function is provided. Note: {@link VectorReader#search(float[], int, int)} is
* not supported for fields specifying this.
* No similarity function is provided. Note: {@link VectorReader#search(String, float[], int,
* int)} is not supported for fields specifying this.
*/
NONE,
@@ -127,18 +126,6 @@ public abstract class VectorValues extends DocIdSetIterator {
throw new IllegalStateException("Incomparable similarity function: " + this);
}
}
/** Return true if vectors indexed using this similarity will be indexed using an HNSW graph */
public boolean isHnsw() {
switch (this) {
case EUCLIDEAN:
case DOT_PRODUCT:
return true;
case NONE:
default:
return false;
}
}
}
/**
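
Removing isHnsw() above means callers no longer ask the enum whether it implies an HNSW graph; they compare against NONE instead, as the writer and reader hunks earlier in this commit do. A one-method sketch of that idiom:

```java
// Sketch of the idiom adopted in place of the removed isHnsw(): a field gets an
// HNSW graph exactly when it declares a real similarity function.
import org.apache.lucene.index.VectorValues;

public class GraphCheck {
  static boolean hasHnswGraph(VectorValues.SimilarityFunction similarityFunction) {
    // NONE means "no similarity declared", so no graph is written or searched
    return similarityFunction != VectorValues.SimilarityFunction.NONE;
  }
}
```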

View File

@@ -20,7 +20,7 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BaseVectorFormatTestCase;
import org.apache.lucene.util.TestUtil;
public class TestLucene90VectorFormat extends BaseVectorFormatTestCase {
public class TestLucene90HnswVectorFormat extends BaseVectorFormatTestCase {
@Override
protected Codec getCodec() {

View File

@@ -27,7 +27,7 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene90.Lucene90VectorReader;
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorReader;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@@ -171,8 +171,9 @@ public class TestKnnGraph extends LuceneTestCase {
iw.forceMerge(1);
}
try (IndexReader reader = DirectoryReader.open(dir)) {
Lucene90VectorReader vectorReader =
((Lucene90VectorReader) ((CodecReader) getOnlyLeafReader(reader)).getVectorReader());
Lucene90HnswVectorReader vectorReader =
((Lucene90HnswVectorReader)
((CodecReader) getOnlyLeafReader(reader)).getVectorReader());
graph = copyGraph(vectorReader.getGraphValues(KNN_GRAPH_FIELD));
}
}
@@ -309,8 +310,8 @@ public class TestKnnGraph extends LuceneTestCase {
for (LeafReaderContext ctx : dr.leaves()) {
LeafReader reader = ctx.reader();
VectorValues vectorValues = reader.getVectorValues(KNN_GRAPH_FIELD);
Lucene90VectorReader vectorReader =
((Lucene90VectorReader) ((CodecReader) reader).getVectorReader());
Lucene90HnswVectorReader vectorReader =
((Lucene90HnswVectorReader) ((CodecReader) reader).getVectorReader());
if (vectorReader == null) {
continue;
}
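
The test hunks above cast the codec reader to the renamed Lucene90HnswVectorReader to reach the graph. A hedged sketch of that access pattern, combined with the neighbor walk the removed CheckIndex block used to perform; it assumes KnnGraphValues lives in org.apache.lucene.index in this snapshot.

```java
// Hedged sketch: fetch a segment's HNSW graph via the renamed reader and walk each
// node's neighbor list, as the removed CheckIndex validation did.
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

import java.io.IOException;
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorReader;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.KnnGraphValues;
import org.apache.lucene.index.LeafReader;

public class GraphWalk {
  static void dumpGraph(LeafReader leafReader, String field) throws IOException {
    Lucene90HnswVectorReader vectorReader =
        (Lucene90HnswVectorReader) ((CodecReader) leafReader).getVectorReader();
    if (vectorReader == null) {
      return; // this segment has no vectors
    }
    KnnGraphValues graph = vectorReader.getGraphValues(field);
    for (int node = 0; node < graph.size(); node++) {
      graph.seek(node);
      for (int nbr = graph.nextNeighbor(); nbr != NO_MORE_DOCS; nbr = graph.nextNeighbor()) {
        System.out.println(node + " -> " + nbr);
      }
    }
  }
}
```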

View File

@@ -35,7 +35,7 @@ import java.nio.file.Paths;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import org.apache.lucene.codecs.lucene90.Lucene90VectorReader;
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorReader;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
@@ -237,7 +237,7 @@ public class KnnGraphTester {
for (LeafReaderContext context : reader.leaves()) {
LeafReader leafReader = context.reader();
KnnGraphValues knnValues =
((Lucene90VectorReader) ((CodecReader) leafReader).getVectorReader())
((Lucene90HnswVectorReader) ((CodecReader) leafReader).getVectorReader())
.getGraphValues(KNN_FIELD);
System.out.printf("Leaf %d has %d documents\n", context.ord, leafReader.maxDoc());
printGraphFanout(knnValues, leafReader.maxDoc());

View File

@@ -25,7 +25,7 @@ import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene90.Lucene90VectorReader;
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorReader;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.VectorField;
@@ -90,7 +90,7 @@ public class TestHnsw extends LuceneTestCase {
assertEquals(indexedDoc, ctx.reader().numDocs());
assertVectorsEqual(v3, values);
KnnGraphValues graphValues =
((Lucene90VectorReader) ((CodecReader) ctx.reader()).getVectorReader())
((Lucene90HnswVectorReader) ((CodecReader) ctx.reader()).getVectorReader())
.getGraphValues("field");
assertGraphEqual(hnsw, graphValues, nVec);
}