mirror of https://github.com/apache/lucene.git
LUCENE-9905: rename Lucene90VectorFormat and its reader and writer
This commit is contained in:
parent
6d4b5eaba3
commit
45bd06c804
|
@ -84,7 +84,7 @@ public class Lucene90Codec extends Codec {
|
|||
}
|
||||
};
|
||||
|
||||
private final VectorFormat vectorFormat = new Lucene90VectorFormat();
|
||||
private final VectorFormat vectorFormat = new Lucene90HnswVectorFormat();
|
||||
|
||||
private final StoredFieldsFormat storedFieldsFormat;
|
||||
|
||||
|
|
|
@ -64,11 +64,11 @@ import org.apache.lucene.index.SegmentWriteState;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class Lucene90VectorFormat extends VectorFormat {
|
||||
public final class Lucene90HnswVectorFormat extends VectorFormat {
|
||||
|
||||
static final String META_CODEC_NAME = "Lucene90VectorFormatMeta";
|
||||
static final String VECTOR_DATA_CODEC_NAME = "Lucene90VectorFormatData";
|
||||
static final String VECTOR_INDEX_CODEC_NAME = "Lucene90VectorFormatIndex";
|
||||
static final String META_CODEC_NAME = "Lucene90HnswVectorFormatMeta";
|
||||
static final String VECTOR_DATA_CODEC_NAME = "Lucene90HnswVectorFormatData";
|
||||
static final String VECTOR_INDEX_CODEC_NAME = "Lucene90HnswVectorFormatIndex";
|
||||
static final String META_EXTENSION = "vem";
|
||||
static final String VECTOR_DATA_EXTENSION = "vec";
|
||||
static final String VECTOR_INDEX_EXTENSION = "vex";
|
||||
|
@ -77,15 +77,15 @@ public final class Lucene90VectorFormat extends VectorFormat {
|
|||
static final int VERSION_CURRENT = VERSION_START;
|
||||
|
||||
/** Sole constructor */
|
||||
public Lucene90VectorFormat() {}
|
||||
public Lucene90HnswVectorFormat() {}
|
||||
|
||||
@Override
|
||||
public VectorWriter fieldsWriter(SegmentWriteState state) throws IOException {
|
||||
return new Lucene90VectorWriter(state);
|
||||
return new Lucene90HnswVectorWriter(state);
|
||||
}
|
||||
|
||||
@Override
|
||||
public VectorReader fieldsReader(SegmentReadState state) throws IOException {
|
||||
return new Lucene90VectorReader(state);
|
||||
return new Lucene90HnswVectorReader(state);
|
||||
}
|
||||
}
|
|
@ -53,7 +53,7 @@ import org.apache.lucene.util.hnsw.NeighborQueue;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class Lucene90VectorReader extends VectorReader {
|
||||
public final class Lucene90HnswVectorReader extends VectorReader {
|
||||
|
||||
private final FieldInfos fieldInfos;
|
||||
private final Map<String, FieldEntry> fields = new HashMap<>();
|
||||
|
@ -61,10 +61,10 @@ public final class Lucene90VectorReader extends VectorReader {
|
|||
private final IndexInput vectorIndex;
|
||||
private final long checksumSeed;
|
||||
|
||||
Lucene90VectorReader(SegmentReadState state) throws IOException {
|
||||
Lucene90HnswVectorReader(SegmentReadState state) throws IOException {
|
||||
this.fieldInfos = state.fieldInfos;
|
||||
|
||||
int versionMeta = readMetadata(state, Lucene90VectorFormat.META_EXTENSION);
|
||||
int versionMeta = readMetadata(state, Lucene90HnswVectorFormat.META_EXTENSION);
|
||||
long[] checksumRef = new long[1];
|
||||
boolean success = false;
|
||||
try {
|
||||
|
@ -72,15 +72,15 @@ public final class Lucene90VectorReader extends VectorReader {
|
|||
openDataInput(
|
||||
state,
|
||||
versionMeta,
|
||||
Lucene90VectorFormat.VECTOR_DATA_EXTENSION,
|
||||
Lucene90VectorFormat.VECTOR_DATA_CODEC_NAME,
|
||||
Lucene90HnswVectorFormat.VECTOR_DATA_EXTENSION,
|
||||
Lucene90HnswVectorFormat.VECTOR_DATA_CODEC_NAME,
|
||||
checksumRef);
|
||||
vectorIndex =
|
||||
openDataInput(
|
||||
state,
|
||||
versionMeta,
|
||||
Lucene90VectorFormat.VECTOR_INDEX_EXTENSION,
|
||||
Lucene90VectorFormat.VECTOR_INDEX_CODEC_NAME,
|
||||
Lucene90HnswVectorFormat.VECTOR_INDEX_EXTENSION,
|
||||
Lucene90HnswVectorFormat.VECTOR_INDEX_CODEC_NAME,
|
||||
checksumRef);
|
||||
success = true;
|
||||
} finally {
|
||||
|
@ -101,9 +101,9 @@ public final class Lucene90VectorReader extends VectorReader {
|
|||
versionMeta =
|
||||
CodecUtil.checkIndexHeader(
|
||||
meta,
|
||||
Lucene90VectorFormat.META_CODEC_NAME,
|
||||
Lucene90VectorFormat.VERSION_START,
|
||||
Lucene90VectorFormat.VERSION_CURRENT,
|
||||
Lucene90HnswVectorFormat.META_CODEC_NAME,
|
||||
Lucene90HnswVectorFormat.VERSION_START,
|
||||
Lucene90HnswVectorFormat.VERSION_CURRENT,
|
||||
state.segmentInfo.getId(),
|
||||
state.segmentSuffix);
|
||||
readFields(meta, state.fieldInfos);
|
||||
|
@ -130,8 +130,8 @@ public final class Lucene90VectorReader extends VectorReader {
|
|||
CodecUtil.checkIndexHeader(
|
||||
in,
|
||||
codecName,
|
||||
Lucene90VectorFormat.VERSION_START,
|
||||
Lucene90VectorFormat.VERSION_CURRENT,
|
||||
Lucene90HnswVectorFormat.VERSION_START,
|
||||
Lucene90HnswVectorFormat.VERSION_CURRENT,
|
||||
state.segmentInfo.getId(),
|
||||
state.segmentSuffix);
|
||||
if (versionMeta != versionVectorData) {
|
||||
|
@ -214,7 +214,7 @@ public final class Lucene90VectorReader extends VectorReader {
|
|||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
long totalBytes = RamUsageEstimator.shallowSizeOfInstance(Lucene90VectorReader.class);
|
||||
long totalBytes = RamUsageEstimator.shallowSizeOfInstance(Lucene90HnswVectorReader.class);
|
||||
totalBytes +=
|
||||
RamUsageEstimator.sizeOfMap(
|
||||
fields, RamUsageEstimator.shallowSizeOfInstance(FieldEntry.class));
|
||||
|
@ -255,7 +255,7 @@ public final class Lucene90VectorReader extends VectorReader {
|
|||
HnswGraph.search(target, k, k + fanout, vectorValues, getGraphValues(fieldEntry), random);
|
||||
int i = 0;
|
||||
ScoreDoc[] scoreDocs = new ScoreDoc[Math.min(results.size(), k)];
|
||||
boolean reversed = fieldEntry.searchStrategy.reversed;
|
||||
boolean reversed = fieldEntry.similarityFunction.reversed;
|
||||
while (results.size() > 0) {
|
||||
int node = results.topNode();
|
||||
float score = results.topScore();
|
||||
|
@ -292,7 +292,7 @@ public final class Lucene90VectorReader extends VectorReader {
|
|||
}
|
||||
|
||||
private KnnGraphValues getGraphValues(FieldEntry entry) throws IOException {
|
||||
if (entry.similarityFunction.isHnsw()) {
|
||||
if (entry.similarityFunction != VectorValues.SimilarityFunction.NONE) {
|
||||
HnswGraphFieldEntry graphEntry = (HnswGraphFieldEntry) entry;
|
||||
IndexInput bytesSlice =
|
||||
vectorIndex.slice("graph-data", entry.indexDataOffset, entry.indexDataLength);
|
|
@ -40,32 +40,32 @@ import org.apache.lucene.util.hnsw.NeighborArray;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class Lucene90VectorWriter extends VectorWriter {
|
||||
public final class Lucene90HnswVectorWriter extends VectorWriter {
|
||||
|
||||
private final SegmentWriteState segmentWriteState;
|
||||
private final IndexOutput meta, vectorData, vectorIndex;
|
||||
|
||||
private boolean finished;
|
||||
|
||||
Lucene90VectorWriter(SegmentWriteState state) throws IOException {
|
||||
Lucene90HnswVectorWriter(SegmentWriteState state) throws IOException {
|
||||
assert state.fieldInfos.hasVectorValues();
|
||||
segmentWriteState = state;
|
||||
|
||||
String metaFileName =
|
||||
IndexFileNames.segmentFileName(
|
||||
state.segmentInfo.name, state.segmentSuffix, Lucene90VectorFormat.META_EXTENSION);
|
||||
state.segmentInfo.name, state.segmentSuffix, Lucene90HnswVectorFormat.META_EXTENSION);
|
||||
|
||||
String vectorDataFileName =
|
||||
IndexFileNames.segmentFileName(
|
||||
state.segmentInfo.name,
|
||||
state.segmentSuffix,
|
||||
Lucene90VectorFormat.VECTOR_DATA_EXTENSION);
|
||||
Lucene90HnswVectorFormat.VECTOR_DATA_EXTENSION);
|
||||
|
||||
String indexDataFileName =
|
||||
IndexFileNames.segmentFileName(
|
||||
state.segmentInfo.name,
|
||||
state.segmentSuffix,
|
||||
Lucene90VectorFormat.VECTOR_INDEX_EXTENSION);
|
||||
Lucene90HnswVectorFormat.VECTOR_INDEX_EXTENSION);
|
||||
|
||||
boolean success = false;
|
||||
try {
|
||||
|
@ -75,20 +75,20 @@ public final class Lucene90VectorWriter extends VectorWriter {
|
|||
|
||||
CodecUtil.writeIndexHeader(
|
||||
meta,
|
||||
Lucene90VectorFormat.META_CODEC_NAME,
|
||||
Lucene90VectorFormat.VERSION_CURRENT,
|
||||
Lucene90HnswVectorFormat.META_CODEC_NAME,
|
||||
Lucene90HnswVectorFormat.VERSION_CURRENT,
|
||||
state.segmentInfo.getId(),
|
||||
state.segmentSuffix);
|
||||
CodecUtil.writeIndexHeader(
|
||||
vectorData,
|
||||
Lucene90VectorFormat.VECTOR_DATA_CODEC_NAME,
|
||||
Lucene90VectorFormat.VERSION_CURRENT,
|
||||
Lucene90HnswVectorFormat.VECTOR_DATA_CODEC_NAME,
|
||||
Lucene90HnswVectorFormat.VERSION_CURRENT,
|
||||
state.segmentInfo.getId(),
|
||||
state.segmentSuffix);
|
||||
CodecUtil.writeIndexHeader(
|
||||
vectorIndex,
|
||||
Lucene90VectorFormat.VECTOR_INDEX_CODEC_NAME,
|
||||
Lucene90VectorFormat.VERSION_CURRENT,
|
||||
Lucene90HnswVectorFormat.VECTOR_INDEX_CODEC_NAME,
|
||||
Lucene90HnswVectorFormat.VERSION_CURRENT,
|
||||
state.segmentInfo.getId(),
|
||||
state.segmentSuffix);
|
||||
success = true;
|
||||
|
@ -121,7 +121,7 @@ public final class Lucene90VectorWriter extends VectorWriter {
|
|||
long[] offsets = new long[count];
|
||||
long vectorDataLength = vectorData.getFilePointer() - vectorDataOffset;
|
||||
long vectorIndexOffset = vectorIndex.getFilePointer();
|
||||
if (vectors.similarityFunction().isHnsw()) {
|
||||
if (vectors.similarityFunction() != VectorValues.SimilarityFunction.NONE) {
|
||||
if (vectors instanceof RandomAccessVectorValuesProducer) {
|
||||
writeGraph(
|
||||
vectorIndex,
|
||||
|
@ -146,7 +146,7 @@ public final class Lucene90VectorWriter extends VectorWriter {
|
|||
vectorIndexLength,
|
||||
count,
|
||||
docIds);
|
||||
if (vectors.similarityFunction().isHnsw()) {
|
||||
if (vectors.similarityFunction() != VectorValues.SimilarityFunction.NONE) {
|
||||
writeGraphOffsets(meta, offsets);
|
||||
}
|
||||
}
|
|
@ -180,9 +180,9 @@
|
|||
* of files, recording dimensionally indexed fields, to enable fast numeric range filtering
|
||||
* and large numeric values like BigInteger and BigDecimal (1D) and geographic shape
|
||||
* intersection (2D, 3D).
|
||||
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90VectorFormat Vector values}. The vector
|
||||
* format stores numeric vectors in a format optimized for random access and computation,
|
||||
* supporting high-dimensional nearest-neighbor search.
|
||||
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat Vector values}. The
|
||||
* vector format stores numeric vectors in a format optimized for random access and
|
||||
* computation, supporting high-dimensional nearest-neighbor search.
|
||||
* </ul>
|
||||
*
|
||||
* <p>Details on each of these are provided in their linked pages. </div> <a id="File_Naming"></a>
|
||||
|
@ -310,7 +310,7 @@
|
|||
* <td>Holds indexed points</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90VectorFormat Vector values}</td>
|
||||
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat Vector values}</td>
|
||||
* <td>.vec, .vem, .vex</td>
|
||||
* <td>Holds indexed vectors; <code>.vec</code> files contain the raw vector data,
|
||||
* <code>.vem</code> the vector metadata, and <code>.vex</code> the HNSW graph data</td>
|
||||
|
|
|
@ -78,9 +78,8 @@ public class VectorField extends Field {
|
|||
throw new IllegalArgumentException(
|
||||
"cannot index vectors with dimension greater than " + VectorValues.MAX_DIMENSIONS);
|
||||
}
|
||||
if (similarityFunction == null || !similarityFunction.isHnsw()) {
|
||||
throw new IllegalArgumentException(
|
||||
"similarity function must not be null, received: " + similarityFunction);
|
||||
if (similarityFunction == null || similarityFunction == VectorValues.SimilarityFunction.NONE) {
|
||||
throw new IllegalArgumentException("similarity function must not be: " + similarityFunction);
|
||||
}
|
||||
FieldType type = new FieldType();
|
||||
type.setVectorDimensionsAndSimilarityFunction(dimension, similarityFunction);
|
||||
|
|
|
@ -38,8 +38,6 @@ import org.apache.lucene.codecs.PointsReader;
|
|||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
||||
import org.apache.lucene.codecs.TermVectorsReader;
|
||||
import org.apache.lucene.codecs.VectorReader;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90VectorReader;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.DocumentStoredFieldVisitor;
|
||||
import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
|
||||
|
@ -2338,29 +2336,6 @@ public final class CheckIndex implements Closeable {
|
|||
+ docCount
|
||||
+ " docs with values");
|
||||
}
|
||||
VectorReader vectorReader = reader.getVectorReader();
|
||||
if (vectorReader instanceof Lucene90VectorReader) {
|
||||
KnnGraphValues graphValues =
|
||||
((Lucene90VectorReader) vectorReader).getGraphValues(fieldInfo.name);
|
||||
int size = graphValues.size();
|
||||
for (int i = 0; i < size; i++) {
|
||||
graphValues.seek(i);
|
||||
for (int neighbor = graphValues.nextNeighbor();
|
||||
neighbor != NO_MORE_DOCS;
|
||||
neighbor = graphValues.nextNeighbor()) {
|
||||
if (neighbor < 0 || neighbor >= size) {
|
||||
throw new RuntimeException(
|
||||
"Field \""
|
||||
+ fieldInfo.name
|
||||
+ "\" has an invalid neighbor ordinal: "
|
||||
+ neighbor
|
||||
+ " which should be in [0,"
|
||||
+ size
|
||||
+ ")");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
status.totalVectorValues += docCount;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,7 +14,6 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.index;
|
||||
|
||||
import static org.apache.lucene.util.VectorUtil.dotProduct;
|
||||
|
@ -83,8 +82,8 @@ public abstract class VectorValues extends DocIdSetIterator {
|
|||
public enum SimilarityFunction {
|
||||
|
||||
/**
|
||||
* No similarity function is provided. Note: {@link VectorReader#search(float[], int, int)} is
|
||||
* not supported for fields specifying this.
|
||||
* No similarity function is provided. Note: {@link VectorReader#search(String, float[], int,
|
||||
* int)} is not supported for fields specifying this.
|
||||
*/
|
||||
NONE,
|
||||
|
||||
|
@ -127,18 +126,6 @@ public abstract class VectorValues extends DocIdSetIterator {
|
|||
throw new IllegalStateException("Incomparable similarity function: " + this);
|
||||
}
|
||||
}
|
||||
|
||||
/** Return true if vectors indexed using this similarity will be indexed using an HNSW graph */
|
||||
public boolean isHnsw() {
|
||||
switch (this) {
|
||||
case EUCLIDEAN:
|
||||
case DOT_PRODUCT:
|
||||
return true;
|
||||
case NONE:
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -20,7 +20,7 @@ import org.apache.lucene.codecs.Codec;
|
|||
import org.apache.lucene.index.BaseVectorFormatTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
public class TestLucene90VectorFormat extends BaseVectorFormatTestCase {
|
||||
public class TestLucene90HnswVectorFormat extends BaseVectorFormatTestCase {
|
||||
|
||||
@Override
|
||||
protected Codec getCodec() {
|
|
@ -27,7 +27,7 @@ import java.util.LinkedList;
|
|||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90VectorReader;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorReader;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
|
@ -171,8 +171,9 @@ public class TestKnnGraph extends LuceneTestCase {
|
|||
iw.forceMerge(1);
|
||||
}
|
||||
try (IndexReader reader = DirectoryReader.open(dir)) {
|
||||
Lucene90VectorReader vectorReader =
|
||||
((Lucene90VectorReader) ((CodecReader) getOnlyLeafReader(reader)).getVectorReader());
|
||||
Lucene90HnswVectorReader vectorReader =
|
||||
((Lucene90HnswVectorReader)
|
||||
((CodecReader) getOnlyLeafReader(reader)).getVectorReader());
|
||||
graph = copyGraph(vectorReader.getGraphValues(KNN_GRAPH_FIELD));
|
||||
}
|
||||
}
|
||||
|
@ -309,8 +310,8 @@ public class TestKnnGraph extends LuceneTestCase {
|
|||
for (LeafReaderContext ctx : dr.leaves()) {
|
||||
LeafReader reader = ctx.reader();
|
||||
VectorValues vectorValues = reader.getVectorValues(KNN_GRAPH_FIELD);
|
||||
Lucene90VectorReader vectorReader =
|
||||
((Lucene90VectorReader) ((CodecReader) reader).getVectorReader());
|
||||
Lucene90HnswVectorReader vectorReader =
|
||||
((Lucene90HnswVectorReader) ((CodecReader) reader).getVectorReader());
|
||||
if (vectorReader == null) {
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -35,7 +35,7 @@ import java.nio.file.Paths;
|
|||
import java.util.HashSet;
|
||||
import java.util.Locale;
|
||||
import java.util.Set;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90VectorReader;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorReader;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
|
@ -237,7 +237,7 @@ public class KnnGraphTester {
|
|||
for (LeafReaderContext context : reader.leaves()) {
|
||||
LeafReader leafReader = context.reader();
|
||||
KnnGraphValues knnValues =
|
||||
((Lucene90VectorReader) ((CodecReader) leafReader).getVectorReader())
|
||||
((Lucene90HnswVectorReader) ((CodecReader) leafReader).getVectorReader())
|
||||
.getGraphValues(KNN_FIELD);
|
||||
System.out.printf("Leaf %d has %d documents\n", context.ord, leafReader.maxDoc());
|
||||
printGraphFanout(knnValues, leafReader.maxDoc());
|
||||
|
|
|
@ -25,7 +25,7 @@ import java.util.HashSet;
|
|||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90VectorReader;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorReader;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.document.VectorField;
|
||||
|
@ -90,7 +90,7 @@ public class TestHnsw extends LuceneTestCase {
|
|||
assertEquals(indexedDoc, ctx.reader().numDocs());
|
||||
assertVectorsEqual(v3, values);
|
||||
KnnGraphValues graphValues =
|
||||
((Lucene90VectorReader) ((CodecReader) ctx.reader()).getVectorReader())
|
||||
((Lucene90HnswVectorReader) ((CodecReader) ctx.reader()).getVectorReader())
|
||||
.getGraphValues("field");
|
||||
assertGraphEqual(hnsw, graphValues, nVec);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue