mirror of https://github.com/apache/lucene.git
LUCENE-9639: Add unit tests for SimpleTextVector format (#2404)
... and fix the implementation so it passes!
This commit is contained in:
parent
9a30406871
commit
56cb9a304c
|
@ -39,6 +39,7 @@ import org.apache.lucene.store.IOContext;
|
|||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
|
||||
/**
|
||||
|
@ -63,6 +64,13 @@ public class SimpleTextVectorReader extends VectorReader {
|
|||
readState.segmentInfo.name,
|
||||
readState.segmentSuffix,
|
||||
SimpleTextVectorFormat.META_EXTENSION);
|
||||
String vectorFileName =
|
||||
IndexFileNames.segmentFileName(
|
||||
readState.segmentInfo.name,
|
||||
readState.segmentSuffix,
|
||||
SimpleTextVectorFormat.VECTOR_EXTENSION);
|
||||
|
||||
boolean success = false;
|
||||
try (ChecksumIndexInput in =
|
||||
readState.directory.openChecksumInput(metaFileName, IOContext.DEFAULT)) {
|
||||
int fieldNumber = readInt(in, FIELD_NUMBER);
|
||||
|
@ -86,21 +94,23 @@ public class SimpleTextVectorReader extends VectorReader {
|
|||
fieldNumber = readInt(in, FIELD_NUMBER);
|
||||
}
|
||||
SimpleTextUtil.checkFooter(in);
|
||||
}
|
||||
|
||||
String vectorFileName =
|
||||
IndexFileNames.segmentFileName(
|
||||
readState.segmentInfo.name,
|
||||
readState.segmentSuffix,
|
||||
SimpleTextVectorFormat.VECTOR_EXTENSION);
|
||||
dataIn = readState.directory.openInput(vectorFileName, IOContext.DEFAULT);
|
||||
dataIn = readState.directory.openInput(vectorFileName, IOContext.DEFAULT);
|
||||
success = true;
|
||||
} finally {
|
||||
if (success == false) {
|
||||
IOUtils.closeWhileHandlingException(this);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public VectorValues getVectorValues(String field) throws IOException {
|
||||
FieldInfo info = readState.fieldInfos.fieldInfo(field);
|
||||
if (info == null) {
|
||||
throw new IllegalStateException("No vectors indexed for field=\"" + field + "\"");
|
||||
// mirror the handling in Lucene90VectorReader#getVectorValues
|
||||
// needed to pass TestSimpleTextVectorFormat#testDeleteAllVectorDocs
|
||||
return null;
|
||||
}
|
||||
int dimension = info.getVectorDimension();
|
||||
if (dimension == 0) {
|
||||
|
@ -108,7 +118,9 @@ public class SimpleTextVectorReader extends VectorReader {
|
|||
}
|
||||
FieldEntry fieldEntry = fieldEntries.get(field);
|
||||
if (fieldEntry == null) {
|
||||
throw new IllegalStateException("No entry found for vector field=\"" + field + "\"");
|
||||
// mirror the handling in Lucene90VectorReader#getVectorValues
|
||||
// needed to pass TestSimpleTextVectorFormat#testDeleteAllVectorDocs
|
||||
return null;
|
||||
}
|
||||
if (dimension != fieldEntry.dimension) {
|
||||
throw new IllegalStateException(
|
||||
|
@ -133,6 +145,15 @@ public class SimpleTextVectorReader extends VectorReader {
|
|||
// in SimpleTextUtil.CHECKSUM):
|
||||
long footerStartPos = dataIn.length() - (SimpleTextUtil.CHECKSUM.length + 21);
|
||||
ChecksumIndexInput input = new BufferedChecksumIndexInput(clone);
|
||||
|
||||
// when there's no actual vector data written (e.g. tested in
|
||||
// TestSimpleTextVectorFormat#testDeleteAllVectorDocs)
|
||||
// the first line in dataIn will be the footer line: checksum 00000000000000000000
|
||||
if (footerStartPos == 0) {
|
||||
SimpleTextUtil.checkFooter(input);
|
||||
return;
|
||||
}
|
||||
|
||||
while (true) {
|
||||
SimpleTextUtil.readLine(input, scratch);
|
||||
if (input.getFilePointer() >= footerStartPos) {
|
||||
|
@ -244,7 +265,13 @@ public class SimpleTextVectorReader extends VectorReader {
|
|||
public int docID() {
// Translates the current ordinal (curOrd) into a doc ID:
//   -1 while curOrd is still -1,
//   NO_MORE_DOCS once curOrd has reached entry.size(),
//   otherwise the doc ID stored at entry.ordToDoc[curOrd].
|
||||
if (curOrd == -1) {
|
||||
return -1;
|
||||
} else if (curOrd >= entry.size()) {
|
||||
// once a call to advance / nextDoc (below) has returned NO_MORE_DOCS, calling docID
|
||||
// immediately afterward must also return NO_MORE_DOCS;
|
||||
// this is needed for the TestSimpleTextVectorFormat.testAdvance test case
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
// curOrd is a valid ordinal here, so it can index ordToDoc directly
return entry.ordToDoc[curOrd];
|
||||
}
|
||||
|
||||
|
|
|
@ -50,15 +50,24 @@ public class SimpleTextVectorWriter extends VectorWriter {
|
|||
SimpleTextVectorWriter(SegmentWriteState state) throws IOException {
|
||||
assert state.fieldInfos.hasVectorValues();
|
||||
|
||||
String metaFileName =
|
||||
IndexFileNames.segmentFileName(
|
||||
state.segmentInfo.name, state.segmentSuffix, SimpleTextVectorFormat.META_EXTENSION);
|
||||
meta = state.directory.createOutput(metaFileName, state.context);
|
||||
boolean success = false;
|
||||
// close this writer if creating either output fails;
// needed to pass TestSimpleTextVectorFormat#testRandomExceptions
|
||||
try {
|
||||
String metaFileName =
|
||||
IndexFileNames.segmentFileName(
|
||||
state.segmentInfo.name, state.segmentSuffix, SimpleTextVectorFormat.META_EXTENSION);
|
||||
meta = state.directory.createOutput(metaFileName, state.context);
|
||||
|
||||
String vectorDataFileName =
|
||||
IndexFileNames.segmentFileName(
|
||||
state.segmentInfo.name, state.segmentSuffix, SimpleTextVectorFormat.VECTOR_EXTENSION);
|
||||
vectorData = state.directory.createOutput(vectorDataFileName, state.context);
|
||||
String vectorDataFileName =
|
||||
IndexFileNames.segmentFileName(
|
||||
state.segmentInfo.name, state.segmentSuffix, SimpleTextVectorFormat.VECTOR_EXTENSION);
|
||||
vectorData = state.directory.createOutput(vectorDataFileName, state.context);
|
||||
success = true;
|
||||
} finally {
|
||||
if (success == false) {
|
||||
IOUtils.closeWhileHandlingException(this);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -71,7 +80,9 @@ public class SimpleTextVectorWriter extends VectorWriter {
|
|||
docIds.add(docV);
|
||||
}
|
||||
long vectorDataLength = vectorData.getFilePointer() - vectorDataOffset;
|
||||
writeMeta(fieldInfo, vectorDataOffset, vectorDataLength, docIds);
|
||||
if (vectorDataLength > 0) {
|
||||
writeMeta(fieldInfo, vectorDataOffset, vectorDataLength, docIds);
|
||||
}
|
||||
}
|
||||
|
||||
private void writeVectorValue(VectorValues vectors) throws IOException {
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs.simpletext;
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.index.BaseVectorFormatTestCase;
|
||||
|
||||
/**
 * Runs the test cases inherited from {@link BaseVectorFormatTestCase} against {@link
 * SimpleTextCodec}. No test methods are declared here; the only customization is the codec
 * supplied by {@link #getCodec()}.
 */
public class TestSimpleTextVectorFormat extends BaseVectorFormatTestCase {
|
||||
@Override
|
||||
protected Codec getCodec() {
|
||||
// a fresh SimpleTextCodec instance is handed to the base test case on every call
return new SimpleTextCodec();
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue