mirror of https://github.com/apache/lucene.git
LUCENE-10228: Ensure PerFieldKnnVectorsFormat uses right format name (#432)
Previously, when creating a KnnVectorsWriter for merging, we consulted the existing "PER_FIELD_SUFFIX_KEY" attribute to determine the format's per-field suffix. This isn't correct, since we could be using a new codec (one that produces different formats/suffixes). This commit modifies TestPerFieldKnnVectorsFormat#testMergeUsesNewFormat to trigger the problem. Without the fix, it throws an error like "java.nio.file.FileAlreadyExistsException: File "_3_Lucene90HnswVectorsFormat_0.vem" was already written to."
This commit is contained in:
parent
94b66c0ed2
commit
53586d4231
|
@ -123,25 +123,17 @@ public abstract class PerFieldKnnVectorsFormat extends KnnVectorsFormat {
|
|||
final String formatName = format.getName();
|
||||
|
||||
field.putAttribute(PER_FIELD_FORMAT_KEY, formatName);
|
||||
Integer suffix = null;
|
||||
Integer suffix;
|
||||
|
||||
WriterAndSuffix writerAndSuffix = formats.get(format);
|
||||
if (writerAndSuffix == null) {
|
||||
// First time we are seeing this format; create a new instance
|
||||
|
||||
String suffixAtt = field.getAttribute(PER_FIELD_SUFFIX_KEY);
|
||||
if (suffixAtt != null) {
|
||||
suffix = Integer.valueOf(suffixAtt);
|
||||
}
|
||||
|
||||
suffix = suffixes.get(formatName);
|
||||
if (suffix == null) {
|
||||
// bump the suffix
|
||||
suffix = suffixes.get(formatName);
|
||||
if (suffix == null) {
|
||||
suffix = 0;
|
||||
} else {
|
||||
suffix = suffix + 1;
|
||||
}
|
||||
suffix = 0;
|
||||
} else {
|
||||
suffix = suffix + 1;
|
||||
}
|
||||
suffixes.put(formatName, suffix);
|
||||
|
||||
|
|
|
@ -123,20 +123,27 @@ public class TestPerFieldKnnVectorsFormat extends BaseKnnVectorsFormatTestCase {
|
|||
for (int i = 0; i < 3; i++) {
|
||||
Document doc = new Document();
|
||||
doc.add(newTextField("id", "1", Field.Store.YES));
|
||||
doc.add(new KnnVectorField("field", new float[] {1, 2, 3}));
|
||||
doc.add(new KnnVectorField("field1", new float[] {1, 2, 3}));
|
||||
doc.add(new KnnVectorField("field2", new float[] {1, 2, 3}));
|
||||
iw.addDocument(doc);
|
||||
iw.commit();
|
||||
}
|
||||
}
|
||||
|
||||
IndexWriterConfig newConfig = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
WriteRecordingKnnVectorsFormat newFormat =
|
||||
WriteRecordingKnnVectorsFormat format1 =
|
||||
new WriteRecordingKnnVectorsFormat(TestUtil.getDefaultKnnVectorsFormat());
|
||||
WriteRecordingKnnVectorsFormat format2 =
|
||||
new WriteRecordingKnnVectorsFormat(TestUtil.getDefaultKnnVectorsFormat());
|
||||
newConfig.setCodec(
|
||||
new AssertingCodec() {
|
||||
@Override
|
||||
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
|
||||
return newFormat;
|
||||
if ("field1".equals(field)) {
|
||||
return format1;
|
||||
} else {
|
||||
return format2;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
|
@ -145,7 +152,8 @@ public class TestPerFieldKnnVectorsFormat extends BaseKnnVectorsFormatTestCase {
|
|||
}
|
||||
|
||||
// Check that the new format was used while merging
|
||||
MatcherAssert.assertThat(newFormat.fieldsWritten, equalTo(Set.of("field")));
|
||||
MatcherAssert.assertThat(format1.fieldsWritten, equalTo(Set.of("field1")));
|
||||
MatcherAssert.assertThat(format2.fieldsWritten, equalTo(Set.of("field2")));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue