LUCENE-10228: Ensure PerFieldKnnVectorsFormat uses right format name (#432)

Before when creating a KnnVectorsWriter for merging, we consulted the existing
"PER_FIELD_SUFFIX_KEY" attribute to determine the format's per-field suffix.
This isn't correct since we could be using a new codec (that produces different
formats/ suffixes).

This commit modifies TestPerFieldDocValuesFormat#testMergeUsesNewFormat to
trigger the problem. Without the fix we it throws an error like
"java.nio.file.FileAlreadyExistsException: File
"_3_Lucene90HnswVectorsFormat_0.vem" was already written to."
This commit is contained in:
Julie Tibshirani 2021-11-10 08:18:01 -08:00 committed by GitHub
parent 94b66c0ed2
commit 53586d4231
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 17 additions and 17 deletions

View File

@ -123,25 +123,17 @@ public abstract class PerFieldKnnVectorsFormat extends KnnVectorsFormat {
final String formatName = format.getName();
field.putAttribute(PER_FIELD_FORMAT_KEY, formatName);
Integer suffix = null;
Integer suffix;
WriterAndSuffix writerAndSuffix = formats.get(format);
if (writerAndSuffix == null) {
// First time we are seeing this format; create a new instance
String suffixAtt = field.getAttribute(PER_FIELD_SUFFIX_KEY);
if (suffixAtt != null) {
suffix = Integer.valueOf(suffixAtt);
}
suffix = suffixes.get(formatName);
if (suffix == null) {
// bump the suffix
suffix = suffixes.get(formatName);
if (suffix == null) {
suffix = 0;
} else {
suffix = suffix + 1;
}
suffix = 0;
} else {
suffix = suffix + 1;
}
suffixes.put(formatName, suffix);

View File

@ -123,20 +123,27 @@ public class TestPerFieldKnnVectorsFormat extends BaseKnnVectorsFormatTestCase {
for (int i = 0; i < 3; i++) {
Document doc = new Document();
doc.add(newTextField("id", "1", Field.Store.YES));
doc.add(new KnnVectorField("field", new float[] {1, 2, 3}));
doc.add(new KnnVectorField("field1", new float[] {1, 2, 3}));
doc.add(new KnnVectorField("field2", new float[] {1, 2, 3}));
iw.addDocument(doc);
iw.commit();
}
}
IndexWriterConfig newConfig = newIndexWriterConfig(new MockAnalyzer(random()));
WriteRecordingKnnVectorsFormat newFormat =
WriteRecordingKnnVectorsFormat format1 =
new WriteRecordingKnnVectorsFormat(TestUtil.getDefaultKnnVectorsFormat());
WriteRecordingKnnVectorsFormat format2 =
new WriteRecordingKnnVectorsFormat(TestUtil.getDefaultKnnVectorsFormat());
newConfig.setCodec(
new AssertingCodec() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return newFormat;
if ("field1".equals(field)) {
return format1;
} else {
return format2;
}
}
});
@ -145,7 +152,8 @@ public class TestPerFieldKnnVectorsFormat extends BaseKnnVectorsFormatTestCase {
}
// Check that the new format was used while merging
MatcherAssert.assertThat(newFormat.fieldsWritten, equalTo(Set.of("field")));
MatcherAssert.assertThat(format1.fieldsWritten, equalTo(Set.of("field1")));
MatcherAssert.assertThat(format2.fieldsWritten, equalTo(Set.of("field2")));
}
}