From 9c73562161d9a8ba2063fc9882025fa752855779 Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Wed, 10 Nov 2021 08:18:01 -0800 Subject: [PATCH] LUCENE-10228: Ensure PerFieldKnnVectorsFormat uses right format name (#432) Before when creating a KnnVectorsWriter for merging, we consulted the existing "PER_FIELD_SUFFIX_KEY" attribute to determine the format's per-field suffix. This isn't correct since we could be using a new codec (that produces different formats/ suffixes). This commit modifies TestPerFieldDocValuesFormat#testMergeUsesNewFormat to trigger the problem. Without the fix we it throws an error like "java.nio.file.FileAlreadyExistsException: File "_3_Lucene90HnswVectorsFormat_0.vem" was already written to." --- .../perfield/PerFieldKnnVectorsFormat.java | 18 +++++------------- .../perfield/TestPerFieldKnnVectorsFormat.java | 16 ++++++++++++---- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldKnnVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldKnnVectorsFormat.java index 0e5cb00d8ce..1ec03dac70d 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldKnnVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldKnnVectorsFormat.java @@ -123,25 +123,17 @@ public abstract class PerFieldKnnVectorsFormat extends KnnVectorsFormat { final String formatName = format.getName(); field.putAttribute(PER_FIELD_FORMAT_KEY, formatName); - Integer suffix = null; + Integer suffix; WriterAndSuffix writerAndSuffix = formats.get(format); if (writerAndSuffix == null) { // First time we are seeing this format; create a new instance - String suffixAtt = field.getAttribute(PER_FIELD_SUFFIX_KEY); - if (suffixAtt != null) { - suffix = Integer.valueOf(suffixAtt); - } - + suffix = suffixes.get(formatName); if (suffix == null) { - // bump the suffix - suffix = suffixes.get(formatName); - if (suffix == null) { - suffix = 0; - } else { - suffix = suffix + 1; - } + suffix = 0; + } else { + suffix = suffix + 1; } suffixes.put(formatName, suffix); diff --git a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java index bc0eae5c6e3..31371a5b9fa 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java @@ -123,20 +123,27 @@ public class TestPerFieldKnnVectorsFormat extends BaseKnnVectorsFormatTestCase { for (int i = 0; i < 3; i++) { Document doc = new Document(); doc.add(newTextField("id", "1", Field.Store.YES)); - doc.add(new KnnVectorField("field", new float[] {1, 2, 3})); + doc.add(new KnnVectorField("field1", new float[] {1, 2, 3})); + doc.add(new KnnVectorField("field2", new float[] {1, 2, 3})); iw.addDocument(doc); iw.commit(); } } IndexWriterConfig newConfig = newIndexWriterConfig(new MockAnalyzer(random())); - WriteRecordingKnnVectorsFormat newFormat = + WriteRecordingKnnVectorsFormat format1 = + new WriteRecordingKnnVectorsFormat(TestUtil.getDefaultKnnVectorsFormat()); + WriteRecordingKnnVectorsFormat format2 = new WriteRecordingKnnVectorsFormat(TestUtil.getDefaultKnnVectorsFormat()); newConfig.setCodec( new AssertingCodec() { @Override public KnnVectorsFormat getKnnVectorsFormatForField(String field) { - return newFormat; + if ("field1".equals(field)) { + return format1; + } else { + return format2; + } } }); @@ -145,7 +152,8 @@ public class TestPerFieldKnnVectorsFormat extends BaseKnnVectorsFormatTestCase { } // Check that the new format was used while merging - MatcherAssert.assertThat(newFormat.fieldsWritten, equalTo(Set.of("field"))); + MatcherAssert.assertThat(format1.fieldsWritten, equalTo(Set.of("field1"))); + MatcherAssert.assertThat(format2.fieldsWritten, equalTo(Set.of("field2"))); } }