From de6233976a25d7c95ef985e7a6694c029c426bf5 Mon Sep 17 00:00:00 2001 From: Juan Camilo Rodriguez Duran Date: Tue, 7 Apr 2020 16:11:25 -0400 Subject: [PATCH] LUCENE-8050: PerFieldDocValuesFormat should not get the DocValuesFormat on a field that has no doc values. Closes #1408 --- lucene/CHANGES.txt | 3 ++ .../perfield/PerFieldDocValuesFormat.java | 3 ++ .../perfield/TestPerFieldDocValuesFormat.java | 40 ++++++++++++++++++- 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 1e5b8f791ab..38c262fd285 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -131,6 +131,9 @@ Improvements * LUCENE-9279: Update dictionary version for Ukrainian analyzer to 4.9.1 (Andriy Rysin via Dawid Weiss) +* LUCENE-8050: PerFieldDocValuesFormat should not get the DocValuesFormat on a field that has no doc values. + (David Smiley, Juan Rodriguez) + Optimizations --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java index 1d031769b8a..a882e580b89 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java @@ -135,6 +135,9 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat { // Group each consumer by the fields it handles for (FieldInfo fi : mergeState.mergeFieldInfos) { + if (fi.getDocValuesType() == DocValuesType.NONE) { + continue; + } // merge should ignore current format for the fields being merged DocValuesConsumer consumer = getInstance(fi, true); Collection<String> fieldsForConsumer = consumersToField.get(consumer); diff --git a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java index 7bfc14d2f29..213987f3e26 
100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java @@ -36,6 +36,7 @@ import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.BaseDocValuesFormatTestCase; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DirectoryReader; @@ -65,7 +66,7 @@ public class TestPerFieldDocValuesFormat extends BaseDocValuesFormatTestCase { @Override public void setUp() throws Exception { - codec = new RandomCodec(new Random(random().nextLong()), Collections.<String>emptySet()); + codec = new RandomCodec(new Random(random().nextLong()), Collections.emptySet()); super.setUp(); } @@ -189,6 +190,43 @@ public class TestPerFieldDocValuesFormat extends BaseDocValuesFormatTestCase { directory.close(); } + public void testDocValuesMergeWithIndexedFields() throws IOException { + MergeRecordingDocValueFormatWrapper docValuesFormat = new MergeRecordingDocValueFormatWrapper(TestUtil.getDefaultDocValuesFormat()); + + IndexWriterConfig iwc = new IndexWriterConfig(); + iwc.setCodec(new AssertingCodec() { + @Override + public DocValuesFormat getDocValuesFormatForField(String field) { + return docValuesFormat; + } + }); + + Directory directory = newDirectory(); + + IndexWriter iwriter = new IndexWriter(directory, iwc); + + Document doc = new Document(); + doc.add(new NumericDocValuesField("dv1", 5)); + doc.add(new TextField("normalField", "not a doc value", Field.Store.NO)); + iwriter.addDocument(doc); + iwriter.commit(); + + doc = new Document(); + doc.add(new TextField("anotherField", "again no doc values here", Field.Store.NO)); + doc.add(new TextField("normalField", "my document without doc values", Field.Store.NO)); + 
iwriter.addDocument(doc); + iwriter.commit(); + + + iwriter.forceMerge(1, true); + iwriter.close(); + + // "normalField" and "anotherField" are ignored when merging doc values. + assertEquals(1, docValuesFormat.nbMergeCalls); + assertEquals(Collections.singletonList("dv1"), docValuesFormat.fieldNames); + directory.close(); + } + private static final class MergeRecordingDocValueFormatWrapper extends DocValuesFormat { private final DocValuesFormat delegate; final List<String> fieldNames = new ArrayList<>();