diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index f7522177895..32f6b518e08 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -49,6 +49,9 @@ Bug Fixes
 * LUCENE-7472: MultiFieldQueryParser.getFieldQuery() drops queries that are
   neither BooleanQuery nor TermQuery.  (Steve Rowe)
 
+* LUCENE-7456: PerFieldPostings/DocValues was failing to delegate the
+  merge method (Julien MASSENET via Mike McCandless)
+
 Improvements
 
 * LUCENE-7439: FuzzyQuery now matches all terms within the specified
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java
index 6772cadde08..eef232cfcd1 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java
@@ -18,6 +18,7 @@ package org.apache.lucene.codecs.perfield;
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.IdentityHashMap;
@@ -32,6 +33,7 @@ import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.MergeState;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
@@ -127,6 +129,32 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
       getInstance(field).addSortedSetField(field, valuesProducer);
     }
 
+    @Override
+    public void merge(MergeState mergeState) throws IOException {
+      Map<DocValuesConsumer, Collection<String>> consumersToField = new IdentityHashMap<>();
+
+      // Group each consumer by the fields it handles
+      for (FieldInfo fi : mergeState.mergeFieldInfos) {
+        DocValuesConsumer consumer = getInstance(fi);
+        Collection<String> fieldsForConsumer = consumersToField.get(consumer);
+        if (fieldsForConsumer == null) {
+          fieldsForConsumer = new ArrayList<>();
+          consumersToField.put(consumer, fieldsForConsumer);
+        }
+        fieldsForConsumer.add(fi.name);
+      }
+
+      // Delegate the merge to the appropriate consumer
+      PerFieldMergeState pfMergeState = new PerFieldMergeState(mergeState);
+      try {
+        for (Map.Entry<DocValuesConsumer, Collection<String>> e : consumersToField.entrySet()) {
+          e.getKey().merge(pfMergeState.apply(e.getValue()));
+        }
+      } finally {
+        pfMergeState.reset();
+      }
+    }
+
     private DocValuesConsumer getInstance(FieldInfo field) throws IOException {
       DocValuesFormat format = null;
       if (field.getDocValuesGen() != -1) {
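The merge() implementation above hinges on one subtle choice: consumers are grouped in an IdentityHashMap, keyed on the consumer instance rather than on equals(), so two per-field consumers of the same class still form separate groups and each receives only its own fields. Below is a minimal, self-contained sketch of that grouping idiom -- plain Java with stand-in types, not code from the patch:

    import java.util.ArrayList;
    import java.util.Collection;
    import java.util.IdentityHashMap;
    import java.util.LinkedHashMap;
    import java.util.Map;

    public class GroupByConsumerInstance {
      public static void main(String[] args) {
        // Stand-ins for two DocValuesConsumer instances (possibly of the same class)
        Object consumerA = new Object();
        Object consumerB = new Object();

        // field -> consumer routing, analogous to getInstance(fi) in the patch
        Map<String, Object> routing = new LinkedHashMap<>();
        routing.put("dv1", consumerA);
        routing.put("dv2", consumerA);
        routing.put("dv3", consumerB);

        // IdentityHashMap keys on the exact instance, not on equals()
        Map<Object, Collection<String>> consumersToField = new IdentityHashMap<>();
        for (Map.Entry<String, Object> e : routing.entrySet()) {
          Collection<String> fieldsForConsumer = consumersToField.get(e.getValue());
          if (fieldsForConsumer == null) {
            fieldsForConsumer = new ArrayList<>();
            consumersToField.put(e.getValue(), fieldsForConsumer);
          }
          fieldsForConsumer.add(e.getKey());
        }

        System.out.println(consumersToField.size()); // 2 groups: [dv1, dv2] and [dv3]
      }
    }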
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldMergeState.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldMergeState.java
new file mode 100644
index 00000000000..991eedfe78e
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldMergeState.java
@@ -0,0 +1,274 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.perfield;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.Terms;
+
+/**
+ * Utility class to update the {@link MergeState} instance to be restricted to a set of fields.
+ * <p>
+ * Warning: the input {@linkplain MergeState} instance will be updated when calling {@link #apply(Collection)}.
+ * <p>
+ * It should be called within a {@code try {...} finally {...}} block to make sure that the mergeState instance is
+ * restored to its original state:
+ * <pre class="prettyprint">
+ * PerFieldMergeState pfMergeState = new PerFieldMergeState(mergeState);
+ * try {
+ *   doSomething(pfMergeState.apply(fields));
+ *   ...
+ * } finally {
+ *   pfMergeState.reset();
+ * }
+ * </pre>
+ */
+final class PerFieldMergeState {
+  private final MergeState in;
+  private final FieldInfos orgMergeFieldInfos;
+  private final FieldInfos[] orgFieldInfos;
+  private final FieldsProducer[] orgFieldsProducers;
+
+  PerFieldMergeState(MergeState in) {
+    this.in = in;
+    this.orgMergeFieldInfos = in.mergeFieldInfos;
+    this.orgFieldInfos = new FieldInfos[in.fieldInfos.length];
+    this.orgFieldsProducers = new FieldsProducer[in.fieldsProducers.length];
+
+    System.arraycopy(in.fieldInfos, 0, this.orgFieldInfos, 0, this.orgFieldInfos.length);
+    System.arraycopy(in.fieldsProducers, 0, this.orgFieldsProducers, 0, this.orgFieldsProducers.length);
+  }
+
+  /**
+   * Update the input {@link MergeState} instance to restrict the fields to the given ones.
+   *
+   * @param fields The fields to keep in the updated instance.
+   * @return The updated instance.
+   */
+  MergeState apply(Collection<String> fields) {
+    in.mergeFieldInfos = new FilterFieldInfos(orgMergeFieldInfos, fields);
+    for (int i = 0; i < orgFieldInfos.length; i++) {
+      in.fieldInfos[i] = new FilterFieldInfos(orgFieldInfos[i], fields);
+    }
+    for (int i = 0; i < orgFieldsProducers.length; i++) {
+      in.fieldsProducers[i] = new FilterFieldsProducer(orgFieldsProducers[i], fields);
+    }
+    return in;
+  }
+
+  /**
+   * Resets the input {@link MergeState} instance to its original state.
+   *
+   * @return The reset instance.
+   */
+  MergeState reset() {
+    in.mergeFieldInfos = orgMergeFieldInfos;
+    System.arraycopy(orgFieldInfos, 0, in.fieldInfos, 0, in.fieldInfos.length);
+    System.arraycopy(orgFieldsProducers, 0, in.fieldsProducers, 0, in.fieldsProducers.length);
+    return in;
+  }
+
+  private static class FilterFieldInfos extends FieldInfos {
+    private final Set<String> filteredNames;
+    private final List<FieldInfo> filtered;
+
+    // Copy of the private fields from FieldInfos
+    // Renamed so as to be less confusing about which fields we're referring to
+    private final boolean filteredHasVectors;
+    private final boolean filteredHasProx;
+    private final boolean filteredHasPayloads;
+    private final boolean filteredHasOffsets;
+    private final boolean filteredHasFreq;
+    private final boolean filteredHasNorms;
+    private final boolean filteredHasDocValues;
+    private final boolean filteredHasPointValues;
+
+    FilterFieldInfos(FieldInfos src, Collection<String> filterFields) {
+      // Copy all the input FieldInfo objects since the field numbering must be kept consistent
+      super(toArray(src));
+
+      boolean hasVectors = false;
+      boolean hasProx = false;
+      boolean hasPayloads = false;
+      boolean hasOffsets = false;
+      boolean hasFreq = false;
+      boolean hasNorms = false;
+      boolean hasDocValues = false;
+      boolean hasPointValues = false;
+
+      this.filteredNames = new HashSet<>(filterFields);
+      this.filtered = new ArrayList<>(filterFields.size());
+      for (FieldInfo fi : src) {
+        if (filterFields.contains(fi.name)) {
+          this.filtered.add(fi);
+          hasVectors |= fi.hasVectors();
+          hasProx |= fi.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+          hasFreq |= fi.getIndexOptions() != IndexOptions.DOCS;
+          hasOffsets |= fi.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+          hasNorms |= fi.hasNorms();
+          hasDocValues |= fi.getDocValuesType() != DocValuesType.NONE;
+          hasPayloads |= fi.hasPayloads();
+          hasPointValues |= (fi.getPointDimensionCount() != 0);
+        }
+      }
+
+      this.filteredHasVectors = hasVectors;
+      this.filteredHasProx = hasProx;
+      this.filteredHasPayloads = hasPayloads;
+      this.filteredHasOffsets = hasOffsets;
+      this.filteredHasFreq = hasFreq;
+      this.filteredHasNorms = hasNorms;
+      this.filteredHasDocValues = hasDocValues;
+      this.filteredHasPointValues = hasPointValues;
+    }
+
+    private static FieldInfo[] toArray(FieldInfos src) {
+      FieldInfo[] res = new FieldInfo[src.size()];
+      int i = 0;
+      for (FieldInfo fi : src) {
+        res[i++] = fi;
+      }
+      return res;
+    }
+
+    @Override
+    public Iterator<FieldInfo> iterator() {
+      return filtered.iterator();
+    }
+
+    @Override
+    public boolean hasFreq() {
+      return filteredHasFreq;
+    }
+
+    @Override
+    public boolean hasProx() {
+      return filteredHasProx;
+    }
+
+    @Override
+    public boolean hasPayloads() {
+      return filteredHasPayloads;
+    }
+
+    @Override
+    public boolean hasOffsets() {
+      return filteredHasOffsets;
+    }
+
+    @Override
+    public boolean hasVectors() {
+      return filteredHasVectors;
+    }
+
+    @Override
+    public boolean hasNorms() {
+      return filteredHasNorms;
+    }
+
+    @Override
+    public boolean hasDocValues() {
+      return filteredHasDocValues;
+    }
+
+    @Override
+    public boolean hasPointValues() {
+      return filteredHasPointValues;
+    }
+
+    @Override
+    public int size() {
+      return filtered.size();
+    }
+
+    @Override
+    public FieldInfo fieldInfo(String fieldName) {
+      if (!filteredNames.contains(fieldName)) {
+        // Throw IAE to be consistent with fieldInfo(int) which throws it as well on invalid numbers
+        throw new IllegalArgumentException("The field named '" + fieldName + "' is not accessible in the current " +
+            "merge context, available ones are: " + filteredNames);
+      }
+      return super.fieldInfo(fieldName);
+    }
+
+    @Override
+    public FieldInfo fieldInfo(int fieldNumber) {
+      FieldInfo res = super.fieldInfo(fieldNumber);
+      if (!filteredNames.contains(res.name)) {
+        throw new IllegalArgumentException("The field named '" + res.name + "' numbered '" + fieldNumber + "' is not " +
+            "accessible in the current merge context, available ones are: " + filteredNames);
+      }
+      return res;
+    }
+  }
+
+  private static class FilterFieldsProducer extends FieldsProducer {
+    private final FieldsProducer in;
+    private final List<String> filtered;
+
+    FilterFieldsProducer(FieldsProducer in, Collection<String> filterFields) {
+      this.in = in;
+      this.filtered = new ArrayList<>(filterFields);
+    }
+
+    @Override
+    public long ramBytesUsed() {
+      return in.ramBytesUsed();
+    }
+
+    @Override
+    public Iterator<String> iterator() {
+      return filtered.iterator();
+    }
+
+    @Override
+    public Terms terms(String field) throws IOException {
+      if (!filtered.contains(field)) {
+        throw new IllegalArgumentException("The field named '" + field + "' is not accessible in the current " +
+            "merge context, available ones are: " + filtered);
+      }
+      return in.terms(field);
+    }
+
+    @Override
+    public int size() {
+      return filtered.size();
+    }
+
+    @Override
+    public void close() throws IOException {
+      in.close();
+    }
+
+    @Override
+    public void checkIntegrity() throws IOException {
+      in.checkIntegrity();
+    }
+  }
+}
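PerFieldMergeState is deliberately not a defensive copy: apply() mutates the shared MergeState in place and reset() restores the saved originals, which is why both callers of this class wrap it in try/finally. A minimal stand-alone sketch of that mutate-and-restore pattern, using hypothetical SharedState/PerCallerView names rather than Lucene API:

    import java.util.Arrays;
    import java.util.List;

    public class ApplyResetSketch {
      // Stands in for MergeState: one shared, mutable object
      static class SharedState {
        List<String> fields;
        SharedState(List<String> fields) { this.fields = fields; }
      }

      // Stands in for PerFieldMergeState: saves originals, mutates, restores
      static class PerCallerView {
        private final SharedState in;
        private final List<String> orgFields;

        PerCallerView(SharedState in) {
          this.in = in;
          this.orgFields = in.fields; // save the original, like orgMergeFieldInfos
        }

        SharedState apply(List<String> restrictTo) {
          in.fields = restrictTo;     // mutate in place, as apply(Collection) does
          return in;
        }

        void reset() {
          in.fields = orgFields;      // always restore, as reset() does
        }
      }

      public static void main(String[] args) {
        SharedState state = new SharedState(Arrays.asList("dv1", "dv2", "dv3"));
        PerCallerView view = new PerCallerView(state);
        try {
          System.out.println(view.apply(Arrays.asList("dv3")).fields); // [dv3]
        } finally {
          view.reset();               // later code sees the original state again
        }
        System.out.println(state.fields); // [dv1, dv2, dv3]
      }
    }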
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
index e49d2ff8cd1..281b08fe374 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
@@ -39,6 +39,8 @@ import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.FilterLeafReader.FilterFields;
 import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.Terms;
@@ -116,7 +118,61 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
 
     @Override
     public void write(Fields fields) throws IOException {
+      Map<PostingsFormat, FieldsGroup> formatToGroups = buildFieldsGroupMapping(fields);
 
+      // Write postings
+      boolean success = false;
+      try {
+        for (Map.Entry<PostingsFormat, FieldsGroup> ent : formatToGroups.entrySet()) {
+          PostingsFormat format = ent.getKey();
+          final FieldsGroup group = ent.getValue();
+
+          // Exposes only the fields from this group:
+          Fields maskedFields = new FilterFields(fields) {
+            @Override
+            public Iterator<String> iterator() {
+              return group.fields.iterator();
+            }
+          };
+
+          FieldsConsumer consumer = format.fieldsConsumer(group.state);
+          toClose.add(consumer);
+          consumer.write(maskedFields);
+        }
+        success = true;
+      } finally {
+        if (!success) {
+          IOUtils.closeWhileHandlingException(toClose);
+        }
+      }
+    }
+
+    @Override
+    public void merge(MergeState mergeState) throws IOException {
+      Map<PostingsFormat, FieldsGroup> formatToGroups = buildFieldsGroupMapping(new MultiFields(mergeState.fieldsProducers, null));
+
+      // Merge postings
+      PerFieldMergeState pfMergeState = new PerFieldMergeState(mergeState);
+      boolean success = false;
+      try {
+        for (Map.Entry<PostingsFormat, FieldsGroup> ent : formatToGroups.entrySet()) {
+          PostingsFormat format = ent.getKey();
+          final FieldsGroup group = ent.getValue();
+
+          FieldsConsumer consumer = format.fieldsConsumer(group.state);
+          toClose.add(consumer);
+          consumer.merge(pfMergeState.apply(group.fields));
+        }
+        success = true;
+      } finally {
+        pfMergeState.reset();
+        if (!success) {
+          IOUtils.closeWhileHandlingException(toClose);
+        }
+      }
+    }
+
+    private Map<PostingsFormat, FieldsGroup> buildFieldsGroupMapping(Fields fields) {
       // Maps a PostingsFormat instance to the suffix it
       // should use
       Map<PostingsFormat,FieldsGroup> formatToGroups = new HashMap<>();
@@ -124,7 +180,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
       // Holds last suffix of each PostingFormat name
       Map<String,Integer> suffixes = new HashMap<>();
 
-      // First pass: assign field -> PostingsFormat
+      // Assign field -> PostingsFormat
       for(String field : fields) {
         FieldInfo fieldInfo = writeState.fieldInfos.fieldInfo(field);
@@ -177,32 +233,7 @@
             ", field=" + fieldInfo.name + ", old=" + previousValue + ", new=" + group.suffix);
         }
       }
-
-      // Second pass: write postings
-      boolean success = false;
-      try {
-        for(Map.Entry<PostingsFormat,FieldsGroup> ent : formatToGroups.entrySet()) {
-          PostingsFormat format = ent.getKey();
-          final FieldsGroup group = ent.getValue();
-
-          // Exposes only the fields from this group:
-          Fields maskedFields = new FilterFields(fields) {
-            @Override
-            public Iterator<String> iterator() {
-              return group.fields.iterator();
-            }
-          };
-
-          FieldsConsumer consumer = format.fieldsConsumer(group.state);
-          toClose.add(consumer);
-          consumer.write(maskedFields);
-        }
-        success = true;
-      } finally {
-        if (success == false) {
-          IOUtils.closeWhileHandlingException(toClose);
-        }
-      }
+      return formatToGroups;
     }
 
     @Override
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java
index 5777dadf4c8..5b985a4f5f1 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java
@@ -18,13 +18,19 @@ package org.apache.lucene.codecs.perfield;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
 import java.util.Random;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.codecs.asserting.AssertingCodec;
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
@@ -33,11 +39,15 @@ import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.index.BaseDocValuesFormatTestCase;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.MergeState;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.RandomCodec;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
@@ -127,4 +137,116 @@ public class TestPerFieldDocValuesFormat extends BaseDocValuesFormatTestCase {
     ireader.close();
     directory.close();
   }
+
+  public void testMergeCalledOnTwoFormats() throws IOException {
+    MergeRecordingDocValueFormatWrapper dvf1 = new MergeRecordingDocValueFormatWrapper(TestUtil.getDefaultDocValuesFormat());
+    MergeRecordingDocValueFormatWrapper dvf2 = new MergeRecordingDocValueFormatWrapper(TestUtil.getDefaultDocValuesFormat());
+
+    IndexWriterConfig iwc = newIndexWriterConfig();
+    iwc.setCodec(new AssertingCodec() {
+      @Override
+      public DocValuesFormat getDocValuesFormatForField(String field) {
+        switch (field) {
+          case "dv1":
+          case "dv2":
+            return dvf1;
+
+          case "dv3":
+            return dvf2;
+
+          default:
+            return super.getDocValuesFormatForField(field);
+        }
+      }
+    });
+
+    Directory directory = newDirectory();
+
+    IndexWriter iwriter = new IndexWriter(directory, iwc);
+
+    Document doc = new Document();
+    doc.add(new NumericDocValuesField("dv1", 5));
+    doc.add(new NumericDocValuesField("dv2", 42));
+    doc.add(new BinaryDocValuesField("dv3", new BytesRef("hello world")));
+    iwriter.addDocument(doc);
+    iwriter.commit();
+
+    doc = new Document();
+    doc.add(new NumericDocValuesField("dv1", 8));
+    doc.add(new NumericDocValuesField("dv2", 45));
+    doc.add(new BinaryDocValuesField("dv3", new BytesRef("goodbye world")));
+    iwriter.addDocument(doc);
+    iwriter.commit();
+
+    iwriter.forceMerge(1, true);
+    iwriter.close();
+
+    assertEquals(1, dvf1.nbMergeCalls);
+    assertEquals(new HashSet<>(Arrays.asList("dv1", "dv2")), new HashSet<>(dvf1.fieldNames));
+    assertEquals(1, dvf2.nbMergeCalls);
+    assertEquals(Collections.singletonList("dv3"), dvf2.fieldNames);
+
+    directory.close();
+  }
+
+  private static final class MergeRecordingDocValueFormatWrapper extends DocValuesFormat {
+    private final DocValuesFormat delegate;
+    final List<String> fieldNames = new ArrayList<>();
+    volatile int nbMergeCalls = 0;
+
+    MergeRecordingDocValueFormatWrapper(DocValuesFormat delegate) {
+      super(delegate.getName());
+      this.delegate = delegate;
+    }
+
+    @Override
+    public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+      final DocValuesConsumer consumer = delegate.fieldsConsumer(state);
+      return new DocValuesConsumer() {
+        @Override
+        public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
+          consumer.addNumericField(field, values);
+        }
+
+        @Override
+        public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException {
+          consumer.addBinaryField(field, values);
+        }
+
+        @Override
+        public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
+          consumer.addSortedField(field, values, docToOrd);
+        }
+
+        @Override
+        public void addSortedNumericField(FieldInfo field, Iterable<Number> docToValueCount, Iterable<Number> values) throws IOException {
+          consumer.addSortedNumericField(field, docToValueCount, values);
+        }
+
+        @Override
+        public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
+          consumer.addSortedSetField(field, values, docToOrdCount, ords);
+        }
+
+        @Override
+        public void merge(MergeState mergeState) throws IOException {
+          nbMergeCalls++;
+          for (FieldInfo fi : mergeState.mergeFieldInfos) {
+            fieldNames.add(fi.name);
+          }
+          consumer.merge(mergeState);
+        }
+
+        @Override
+        public void close() throws IOException {
+          consumer.close();
+        }
+      };
+    }
+
+    @Override
+    public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
+      return delegate.fieldsProducer(state);
+    }
+  }
 }
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java
index 58c37fc525b..959a2b74118 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java
@@ -18,9 +18,16 @@ package org.apache.lucene.codecs.perfield;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.asserting.AssertingCodec;
 import org.apache.lucene.codecs.blockterms.LuceneVarGapFixedInterval;
@@ -29,14 +36,21 @@ import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LogDocMergePolicy;
+import org.apache.lucene.index.MergeState;
 import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.TermQuery;
@@ -322,4 +336,100 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
     iw.close();
     dir.close(); // checkindex
   }
+
+  @SuppressWarnings("deprecation")
+  public void testMergeCalledOnTwoFormats() throws IOException {
+    MergeRecordingPostingsFormatWrapper pf1 = new MergeRecordingPostingsFormatWrapper(TestUtil.getDefaultPostingsFormat());
+    MergeRecordingPostingsFormatWrapper pf2 = new MergeRecordingPostingsFormatWrapper(TestUtil.getDefaultPostingsFormat());
+
+    IndexWriterConfig iwc = newIndexWriterConfig();
+    iwc.setCodec(new AssertingCodec() {
+      @Override
+      public PostingsFormat getPostingsFormatForField(String field) {
+        switch (field) {
+          case "f1":
+          case "f2":
+            return pf1;
+
+          case "f3":
+          case "f4":
+            return pf2;
+
+          default:
+            return super.getPostingsFormatForField(field);
+        }
+      }
+    });
+
+    Directory directory = newDirectory();
+
+    IndexWriter iwriter = new IndexWriter(directory, iwc);
+
+    Document doc = new Document();
+    doc.add(new StringField("f1", "val1", Field.Store.NO));
+    doc.add(new StringField("f2", "val2", Field.Store.YES));
+    doc.add(new IntPoint("f3", 3)); // Points are not indexed as postings and should not appear in the merge fields
+    doc.add(new StringField("f4", "val4", Field.Store.NO));
+    iwriter.addDocument(doc);
+    iwriter.commit();
+
+    doc = new Document();
+    doc.add(new StringField("f1", "val5", Field.Store.NO));
+    doc.add(new StringField("f2", "val6", Field.Store.YES));
+    doc.add(new IntPoint("f3", 7));
+    doc.add(new StringField("f4", "val8", Field.Store.NO));
+    iwriter.addDocument(doc);
+    iwriter.commit();
+
+    iwriter.forceMerge(1, true);
+    iwriter.close();
+
+    assertEquals(1, pf1.nbMergeCalls);
+    assertEquals(new HashSet<>(Arrays.asList("f1", "f2")), new HashSet<>(pf1.fieldNames));
+    assertEquals(1, pf2.nbMergeCalls);
+    assertEquals(Collections.singletonList("f4"), pf2.fieldNames);
+
+    directory.close();
+  }
+
+  private static final class MergeRecordingPostingsFormatWrapper extends PostingsFormat {
+    private final PostingsFormat delegate;
+    final List<String> fieldNames = new ArrayList<>();
+    int nbMergeCalls = 0;
+
+    MergeRecordingPostingsFormatWrapper(PostingsFormat delegate) {
+      super(delegate.getName());
+      this.delegate = delegate;
+    }
+
+    @Override
+    public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+      final FieldsConsumer consumer = delegate.fieldsConsumer(state);
+      return new FieldsConsumer() {
+        @Override
+        public void write(Fields fields) throws IOException {
+          consumer.write(fields);
+        }
+
+        @Override
+        public void merge(MergeState mergeState) throws IOException {
+          nbMergeCalls++;
+          for (FieldInfo fi : mergeState.mergeFieldInfos) {
+            fieldNames.add(fi.name);
+          }
+          consumer.merge(mergeState);
+        }
+
+        @Override
+        public void close() throws IOException {
+          consumer.close();
+        }
+      };
+    }
+
+    @Override
+    public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+      return delegate.fieldsProducer(state);
+    }
+  }
 }
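The two MergeRecording*Wrapper helpers in the tests above are plain recording decorators: every call is forwarded unchanged to the wrapped format's consumer, and only a call counter plus the observed field names are kept for the assertions. A generic sketch of that pattern, with hypothetical names rather than Lucene API:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class RecordingDecoratorSketch {
      interface Merger {
        void merge(List<String> fields);
      }

      static class RecordingMerger implements Merger {
        private final Merger delegate;
        final List<String> fieldNames = new ArrayList<>();
        int nbMergeCalls = 0;

        RecordingMerger(Merger delegate) {
          this.delegate = delegate;
        }

        @Override
        public void merge(List<String> fields) {
          nbMergeCalls++;             // record first...
          fieldNames.addAll(fields);
          delegate.merge(fields);     // ...then forward unchanged
        }
      }

      public static void main(String[] args) {
        RecordingMerger m = new RecordingMerger(fields -> { /* real merge work */ });
        m.merge(Arrays.asList("f1", "f2"));
        System.out.println(m.nbMergeCalls + " " + m.fieldNames); // 1 [f1, f2]
      }
    }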