LUCENE-7456: PerFieldPostings/DocValuesFormat was failing to delegate the merge method

Mike McCandless 2016-10-04 11:31:33 -04:00
parent 6739e075b4
commit 796ed508f3
6 changed files with 595 additions and 27 deletions
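
In practical terms, the bug was that PerFieldPostingsFormat and PerFieldDocValuesFormat never overrode merge() on the consumers they return, so merging always fell back to the generic default inherited from FieldsConsumer/DocValuesConsumer, which replays the merged data through the write()/add*() path and bypasses any specialized merge() a per-field delegate may define. A minimal sketch of the kind of consumer affected (the class name is illustrative, not part of this commit):

import java.io.IOException;

import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.MergeState;

// Hypothetical delegating consumer whose merge() override carries a
// specialized merge path. Before this commit, wrapping its format in
// PerFieldPostingsFormat meant merge() below was never invoked; merges
// always went through the generic write() path instead.
final class OptimizedMergeFieldsConsumer extends FieldsConsumer {
  private final FieldsConsumer delegate;

  OptimizedMergeFieldsConsumer(FieldsConsumer delegate) {
    this.delegate = delegate;
  }

  @Override
  public void write(Fields fields) throws IOException {
    delegate.write(fields); // generic path: re-writes postings field by field
  }

  @Override
  public void merge(MergeState mergeState) throws IOException {
    // A specialized merge (e.g. a bulk copy) would live here; LUCENE-7456
    // makes sure the per-field wrappers actually delegate to it.
    delegate.merge(mergeState);
  }

  @Override
  public void close() throws IOException {
    delegate.close();
  }
}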

lucene/CHANGES.txt

@@ -49,6 +49,9 @@ Bug Fixes
* LUCENE-7472: MultiFieldQueryParser.getFieldQuery() drops queries that are
  neither BooleanQuery nor TermQuery. (Steve Rowe)
* LUCENE-7456: PerFieldPostings/DocValues was failing to delegate the
  merge method (Julien MASSENET via Mike McCandless)
Improvements
* LUCENE-7439: FuzzyQuery now matches all terms within the specified

lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java

@@ -18,6 +18,7 @@ package org.apache.lucene.codecs.perfield;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.IdentityHashMap;
@@ -32,6 +33,7 @@ import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
@@ -127,6 +129,32 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
getInstance(field).addSortedSetField(field, valuesProducer);
}
@Override
public void merge(MergeState mergeState) throws IOException {
Map<DocValuesConsumer, Collection<String>> consumersToField = new IdentityHashMap<>();
// Group each consumer by the fields it handles
for (FieldInfo fi : mergeState.mergeFieldInfos) {
DocValuesConsumer consumer = getInstance(fi);
Collection<String> fieldsForConsumer = consumersToField.get(consumer);
if (fieldsForConsumer == null) {
fieldsForConsumer = new ArrayList<>();
consumersToField.put(consumer, fieldsForConsumer);
}
fieldsForConsumer.add(fi.name);
}
// Delegate the merge to the appropriate consumer
PerFieldMergeState pfMergeState = new PerFieldMergeState(mergeState);
try {
for (Map.Entry<DocValuesConsumer, Collection<String>> e : consumersToField.entrySet()) {
e.getKey().merge(pfMergeState.apply(e.getValue()));
}
} finally {
pfMergeState.reset();
}
}
private DocValuesConsumer getInstance(FieldInfo field) throws IOException {
DocValuesFormat format = null;
if (field.getDocValuesGen() != -1) {
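
A note on the data structure used in the new merge() above: the map is an IdentityHashMap because grouping must be per consumer instance; getInstance(fi) can return the same consumer for several fields, and each distinct consumer must receive exactly one merge() call covering all of its fields, even if two consumers happen to compare equal(). A standalone sketch of that grouping idiom (class and variable names are illustrative):

import java.util.ArrayList;
import java.util.Collection;
import java.util.IdentityHashMap;
import java.util.LinkedHashMap;
import java.util.Map;

public class GroupByConsumerDemo {
  public static void main(String[] args) {
    // Two stand-ins for the DocValuesConsumer instances returned by getInstance()
    Object consumerA = new Object();
    Object consumerB = new Object();

    Map<String, Object> fieldToConsumer = new LinkedHashMap<>();
    fieldToConsumer.put("dv1", consumerA);
    fieldToConsumer.put("dv2", consumerA);
    fieldToConsumer.put("dv3", consumerB);

    // Group fields by consumer identity, mirroring merge() above
    Map<Object, Collection<String>> consumersToFields = new IdentityHashMap<>();
    for (Map.Entry<String, Object> e : fieldToConsumer.entrySet()) {
      consumersToFields.computeIfAbsent(e.getValue(), k -> new ArrayList<>()).add(e.getKey());
    }

    // Prints 2: one merge() call per consumer, covering {dv1, dv2} and {dv3}
    System.out.println(consumersToFields.size());
  }
}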

lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldMergeState.java

@@ -0,0 +1,274 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.perfield;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.Terms;
/**
* Utility class to update the {@link MergeState} instance to be restricted to a set of fields.
* <p>
* Warning: the input {@linkplain MergeState} instance will be updated when calling {@link #apply(Collection)}.
* <p>
* It should be called within a {@code try &#123;...&#125; finally &#123;...&#125;} block to make sure that the mergeState instance is
* restored to its original state:
* <pre>
* PerFieldMergeState pfMergeState = new PerFieldMergeState(mergeState);
* try {
* doSomething(pfMergeState.apply(fields));
* ...
* } finally {
* pfMergeState.reset();
* }
* </pre>
*/
final class PerFieldMergeState {
private final MergeState in;
private final FieldInfos orgMergeFieldInfos;
private final FieldInfos[] orgFieldInfos;
private final FieldsProducer[] orgFieldsProducers;
PerFieldMergeState(MergeState in) {
this.in = in;
this.orgMergeFieldInfos = in.mergeFieldInfos;
this.orgFieldInfos = new FieldInfos[in.fieldInfos.length];
this.orgFieldsProducers = new FieldsProducer[in.fieldsProducers.length];
System.arraycopy(in.fieldInfos, 0, this.orgFieldInfos, 0, this.orgFieldInfos.length);
System.arraycopy(in.fieldsProducers, 0, this.orgFieldsProducers, 0, this.orgFieldsProducers.length);
}
/**
* Update the input {@link MergeState} instance to restrict the fields to the given ones.
*
* @param fields The fields to keep in the updated instance.
* @return The updated instance.
*/
MergeState apply(Collection<String> fields) {
in.mergeFieldInfos = new FilterFieldInfos(orgMergeFieldInfos, fields);
for (int i = 0; i < orgFieldInfos.length; i++) {
in.fieldInfos[i] = new FilterFieldInfos(orgFieldInfos[i], fields);
}
for (int i = 0; i < orgFieldsProducers.length; i++) {
in.fieldsProducers[i] = new FilterFieldsProducer(orgFieldsProducers[i], fields);
}
return in;
}
/**
* Resets the input {@link MergeState} instance to its original state.
*
* @return The reset instance.
*/
MergeState reset() {
in.mergeFieldInfos = orgMergeFieldInfos;
System.arraycopy(orgFieldInfos, 0, in.fieldInfos, 0, in.fieldInfos.length);
System.arraycopy(orgFieldsProducers, 0, in.fieldsProducers, 0, in.fieldsProducers.length);
return in;
}
private static class FilterFieldInfos extends FieldInfos {
private final Set<String> filteredNames;
private final List<FieldInfo> filtered;
// Copy of the private fields from FieldInfos
// Renamed so as to be less confusing about which fields we're referring to
private final boolean filteredHasVectors;
private final boolean filteredHasProx;
private final boolean filteredHasPayloads;
private final boolean filteredHasOffsets;
private final boolean filteredHasFreq;
private final boolean filteredHasNorms;
private final boolean filteredHasDocValues;
private final boolean filteredHasPointValues;
FilterFieldInfos(FieldInfos src, Collection<String> filterFields) {
// Copy all the input FieldInfo objects since the field numbering must be kept consistent
super(toArray(src));
boolean hasVectors = false;
boolean hasProx = false;
boolean hasPayloads = false;
boolean hasOffsets = false;
boolean hasFreq = false;
boolean hasNorms = false;
boolean hasDocValues = false;
boolean hasPointValues = false;
this.filteredNames = new HashSet<>(filterFields);
this.filtered = new ArrayList<>(filterFields.size());
for (FieldInfo fi : src) {
if (filterFields.contains(fi.name)) {
this.filtered.add(fi);
hasVectors |= fi.hasVectors();
hasProx |= fi.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
hasFreq |= fi.getIndexOptions() != IndexOptions.DOCS;
hasOffsets |= fi.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
hasNorms |= fi.hasNorms();
hasDocValues |= fi.getDocValuesType() != DocValuesType.NONE;
hasPayloads |= fi.hasPayloads();
hasPointValues |= (fi.getPointDimensionCount() != 0);
}
}
this.filteredHasVectors = hasVectors;
this.filteredHasProx = hasProx;
this.filteredHasPayloads = hasPayloads;
this.filteredHasOffsets = hasOffsets;
this.filteredHasFreq = hasFreq;
this.filteredHasNorms = hasNorms;
this.filteredHasDocValues = hasDocValues;
this.filteredHasPointValues = hasPointValues;
}
private static FieldInfo[] toArray(FieldInfos src) {
FieldInfo[] res = new FieldInfo[src.size()];
int i = 0;
for (FieldInfo fi : src) {
res[i++] = fi;
}
return res;
}
@Override
public Iterator<FieldInfo> iterator() {
return filtered.iterator();
}
@Override
public boolean hasFreq() {
return filteredHasFreq;
}
@Override
public boolean hasProx() {
return filteredHasProx;
}
@Override
public boolean hasPayloads() {
return filteredHasPayloads;
}
@Override
public boolean hasOffsets() {
return filteredHasOffsets;
}
@Override
public boolean hasVectors() {
return filteredHasVectors;
}
@Override
public boolean hasNorms() {
return filteredHasNorms;
}
@Override
public boolean hasDocValues() {
return filteredHasDocValues;
}
@Override
public boolean hasPointValues() {
return filteredHasPointValues;
}
@Override
public int size() {
return filtered.size();
}
@Override
public FieldInfo fieldInfo(String fieldName) {
if (!filteredNames.contains(fieldName)) {
// Throw IAE to be consistent with fieldInfo(int) which throws it as well on invalid numbers
throw new IllegalArgumentException("The field named '" + fieldName + "' is not accessible in the current " +
"merge context, available ones are: " + filteredNames);
}
return super.fieldInfo(fieldName);
}
@Override
public FieldInfo fieldInfo(int fieldNumber) {
FieldInfo res = super.fieldInfo(fieldNumber);
if (!filteredNames.contains(res.name)) {
throw new IllegalArgumentException("The field named '" + res.name + "' numbered '" + fieldNumber + "' is not " +
"accessible in the current merge context, available ones are: " + filteredNames);
}
return res;
}
}
private static class FilterFieldsProducer extends FieldsProducer {
private final FieldsProducer in;
private final List<String> filtered;
FilterFieldsProducer(FieldsProducer in, Collection<String> filterFields) {
this.in = in;
this.filtered = new ArrayList<>(filterFields);
}
@Override
public long ramBytesUsed() {
return in.ramBytesUsed();
}
@Override
public Iterator<String> iterator() {
return filtered.iterator();
}
@Override
public Terms terms(String field) throws IOException {
if (!filtered.contains(field)) {
throw new IllegalArgumentException("The field named '" + field + "' is not accessible in the current " +
"merge context, available ones are: " + filtered);
}
return in.terms(field);
}
@Override
public int size() {
return filtered.size();
}
@Override
public void close() throws IOException {
in.close();
}
@Override
public void checkIntegrity() throws IOException {
in.checkIntegrity();
}
}
}
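
A hypothetical caller-side sketch of the contract implemented above (not part of the commit; it assumes code living in the same org.apache.lucene.codecs.perfield package, since PerFieldMergeState is package-private, and a MergeState whose segments contain fields dv1, dv2 and dv3). Field numbers stay global because every FieldInfo is retained, but lookups outside the filtered set fail fast instead of leaking another format's fields:

import java.util.Arrays;

import org.apache.lucene.index.MergeState;

final class FilteredMergeStateSketch {
  static void demo(MergeState mergeState) {
    PerFieldMergeState pfMergeState = new PerFieldMergeState(mergeState);
    MergeState filtered = pfMergeState.apply(Arrays.asList("dv1", "dv2"));
    try {
      filtered.mergeFieldInfos.fieldInfo("dv1");   // resolves normally
      filtered.mergeFieldInfos.fieldInfo("dv3");   // throws IllegalArgumentException
    } catch (IllegalArgumentException expected) {
      // "dv3" is not accessible in the current merge context
    } finally {
      pfMergeState.reset(); // always restore the shared MergeState instance
    }
  }

  private FilteredMergeStateSketch() {}
}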

lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java

@@ -39,6 +39,8 @@ import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterLeafReader.FilterFields;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
@@ -116,7 +118,61 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
@Override
public void write(Fields fields) throws IOException {
Map<PostingsFormat, FieldsGroup> formatToGroups = buildFieldsGroupMapping(fields);
// Write postings
boolean success = false;
try {
for (Map.Entry<PostingsFormat, FieldsGroup> ent : formatToGroups.entrySet()) {
PostingsFormat format = ent.getKey();
final FieldsGroup group = ent.getValue();
// Exposes only the fields from this group:
Fields maskedFields = new FilterFields(fields) {
@Override
public Iterator<String> iterator() {
return group.fields.iterator();
}
};
FieldsConsumer consumer = format.fieldsConsumer(group.state);
toClose.add(consumer);
consumer.write(maskedFields);
}
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(toClose);
}
}
}
@Override
public void merge(MergeState mergeState) throws IOException {
Map<PostingsFormat, FieldsGroup> formatToGroups = buildFieldsGroupMapping(new MultiFields(mergeState.fieldsProducers, null));
// Merge postings
PerFieldMergeState pfMergeState = new PerFieldMergeState(mergeState);
boolean success = false;
try {
for (Map.Entry<PostingsFormat, FieldsGroup> ent : formatToGroups.entrySet()) {
PostingsFormat format = ent.getKey();
final FieldsGroup group = ent.getValue();
FieldsConsumer consumer = format.fieldsConsumer(group.state);
toClose.add(consumer);
consumer.merge(pfMergeState.apply(group.fields));
}
success = true;
} finally {
pfMergeState.reset();
if (!success) {
IOUtils.closeWhileHandlingException(toClose);
}
}
}
private Map<PostingsFormat, FieldsGroup> buildFieldsGroupMapping(Fields fields) {
// Maps a PostingsFormat instance to the suffix it
// should use
Map<PostingsFormat,FieldsGroup> formatToGroups = new HashMap<>();
@@ -124,7 +180,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
// Holds last suffix of each PostingFormat name
Map<String,Integer> suffixes = new HashMap<>();
// First pass: assign field -> PostingsFormat
// Assign field -> PostingsFormat
for(String field : fields) {
FieldInfo fieldInfo = writeState.fieldInfos.fieldInfo(field);
@@ -177,32 +233,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
", field=" + fieldInfo.name + ", old=" + previousValue + ", new=" + group.suffix);
}
}
return formatToGroups;
// Second pass: write postings
boolean success = false;
try {
for(Map.Entry<PostingsFormat,FieldsGroup> ent : formatToGroups.entrySet()) {
PostingsFormat format = ent.getKey();
final FieldsGroup group = ent.getValue();
// Exposes only the fields from this group:
Fields maskedFields = new FilterFields(fields) {
@Override
public Iterator<String> iterator() {
return group.fields.iterator();
}
};
FieldsConsumer consumer = format.fieldsConsumer(group.state);
toClose.add(consumer);
consumer.write(maskedFields);
}
success = true;
} finally {
if (success == false) {
IOUtils.closeWhileHandlingException(toClose);
}
}
}
@Override
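
One detail worth noting in the new merge() above: the field-to-format mapping is built from new MultiFields(mergeState.fieldsProducers, null). Passing null for the ReaderSlice array is safe in this narrow use because buildFieldsGroupMapping() only iterates field names and never calls terms(), the one accessor that consumes the slices. A small hypothetical helper (not in the commit) showing exactly that usage:

import org.apache.lucene.index.Fields;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.MultiFields;

final class MergedFieldNamesSketch {
  // Returns the union of field names across all segments being merged.
  // Fields implements Iterable<String>, so iteration alone is enough here;
  // calling terms() on this instance would require real ReaderSlices.
  static Iterable<String> fieldNames(MergeState mergeState) {
    Fields union = new MultiFields(mergeState.fieldsProducers, null);
    return union;
  }

  private MergedFieldNamesSketch() {}
}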

lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java

@@ -18,13 +18,19 @@ package org.apache.lucene.codecs.perfield;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.asserting.AssertingCodec;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
@@ -33,11 +39,15 @@ import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.BaseDocValuesFormatTestCase;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomCodec;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@@ -127,4 +137,116 @@ public class TestPerFieldDocValuesFormat extends BaseDocValuesFormatTestCase {
ireader.close();
directory.close();
}
public void testMergeCalledOnTwoFormats() throws IOException {
MergeRecordingDocValueFormatWrapper dvf1 = new MergeRecordingDocValueFormatWrapper(TestUtil.getDefaultDocValuesFormat());
MergeRecordingDocValueFormatWrapper dvf2 = new MergeRecordingDocValueFormatWrapper(TestUtil.getDefaultDocValuesFormat());
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setCodec(new AssertingCodec() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
switch (field) {
case "dv1":
case "dv2":
return dvf1;
case "dv3":
return dvf2;
default:
return super.getDocValuesFormatForField(field);
}
}
});
Directory directory = newDirectory();
IndexWriter iwriter = new IndexWriter(directory, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("dv1", 5));
doc.add(new NumericDocValuesField("dv2", 42));
doc.add(new BinaryDocValuesField("dv3", new BytesRef("hello world")));
iwriter.addDocument(doc);
iwriter.commit();
doc = new Document();
doc.add(new NumericDocValuesField("dv1", 8));
doc.add(new NumericDocValuesField("dv2", 45));
doc.add(new BinaryDocValuesField("dv3", new BytesRef("goodbye world")));
iwriter.addDocument(doc);
iwriter.commit();
iwriter.forceMerge(1, true);
iwriter.close();
assertEquals(1, dvf1.nbMergeCalls);
assertEquals(new HashSet<>(Arrays.asList("dv1", "dv2")), new HashSet<>(dvf1.fieldNames));
assertEquals(1, dvf2.nbMergeCalls);
assertEquals(Collections.singletonList("dv3"), dvf2.fieldNames);
directory.close();
}
private static final class MergeRecordingDocValueFormatWrapper extends DocValuesFormat {
private final DocValuesFormat delegate;
final List<String> fieldNames = new ArrayList<>();
volatile int nbMergeCalls = 0;
MergeRecordingDocValueFormatWrapper(DocValuesFormat delegate) {
super(delegate.getName());
this.delegate = delegate;
}
@Override
public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
final DocValuesConsumer consumer = delegate.fieldsConsumer(state);
return new DocValuesConsumer() {
@Override
public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
consumer.addNumericField(field, values);
}
@Override
public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException {
consumer.addBinaryField(field, values);
}
@Override
public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
consumer.addSortedField(field, values, docToOrd);
}
@Override
public void addSortedNumericField(FieldInfo field, Iterable<Number> docToValueCount, Iterable<Number> values) throws IOException {
consumer.addSortedNumericField(field, docToValueCount, values);
}
@Override
public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
consumer.addSortedSetField(field, values, docToOrdCount, ords);
}
@Override
public void merge(MergeState mergeState) throws IOException {
nbMergeCalls++;
for (FieldInfo fi : mergeState.mergeFieldInfos) {
fieldNames.add(fi.name);
}
consumer.merge(mergeState);
}
@Override
public void close() throws IOException {
consumer.close();
}
};
}
@Override
public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
return delegate.fieldsProducer(state);
}
}
}

lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java

@@ -18,9 +18,16 @@ package org.apache.lucene.codecs.perfield;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.asserting.AssertingCodec;
import org.apache.lucene.codecs.blockterms.LuceneVarGapFixedInterval;
@@ -29,14 +36,21 @@ import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
@@ -322,4 +336,100 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
iw.close();
dir.close(); // checkindex
}
@SuppressWarnings("deprecation")
public void testMergeCalledOnTwoFormats() throws IOException {
MergeRecordingPostingsFormatWrapper pf1 = new MergeRecordingPostingsFormatWrapper(TestUtil.getDefaultPostingsFormat());
MergeRecordingPostingsFormatWrapper pf2 = new MergeRecordingPostingsFormatWrapper(TestUtil.getDefaultPostingsFormat());
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setCodec(new AssertingCodec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
switch (field) {
case "f1":
case "f2":
return pf1;
case "f3":
case "f4":
return pf2;
default:
return super.getPostingsFormatForField(field);
}
}
});
Directory directory = newDirectory();
IndexWriter iwriter = new IndexWriter(directory, iwc);
Document doc = new Document();
doc.add(new StringField("f1", "val1", Field.Store.NO));
doc.add(new StringField("f2", "val2", Field.Store.YES));
doc.add(new IntPoint("f3", 3)); // Points are not indexed as postings and should not appear in the merge fields
doc.add(new StringField("f4", "val4", Field.Store.NO));
iwriter.addDocument(doc);
iwriter.commit();
doc = new Document();
doc.add(new StringField("f1", "val5", Field.Store.NO));
doc.add(new StringField("f2", "val6", Field.Store.YES));
doc.add(new IntPoint("f3", 7));
doc.add(new StringField("f4", "val8", Field.Store.NO));
iwriter.addDocument(doc);
iwriter.commit();
iwriter.forceMerge(1, true);
iwriter.close();
assertEquals(1, pf1.nbMergeCalls);
assertEquals(new HashSet<>(Arrays.asList("f1", "f2")), new HashSet<>(pf1.fieldNames));
assertEquals(1, pf2.nbMergeCalls);
assertEquals(Collections.singletonList("f4"), pf2.fieldNames);
directory.close();
}
private static final class MergeRecordingPostingsFormatWrapper extends PostingsFormat {
private final PostingsFormat delegate;
final List<String> fieldNames = new ArrayList<>();
int nbMergeCalls = 0;
MergeRecordingPostingsFormatWrapper(PostingsFormat delegate) {
super(delegate.getName());
this.delegate = delegate;
}
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
final FieldsConsumer consumer = delegate.fieldsConsumer(state);
return new FieldsConsumer() {
@Override
public void write(Fields fields) throws IOException {
consumer.write(fields);
}
@Override
public void merge(MergeState mergeState) throws IOException {
nbMergeCalls++;
for (FieldInfo fi : mergeState.mergeFieldInfos) {
fieldNames.add(fi.name);
}
consumer.merge(mergeState);
}
@Override
public void close() throws IOException {
consumer.close();
}
};
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
return delegate.fieldsProducer(state);
}
}
}