mirror of https://github.com/apache/lucene.git
LUCENE-7456: PerFieldPostings/DocValuesFormat was failing to delegate the merge method
This commit is contained in:
parent 6739e075b4
commit 796ed508f3
lucene/CHANGES.txt
@@ -49,6 +49,9 @@ Bug Fixes

* LUCENE-7472: MultiFieldQueryParser.getFieldQuery() drops queries that are
  neither BooleanQuery nor TermQuery. (Steve Rowe)

* LUCENE-7456: PerFieldPostings/DocValues was failing to delegate the
  merge method (Julien MASSENET via Mike McCandless)

Improvements

* LUCENE-7439: FuzzyQuery now matches all terms within the specified
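To make the bug concrete: a delegate format can hand back a FieldsConsumer whose merge() carries format-specific logic, but before this change PerFieldPostingsFormat (and likewise PerFieldDocValuesFormat) fell back to the generic field-by-field merge and never reached that override. A minimal sketch of such an override — the MergeOverrideSketch class and wrap() helper are illustrative, not part of the commit; it mirrors the MergeRecordingPostingsFormatWrapper added to the tests below:

import java.io.IOException;

import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.MergeState;

// Sketch only: wraps a FieldsConsumer with a custom merge() override of
// the kind the bug silently skipped.
final class MergeOverrideSketch {
  static FieldsConsumer wrap(FieldsConsumer in) {
    return new FieldsConsumer() {
      @Override
      public void write(Fields fields) throws IOException {
        in.write(fields);
      }

      @Override
      public void merge(MergeState mergeState) throws IOException {
        // Format-specific merge logic would run here; before LUCENE-7456
        // the per-field wrappers never invoked this method on delegates.
        in.merge(mergeState);
      }

      @Override
      public void close() throws IOException {
        in.close();
      }
    };
  }
}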
lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java
@@ -18,6 +18,7 @@ package org.apache.lucene.codecs.perfield;

import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.IdentityHashMap;
@@ -32,6 +33,7 @@ import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
@@ -127,6 +129,32 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
      getInstance(field).addSortedSetField(field, valuesProducer);
    }

    @Override
    public void merge(MergeState mergeState) throws IOException {
      Map<DocValuesConsumer, Collection<String>> consumersToField = new IdentityHashMap<>();

      // Group the fields by the consumer that handles them
      for (FieldInfo fi : mergeState.mergeFieldInfos) {
        DocValuesConsumer consumer = getInstance(fi);
        Collection<String> fieldsForConsumer = consumersToField.get(consumer);
        if (fieldsForConsumer == null) {
          fieldsForConsumer = new ArrayList<>();
          consumersToField.put(consumer, fieldsForConsumer);
        }
        fieldsForConsumer.add(fi.name);
      }

      // Delegate the merge to the appropriate consumer
      PerFieldMergeState pfMergeState = new PerFieldMergeState(mergeState);
      try {
        for (Map.Entry<DocValuesConsumer, Collection<String>> e : consumersToField.entrySet()) {
          e.getKey().merge(pfMergeState.apply(e.getValue()));
        }
      } finally {
        pfMergeState.reset();
      }
    }

    private DocValuesConsumer getInstance(FieldInfo field) throws IOException {
      DocValuesFormat format = null;
      if (field.getDocValuesGen() != -1) {
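A side note on the grouping loop above: the map is an IdentityHashMap, so fields are grouped per consumer instance rather than by equals(). On Java 8 the get-then-put sequence could be collapsed with Map.computeIfAbsent; a sketch of the equivalent loop (the commit keeps the explicit form):

      // Equivalent grouping, sketched with Java 8's Map.computeIfAbsent:
      for (FieldInfo fi : mergeState.mergeFieldInfos) {
        consumersToField.computeIfAbsent(getInstance(fi), k -> new ArrayList<>()).add(fi.name);
      }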
lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldMergeState.java
@@ -0,0 +1,274 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs.perfield;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.Terms;

/**
 * Utility class to update the {@link MergeState} instance to be restricted to a set of fields.
 * <p>
 * Warning: the input {@linkplain MergeState} instance will be updated when calling {@link #apply(Collection)}.
 * <p>
 * It should be called within a {@code try {...} finally {...}} block to make sure that the mergeState instance is
 * restored to its original state:
 * <pre>
 * PerFieldMergeState pfMergeState = new PerFieldMergeState(mergeState);
 * try {
 *   doSomething(pfMergeState.apply(fields));
 *   ...
 * } finally {
 *   pfMergeState.reset();
 * }
 * </pre>
 */
final class PerFieldMergeState {
  private final MergeState in;
  private final FieldInfos orgMergeFieldInfos;
  private final FieldInfos[] orgFieldInfos;
  private final FieldsProducer[] orgFieldsProducers;

  PerFieldMergeState(MergeState in) {
    this.in = in;
    this.orgMergeFieldInfos = in.mergeFieldInfos;
    this.orgFieldInfos = new FieldInfos[in.fieldInfos.length];
    this.orgFieldsProducers = new FieldsProducer[in.fieldsProducers.length];

    System.arraycopy(in.fieldInfos, 0, this.orgFieldInfos, 0, this.orgFieldInfos.length);
    System.arraycopy(in.fieldsProducers, 0, this.orgFieldsProducers, 0, this.orgFieldsProducers.length);
  }

  /**
   * Update the input {@link MergeState} instance to restrict the fields to the given ones.
   *
   * @param fields The fields to keep in the updated instance.
   * @return The updated instance.
   */
  MergeState apply(Collection<String> fields) {
    in.mergeFieldInfos = new FilterFieldInfos(orgMergeFieldInfos, fields);
    for (int i = 0; i < orgFieldInfos.length; i++) {
      in.fieldInfos[i] = new FilterFieldInfos(orgFieldInfos[i], fields);
    }
    for (int i = 0; i < orgFieldsProducers.length; i++) {
      in.fieldsProducers[i] = new FilterFieldsProducer(orgFieldsProducers[i], fields);
    }
    return in;
  }

  /**
   * Resets the input {@link MergeState} instance to its original state.
   *
   * @return The reset instance.
   */
  MergeState reset() {
    in.mergeFieldInfos = orgMergeFieldInfos;
    System.arraycopy(orgFieldInfos, 0, in.fieldInfos, 0, in.fieldInfos.length);
    System.arraycopy(orgFieldsProducers, 0, in.fieldsProducers, 0, in.fieldsProducers.length);
    return in;
  }

  private static class FilterFieldInfos extends FieldInfos {
    private final Set<String> filteredNames;
    private final List<FieldInfo> filtered;

    // Copy of the private fields from FieldInfos
    // Renamed so as to be less confusing about which fields we're referring to
    private final boolean filteredHasVectors;
    private final boolean filteredHasProx;
    private final boolean filteredHasPayloads;
    private final boolean filteredHasOffsets;
    private final boolean filteredHasFreq;
    private final boolean filteredHasNorms;
    private final boolean filteredHasDocValues;
    private final boolean filteredHasPointValues;

    FilterFieldInfos(FieldInfos src, Collection<String> filterFields) {
      // Copy all the input FieldInfo objects since the field numbering must be kept consistent
      super(toArray(src));

      boolean hasVectors = false;
      boolean hasProx = false;
      boolean hasPayloads = false;
      boolean hasOffsets = false;
      boolean hasFreq = false;
      boolean hasNorms = false;
      boolean hasDocValues = false;
      boolean hasPointValues = false;

      this.filteredNames = new HashSet<>(filterFields);
      this.filtered = new ArrayList<>(filterFields.size());
      for (FieldInfo fi : src) {
        if (filterFields.contains(fi.name)) {
          this.filtered.add(fi);
          hasVectors |= fi.hasVectors();
          hasProx |= fi.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
          hasFreq |= fi.getIndexOptions() != IndexOptions.DOCS;
          hasOffsets |= fi.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
          hasNorms |= fi.hasNorms();
          hasDocValues |= fi.getDocValuesType() != DocValuesType.NONE;
          hasPayloads |= fi.hasPayloads();
          hasPointValues |= (fi.getPointDimensionCount() != 0);
        }
      }

      this.filteredHasVectors = hasVectors;
      this.filteredHasProx = hasProx;
      this.filteredHasPayloads = hasPayloads;
      this.filteredHasOffsets = hasOffsets;
      this.filteredHasFreq = hasFreq;
      this.filteredHasNorms = hasNorms;
      this.filteredHasDocValues = hasDocValues;
      this.filteredHasPointValues = hasPointValues;
    }

    private static FieldInfo[] toArray(FieldInfos src) {
      FieldInfo[] res = new FieldInfo[src.size()];
      int i = 0;
      for (FieldInfo fi : src) {
        res[i++] = fi;
      }
      return res;
    }

    @Override
    public Iterator<FieldInfo> iterator() {
      return filtered.iterator();
    }

    @Override
    public boolean hasFreq() {
      return filteredHasFreq;
    }

    @Override
    public boolean hasProx() {
      return filteredHasProx;
    }

    @Override
    public boolean hasPayloads() {
      return filteredHasPayloads;
    }

    @Override
    public boolean hasOffsets() {
      return filteredHasOffsets;
    }

    @Override
    public boolean hasVectors() {
      return filteredHasVectors;
    }

    @Override
    public boolean hasNorms() {
      return filteredHasNorms;
    }

    @Override
    public boolean hasDocValues() {
      return filteredHasDocValues;
    }

    @Override
    public boolean hasPointValues() {
      return filteredHasPointValues;
    }

    @Override
    public int size() {
      return filtered.size();
    }

    @Override
    public FieldInfo fieldInfo(String fieldName) {
      if (!filteredNames.contains(fieldName)) {
        // Throw IAE to be consistent with fieldInfo(int) which throws it as well on invalid numbers
        throw new IllegalArgumentException("The field named '" + fieldName + "' is not accessible in the current " +
            "merge context, available ones are: " + filteredNames);
      }
      return super.fieldInfo(fieldName);
    }

    @Override
    public FieldInfo fieldInfo(int fieldNumber) {
      FieldInfo res = super.fieldInfo(fieldNumber);
      if (!filteredNames.contains(res.name)) {
        throw new IllegalArgumentException("The field named '" + res.name + "' numbered '" + fieldNumber + "' is not " +
            "accessible in the current merge context, available ones are: " + filteredNames);
      }
      return res;
    }
  }

  private static class FilterFieldsProducer extends FieldsProducer {
    private final FieldsProducer in;
    private final List<String> filtered;

    FilterFieldsProducer(FieldsProducer in, Collection<String> filterFields) {
      this.in = in;
      this.filtered = new ArrayList<>(filterFields);
    }

    @Override
    public long ramBytesUsed() {
      return in.ramBytesUsed();
    }

    @Override
    public Iterator<String> iterator() {
      return filtered.iterator();
    }

    @Override
    public Terms terms(String field) throws IOException {
      if (!filtered.contains(field)) {
        throw new IllegalArgumentException("The field named '" + field + "' is not accessible in the current " +
            "merge context, available ones are: " + filtered);
      }
      return in.terms(field);
    }

    @Override
    public int size() {
      return filtered.size();
    }

    @Override
    public void close() throws IOException {
      in.close();
    }

    @Override
    public void checkIntegrity() throws IOException {
      in.checkIntegrity();
    }
  }
}
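Why the constructor snapshots and reset() restores: apply(Collection) mutates the shared MergeState in place, swapping its mergeFieldInfos, fieldInfos and fieldsProducers entries for filtered views, so each consumer's merge() sees only its own fields; reset() then puts the originals back before the next consumer runs. A usage sketch with hypothetical names (consumer stands for any per-field consumer, and java.util.Arrays is assumed imported):

    // Restrict the shared MergeState to two fields, delegate the merge,
    // then restore the original views for the next consumer.
    PerFieldMergeState pfMergeState = new PerFieldMergeState(mergeState);
    try {
      consumer.merge(pfMergeState.apply(Arrays.asList("title", "body")));
    } finally {
      pfMergeState.reset(); // MergeState is shared and was mutated in place
    }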
lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
@@ -39,6 +39,8 @@ import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterLeafReader.FilterFields;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
@@ -116,7 +118,61 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {

    @Override
    public void write(Fields fields) throws IOException {
      Map<PostingsFormat, FieldsGroup> formatToGroups = buildFieldsGroupMapping(fields);

      // Write postings
      boolean success = false;
      try {
        for (Map.Entry<PostingsFormat, FieldsGroup> ent : formatToGroups.entrySet()) {
          PostingsFormat format = ent.getKey();
          final FieldsGroup group = ent.getValue();

          // Exposes only the fields from this group:
          Fields maskedFields = new FilterFields(fields) {
            @Override
            public Iterator<String> iterator() {
              return group.fields.iterator();
            }
          };

          FieldsConsumer consumer = format.fieldsConsumer(group.state);
          toClose.add(consumer);
          consumer.write(maskedFields);
        }
        success = true;
      } finally {
        if (!success) {
          IOUtils.closeWhileHandlingException(toClose);
        }
      }
    }

    @Override
    public void merge(MergeState mergeState) throws IOException {
      Map<PostingsFormat, FieldsGroup> formatToGroups = buildFieldsGroupMapping(new MultiFields(mergeState.fieldsProducers, null));

      // Merge postings
      PerFieldMergeState pfMergeState = new PerFieldMergeState(mergeState);
      boolean success = false;
      try {
        for (Map.Entry<PostingsFormat, FieldsGroup> ent : formatToGroups.entrySet()) {
          PostingsFormat format = ent.getKey();
          final FieldsGroup group = ent.getValue();

          FieldsConsumer consumer = format.fieldsConsumer(group.state);
          toClose.add(consumer);
          consumer.merge(pfMergeState.apply(group.fields));
        }
        success = true;
      } finally {
        pfMergeState.reset();
        if (!success) {
          IOUtils.closeWhileHandlingException(toClose);
        }
      }
    }

    private Map<PostingsFormat, FieldsGroup> buildFieldsGroupMapping(Fields fields) {
      // Maps a PostingsFormat instance to the suffix it should use
      Map<PostingsFormat,FieldsGroup> formatToGroups = new HashMap<>();
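One subtlety in merge() above: the field grouping is built from new MultiFields(mergeState.fieldsProducers, null), the union view over every segment in the merge, so a field present in only some segments is still routed to its postings format. A sketch of what that union iteration yields:

      // MultiFields exposes the deduplicated union of field names across
      // the fields producers of all segments being merged.
      Fields allFields = new MultiFields(mergeState.fieldsProducers, null);
      for (String field : allFields) {
        // each field name appears once, even if several segments contain it
      }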
@@ -124,7 +180,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
      // Holds last suffix of each PostingsFormat name
      Map<String,Integer> suffixes = new HashMap<>();

      // First pass: assign field -> PostingsFormat
      // Assign field -> PostingsFormat
      for(String field : fields) {
        FieldInfo fieldInfo = writeState.fieldInfos.fieldInfo(field);
@@ -177,32 +233,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
              ", field=" + fieldInfo.name + ", old=" + previousValue + ", new=" + group.suffix);
        }
      }

      // Second pass: write postings
      boolean success = false;
      try {
        for(Map.Entry<PostingsFormat,FieldsGroup> ent : formatToGroups.entrySet()) {
          PostingsFormat format = ent.getKey();
          final FieldsGroup group = ent.getValue();

          // Exposes only the fields from this group:
          Fields maskedFields = new FilterFields(fields) {
            @Override
            public Iterator<String> iterator() {
              return group.fields.iterator();
            }
          };

          FieldsConsumer consumer = format.fieldsConsumer(group.state);
          toClose.add(consumer);
          consumer.write(maskedFields);
        }
        success = true;
      } finally {
        if (success == false) {
          IOUtils.closeWhileHandlingException(toClose);
        }
      }
      return formatToGroups;
    }

    @Override
lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java
@@ -18,13 +18,19 @@ package org.apache.lucene.codecs.perfield;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Random;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.asserting.AssertingCodec;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
@@ -33,11 +39,15 @@ import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.BaseDocValuesFormatTestCase;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomCodec;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@@ -127,4 +137,116 @@ public class TestPerFieldDocValuesFormat extends BaseDocValuesFormatTestCase {
    ireader.close();
    directory.close();
  }

  public void testMergeCalledOnTwoFormats() throws IOException {
    MergeRecordingDocValueFormatWrapper dvf1 = new MergeRecordingDocValueFormatWrapper(TestUtil.getDefaultDocValuesFormat());
    MergeRecordingDocValueFormatWrapper dvf2 = new MergeRecordingDocValueFormatWrapper(TestUtil.getDefaultDocValuesFormat());

    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setCodec(new AssertingCodec() {
      @Override
      public DocValuesFormat getDocValuesFormatForField(String field) {
        switch (field) {
          case "dv1":
          case "dv2":
            return dvf1;

          case "dv3":
            return dvf2;

          default:
            return super.getDocValuesFormatForField(field);
        }
      }
    });

    Directory directory = newDirectory();

    IndexWriter iwriter = new IndexWriter(directory, iwc);

    Document doc = new Document();
    doc.add(new NumericDocValuesField("dv1", 5));
    doc.add(new NumericDocValuesField("dv2", 42));
    doc.add(new BinaryDocValuesField("dv3", new BytesRef("hello world")));
    iwriter.addDocument(doc);
    iwriter.commit();

    doc = new Document();
    doc.add(new NumericDocValuesField("dv1", 8));
    doc.add(new NumericDocValuesField("dv2", 45));
    doc.add(new BinaryDocValuesField("dv3", new BytesRef("goodbye world")));
    iwriter.addDocument(doc);
    iwriter.commit();

    iwriter.forceMerge(1, true);
    iwriter.close();

    assertEquals(1, dvf1.nbMergeCalls);
    assertEquals(new HashSet<>(Arrays.asList("dv1", "dv2")), new HashSet<>(dvf1.fieldNames));
    assertEquals(1, dvf2.nbMergeCalls);
    assertEquals(Collections.singletonList("dv3"), dvf2.fieldNames);

    directory.close();
  }

  private static final class MergeRecordingDocValueFormatWrapper extends DocValuesFormat {
    private final DocValuesFormat delegate;
    final List<String> fieldNames = new ArrayList<>();
    volatile int nbMergeCalls = 0;

    MergeRecordingDocValueFormatWrapper(DocValuesFormat delegate) {
      super(delegate.getName());
      this.delegate = delegate;
    }

    @Override
    public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
      final DocValuesConsumer consumer = delegate.fieldsConsumer(state);
      return new DocValuesConsumer() {
        @Override
        public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
          consumer.addNumericField(field, values);
        }

        @Override
        public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException {
          consumer.addBinaryField(field, values);
        }

        @Override
        public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
          consumer.addSortedField(field, values, docToOrd);
        }

        @Override
        public void addSortedNumericField(FieldInfo field, Iterable<Number> docToValueCount, Iterable<Number> values) throws IOException {
          consumer.addSortedNumericField(field, docToValueCount, values);
        }

        @Override
        public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
          consumer.addSortedSetField(field, values, docToOrdCount, ords);
        }

        @Override
        public void merge(MergeState mergeState) throws IOException {
          nbMergeCalls++;
          for (FieldInfo fi : mergeState.mergeFieldInfos) {
            fieldNames.add(fi.name);
          }
          consumer.merge(mergeState);
        }

        @Override
        public void close() throws IOException {
          consumer.close();
        }
      };
    }

    @Override
    public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
      return delegate.fieldsProducer(state);
    }
  }
}
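A small note on the assertions above: dvf1 handles two fields whose merge order within a single merge call is not guaranteed, hence the order-insensitive HashSet comparison, while dvf2 sees exactly one field so a plain list equality suffices.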
lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java
@@ -18,9 +18,16 @@ package org.apache.lucene.codecs.perfield;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.asserting.AssertingCodec;
import org.apache.lucene.codecs.blockterms.LuceneVarGapFixedInterval;
@@ -29,14 +36,21 @@ import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
@@ -322,4 +336,100 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
    iw.close();
    dir.close(); // checkindex
  }

  @SuppressWarnings("deprecation")
  public void testMergeCalledOnTwoFormats() throws IOException {
    MergeRecordingPostingsFormatWrapper pf1 = new MergeRecordingPostingsFormatWrapper(TestUtil.getDefaultPostingsFormat());
    MergeRecordingPostingsFormatWrapper pf2 = new MergeRecordingPostingsFormatWrapper(TestUtil.getDefaultPostingsFormat());

    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setCodec(new AssertingCodec() {
      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        switch (field) {
          case "f1":
          case "f2":
            return pf1;

          case "f3":
          case "f4":
            return pf2;

          default:
            return super.getPostingsFormatForField(field);
        }
      }
    });

    Directory directory = newDirectory();

    IndexWriter iwriter = new IndexWriter(directory, iwc);

    Document doc = new Document();
    doc.add(new StringField("f1", "val1", Field.Store.NO));
    doc.add(new StringField("f2", "val2", Field.Store.YES));
    doc.add(new IntPoint("f3", 3)); // Points are not indexed as postings and should not appear in the merged fields
    doc.add(new StringField("f4", "val4", Field.Store.NO));
    iwriter.addDocument(doc);
    iwriter.commit();

    doc = new Document();
    doc.add(new StringField("f1", "val5", Field.Store.NO));
    doc.add(new StringField("f2", "val6", Field.Store.YES));
    doc.add(new IntPoint("f3", 7));
    doc.add(new StringField("f4", "val8", Field.Store.NO));
    iwriter.addDocument(doc);
    iwriter.commit();

    iwriter.forceMerge(1, true);
    iwriter.close();

    assertEquals(1, pf1.nbMergeCalls);
    assertEquals(new HashSet<>(Arrays.asList("f1", "f2")), new HashSet<>(pf1.fieldNames));
    assertEquals(1, pf2.nbMergeCalls);
    assertEquals(Collections.singletonList("f4"), pf2.fieldNames);

    directory.close();
  }

  private static final class MergeRecordingPostingsFormatWrapper extends PostingsFormat {
    private final PostingsFormat delegate;
    final List<String> fieldNames = new ArrayList<>();
    int nbMergeCalls = 0;

    MergeRecordingPostingsFormatWrapper(PostingsFormat delegate) {
      super(delegate.getName());
      this.delegate = delegate;
    }

    @Override
    public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
      final FieldsConsumer consumer = delegate.fieldsConsumer(state);
      return new FieldsConsumer() {
        @Override
        public void write(Fields fields) throws IOException {
          consumer.write(fields);
        }

        @Override
        public void merge(MergeState mergeState) throws IOException {
          nbMergeCalls++;
          for (FieldInfo fi : mergeState.mergeFieldInfos) {
            fieldNames.add(fi.name);
          }
          consumer.merge(mergeState);
        }

        @Override
        public void close() throws IOException {
          consumer.close();
        }
      };
    }

    @Override
    public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
      return delegate.fieldsProducer(state);
    }
  }
}