mirror of https://github.com/apache/lucene.git

LUCENE-7456: PerFieldPostings/DocValuesFormat was failing to delegate the merge method

commit 796ed508f3 (parent 6739e075b4)
lucene/CHANGES.txt:

@@ -49,6 +49,9 @@ Bug Fixes
 * LUCENE-7472: MultiFieldQueryParser.getFieldQuery() drops queries that are
   neither BooleanQuery nor TermQuery. (Steve Rowe)
 
+* LUCENE-7456: PerFieldPostings/DocValues was failing to delegate the
+  merge method (Julien MASSENET via Mike McCandless)
+
 Improvements
 
 * LUCENE-7439: FuzzyQuery now matches all terms within the specified
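Background for this entry: both per-field formats hand each field to a delegate format, but their consumers did not override merge(MergeState), so merging fell back to the generic field-at-a-time path and a delegate's own optimized merge never ran. A minimal sketch of that failure shape, using hypothetical BaseConsumer/OptimizedConsumer/PerFieldWrapper names rather than the Lucene API:

// Sketch of the bug: a wrapper that forgets to override merge()
// silently runs the base class's generic merge instead of the
// delegate's specialized one.
abstract class BaseConsumer {
  void merge(String state) {
    System.out.println("generic field-by-field merge");
  }
}

class OptimizedConsumer extends BaseConsumer {
  @Override
  void merge(String state) {
    System.out.println("delegate's optimized merge");
  }
}

class PerFieldWrapper extends BaseConsumer {
  private final BaseConsumer delegate = new OptimizedConsumer();

  // This override is the essence of the fix below; delete it and
  // BaseConsumer.merge() runs, skipping the delegate entirely.
  @Override
  void merge(String state) {
    delegate.merge(state);
  }
}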
lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java:

@@ -18,6 +18,7 @@ package org.apache.lucene.codecs.perfield;
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.IdentityHashMap;
@@ -32,6 +33,7 @@ import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.MergeState;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
@@ -127,6 +129,32 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
       getInstance(field).addSortedSetField(field, valuesProducer);
     }
 
+    @Override
+    public void merge(MergeState mergeState) throws IOException {
+      Map<DocValuesConsumer, Collection<String>> consumersToField = new IdentityHashMap<>();
+
+      // Group each consumer by the fields it handles
+      for (FieldInfo fi : mergeState.mergeFieldInfos) {
+        DocValuesConsumer consumer = getInstance(fi);
+        Collection<String> fieldsForConsumer = consumersToField.get(consumer);
+        if (fieldsForConsumer == null) {
+          fieldsForConsumer = new ArrayList<>();
+          consumersToField.put(consumer, fieldsForConsumer);
+        }
+        fieldsForConsumer.add(fi.name);
+      }
+
+      // Delegate the merge to the appropriate consumer
+      PerFieldMergeState pfMergeState = new PerFieldMergeState(mergeState);
+      try {
+        for (Map.Entry<DocValuesConsumer, Collection<String>> e : consumersToField.entrySet()) {
+          e.getKey().merge(pfMergeState.apply(e.getValue()));
+        }
+      } finally {
+        pfMergeState.reset();
+      }
+    }
+
     private DocValuesConsumer getInstance(FieldInfo field) throws IOException {
       DocValuesFormat format = null;
       if (field.getDocValuesGen() != -1) {
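A side note on the grouping loop above: consumersToField is an IdentityHashMap because fields configured with the same format resolve to the same DocValuesConsumer instance, and that instance identity is the grouping key. A behaviorally equivalent sketch of the grouping using Java 8's computeIfAbsent, with plain Objects standing in for consumers:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Map;

public class GroupByConsumer {
  public static void main(String[] args) {
    // Stand-ins for consumer instances; dv1 and dv2 share one.
    Object consumerA = new Object();
    Object consumerB = new Object();
    Map<String, Object> fieldToConsumer = new HashMap<>();
    fieldToConsumer.put("dv1", consumerA);
    fieldToConsumer.put("dv2", consumerA);
    fieldToConsumer.put("dv3", consumerB);

    // Identity-keyed: fields resolving to the same instance merge together.
    Map<Object, Collection<String>> consumersToField = new IdentityHashMap<>();
    for (String field : Arrays.asList("dv1", "dv2", "dv3")) {
      consumersToField
          .computeIfAbsent(fieldToConsumer.get(field), k -> new ArrayList<>())
          .add(field);
    }
    System.out.println(consumersToField.size()); // prints 2
  }
}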
lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldMergeState.java (new file, 274 lines):

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs.perfield;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.Terms;

/**
 * Utility class to update the {@link MergeState} instance to be restricted to a set of fields.
 * <p>
 * Warning: the input {@linkplain MergeState} instance will be updated when calling {@link #apply(Collection)}.
 * <p>
 * It should be called within a {@code try {...} finally {...}} block to make sure that the mergeState instance is
 * restored to its original state:
 * <pre>
 * PerFieldMergeState pfMergeState = new PerFieldMergeState(mergeState);
 * try {
 *   doSomething(pfMergeState.apply(fields));
 *   ...
 * } finally {
 *   pfMergeState.reset();
 * }
 * </pre>
 */
final class PerFieldMergeState {
  private final MergeState in;
  private final FieldInfos orgMergeFieldInfos;
  private final FieldInfos[] orgFieldInfos;
  private final FieldsProducer[] orgFieldsProducers;

  PerFieldMergeState(MergeState in) {
    this.in = in;
    this.orgMergeFieldInfos = in.mergeFieldInfos;
    this.orgFieldInfos = new FieldInfos[in.fieldInfos.length];
    this.orgFieldsProducers = new FieldsProducer[in.fieldsProducers.length];

    System.arraycopy(in.fieldInfos, 0, this.orgFieldInfos, 0, this.orgFieldInfos.length);
    System.arraycopy(in.fieldsProducers, 0, this.orgFieldsProducers, 0, this.orgFieldsProducers.length);
  }

  /**
   * Update the input {@link MergeState} instance to restrict the fields to the given ones.
   *
   * @param fields The fields to keep in the updated instance.
   * @return The updated instance.
   */
  MergeState apply(Collection<String> fields) {
    in.mergeFieldInfos = new FilterFieldInfos(orgMergeFieldInfos, fields);
    for (int i = 0; i < orgFieldInfos.length; i++) {
      in.fieldInfos[i] = new FilterFieldInfos(orgFieldInfos[i], fields);
    }
    for (int i = 0; i < orgFieldsProducers.length; i++) {
      in.fieldsProducers[i] = new FilterFieldsProducer(orgFieldsProducers[i], fields);
    }
    return in;
  }

  /**
   * Resets the input {@link MergeState} instance to its original state.
   *
   * @return The reset instance.
   */
  MergeState reset() {
    in.mergeFieldInfos = orgMergeFieldInfos;
    System.arraycopy(orgFieldInfos, 0, in.fieldInfos, 0, in.fieldInfos.length);
    System.arraycopy(orgFieldsProducers, 0, in.fieldsProducers, 0, in.fieldsProducers.length);
    return in;
  }

  private static class FilterFieldInfos extends FieldInfos {
    private final Set<String> filteredNames;
    private final List<FieldInfo> filtered;

    // Copy of the private fields from FieldInfos
    // Renamed so as to be less confusing about which fields we're referring to
    private final boolean filteredHasVectors;
    private final boolean filteredHasProx;
    private final boolean filteredHasPayloads;
    private final boolean filteredHasOffsets;
    private final boolean filteredHasFreq;
    private final boolean filteredHasNorms;
    private final boolean filteredHasDocValues;
    private final boolean filteredHasPointValues;

    FilterFieldInfos(FieldInfos src, Collection<String> filterFields) {
      // Copy all the input FieldInfo objects since the field numbering must be kept consistent
      super(toArray(src));

      boolean hasVectors = false;
      boolean hasProx = false;
      boolean hasPayloads = false;
      boolean hasOffsets = false;
      boolean hasFreq = false;
      boolean hasNorms = false;
      boolean hasDocValues = false;
      boolean hasPointValues = false;

      this.filteredNames = new HashSet<>(filterFields);
      this.filtered = new ArrayList<>(filterFields.size());
      for (FieldInfo fi : src) {
        if (filterFields.contains(fi.name)) {
          this.filtered.add(fi);
          hasVectors |= fi.hasVectors();
          hasProx |= fi.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
          hasFreq |= fi.getIndexOptions() != IndexOptions.DOCS;
          hasOffsets |= fi.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
          hasNorms |= fi.hasNorms();
          hasDocValues |= fi.getDocValuesType() != DocValuesType.NONE;
          hasPayloads |= fi.hasPayloads();
          hasPointValues |= (fi.getPointDimensionCount() != 0);
        }
      }

      this.filteredHasVectors = hasVectors;
      this.filteredHasProx = hasProx;
      this.filteredHasPayloads = hasPayloads;
      this.filteredHasOffsets = hasOffsets;
      this.filteredHasFreq = hasFreq;
      this.filteredHasNorms = hasNorms;
      this.filteredHasDocValues = hasDocValues;
      this.filteredHasPointValues = hasPointValues;
    }

    private static FieldInfo[] toArray(FieldInfos src) {
      FieldInfo[] res = new FieldInfo[src.size()];
      int i = 0;
      for (FieldInfo fi : src) {
        res[i++] = fi;
      }
      return res;
    }

    @Override
    public Iterator<FieldInfo> iterator() {
      return filtered.iterator();
    }

    @Override
    public boolean hasFreq() {
      return filteredHasFreq;
    }

    @Override
    public boolean hasProx() {
      return filteredHasProx;
    }

    @Override
    public boolean hasPayloads() {
      return filteredHasPayloads;
    }

    @Override
    public boolean hasOffsets() {
      return filteredHasOffsets;
    }

    @Override
    public boolean hasVectors() {
      return filteredHasVectors;
    }

    @Override
    public boolean hasNorms() {
      return filteredHasNorms;
    }

    @Override
    public boolean hasDocValues() {
      return filteredHasDocValues;
    }

    @Override
    public boolean hasPointValues() {
      return filteredHasPointValues;
    }

    @Override
    public int size() {
      return filtered.size();
    }

    @Override
    public FieldInfo fieldInfo(String fieldName) {
      if (!filteredNames.contains(fieldName)) {
        // Throw IAE to be consistent with fieldInfo(int) which throws it as well on invalid numbers
        throw new IllegalArgumentException("The field named '" + fieldName + "' is not accessible in the current " +
            "merge context, available ones are: " + filteredNames);
      }
      return super.fieldInfo(fieldName);
    }

    @Override
    public FieldInfo fieldInfo(int fieldNumber) {
      FieldInfo res = super.fieldInfo(fieldNumber);
      if (!filteredNames.contains(res.name)) {
        throw new IllegalArgumentException("The field named '" + res.name + "' numbered '" + fieldNumber + "' is not " +
            "accessible in the current merge context, available ones are: " + filteredNames);
      }
      return res;
    }
  }

  private static class FilterFieldsProducer extends FieldsProducer {
    private final FieldsProducer in;
    private final List<String> filtered;

    FilterFieldsProducer(FieldsProducer in, Collection<String> filterFields) {
      this.in = in;
      this.filtered = new ArrayList<>(filterFields);
    }

    @Override
    public long ramBytesUsed() {
      return in.ramBytesUsed();
    }

    @Override
    public Iterator<String> iterator() {
      return filtered.iterator();
    }

    @Override
    public Terms terms(String field) throws IOException {
      if (!filtered.contains(field)) {
        throw new IllegalArgumentException("The field named '" + field + "' is not accessible in the current " +
            "merge context, available ones are: " + filtered);
      }
      return in.terms(field);
    }

    @Override
    public int size() {
      return filtered.size();
    }

    @Override
    public void close() throws IOException {
      in.close();
    }

    @Override
    public void checkIntegrity() throws IOException {
      in.checkIntegrity();
    }
  }
}
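PerFieldMergeState amounts to a save-mutate-restore guard around the shared, mutable MergeState. A generic sketch of that pattern, with a hypothetical Holder type standing in for MergeState:

// Snapshot mutable shared state, hand out a restricted view, restore later.
final class Holder {
  String[] items = {"a", "b", "c"};
}

final class Scoped {
  private final Holder in;
  private final String[] saved;

  Scoped(Holder in) {
    this.in = in;
    this.saved = in.items.clone(); // eager snapshot, like the arraycopy above
  }

  Holder apply(String only) {
    in.items = new String[] {only}; // mutate the shared instance in place
    return in;
  }

  void reset() {
    in.items = saved; // restore the snapshot
  }
}

As with apply()/reset() above, reset() must run in a finally block; otherwise an exception in one delegate's merge would leak the filtered view to every later user of the same MergeState.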
lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java:

@@ -39,6 +39,8 @@ import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.FilterLeafReader.FilterFields;
 import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.Terms;
@@ -116,7 +118,61 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
 
     @Override
     public void write(Fields fields) throws IOException {
+      Map<PostingsFormat, FieldsGroup> formatToGroups = buildFieldsGroupMapping(fields);
+
+      // Write postings
+      boolean success = false;
+      try {
+        for (Map.Entry<PostingsFormat, FieldsGroup> ent : formatToGroups.entrySet()) {
+          PostingsFormat format = ent.getKey();
+          final FieldsGroup group = ent.getValue();
+
+          // Exposes only the fields from this group:
+          Fields maskedFields = new FilterFields(fields) {
+            @Override
+            public Iterator<String> iterator() {
+              return group.fields.iterator();
+            }
+          };
+
+          FieldsConsumer consumer = format.fieldsConsumer(group.state);
+          toClose.add(consumer);
+          consumer.write(maskedFields);
+        }
+        success = true;
+      } finally {
+        if (!success) {
+          IOUtils.closeWhileHandlingException(toClose);
+        }
+      }
+    }
+
+    @Override
+    public void merge(MergeState mergeState) throws IOException {
+      Map<PostingsFormat, FieldsGroup> formatToGroups = buildFieldsGroupMapping(new MultiFields(mergeState.fieldsProducers, null));
+
+      // Merge postings
+      PerFieldMergeState pfMergeState = new PerFieldMergeState(mergeState);
+      boolean success = false;
+      try {
+        for (Map.Entry<PostingsFormat, FieldsGroup> ent : formatToGroups.entrySet()) {
+          PostingsFormat format = ent.getKey();
+          final FieldsGroup group = ent.getValue();
+
+          FieldsConsumer consumer = format.fieldsConsumer(group.state);
+          toClose.add(consumer);
+          consumer.merge(pfMergeState.apply(group.fields));
+        }
+        success = true;
+      } finally {
+        pfMergeState.reset();
+        if (!success) {
+          IOUtils.closeWhileHandlingException(toClose);
+        }
+      }
+    }
+
+    private Map<PostingsFormat, FieldsGroup> buildFieldsGroupMapping(Fields fields) {
       // Maps a PostingsFormat instance to the suffix it
       // should use
       Map<PostingsFormat,FieldsGroup> formatToGroups = new HashMap<>();
@@ -124,7 +180,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
       // Holds last suffix of each PostingFormat name
       Map<String,Integer> suffixes = new HashMap<>();
 
-      // First pass: assign field -> PostingsFormat
+      // Assign field -> PostingsFormat
       for(String field : fields) {
         FieldInfo fieldInfo = writeState.fieldInfos.fieldInfo(field);
@@ -177,32 +233,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
             ", field=" + fieldInfo.name + ", old=" + previousValue + ", new=" + group.suffix);
         }
       }
-
-      // Second pass: write postings
-      boolean success = false;
-      try {
-        for(Map.Entry<PostingsFormat,FieldsGroup> ent : formatToGroups.entrySet()) {
-          PostingsFormat format = ent.getKey();
-          final FieldsGroup group = ent.getValue();
-
-          // Exposes only the fields from this group:
-          Fields maskedFields = new FilterFields(fields) {
-            @Override
-            public Iterator<String> iterator() {
-              return group.fields.iterator();
-            }
-          };
-
-          FieldsConsumer consumer = format.fieldsConsumer(group.state);
-          toClose.add(consumer);
-          consumer.write(maskedFields);
-        }
-        success = true;
-      } finally {
-        if (success == false) {
-          IOUtils.closeWhileHandlingException(toClose);
-        }
-      }
+      return formatToGroups;
     }
 
     @Override
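Note how merge() obtains its Fields view: new MultiFields(mergeState.fieldsProducers, null) presents the union of the field names across every segment being merged, so the grouping logic shared with write() sees each field once. Conceptually that union is just the following (plain collections, not the MultiFields implementation):

import java.util.Arrays;
import java.util.List;
import java.util.TreeSet;

public class UnionFields {
  public static void main(String[] args) {
    // Field names visible in each source segment of the merge.
    List<List<String>> perSegment = Arrays.asList(
        Arrays.asList("f1", "f2"),
        Arrays.asList("f2", "f4"));

    // MultiFields-style view: sorted union across segments.
    TreeSet<String> union = new TreeSet<>();
    perSegment.forEach(union::addAll);
    System.out.println(union); // prints [f1, f2, f4]
  }
}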
lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java:

@@ -18,13 +18,19 @@ package org.apache.lucene.codecs.perfield;
 
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
 import java.util.Random;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.codecs.asserting.AssertingCodec;
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
@@ -33,11 +39,15 @@ import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.index.BaseDocValuesFormatTestCase;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.MergeState;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.RandomCodec;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
@@ -127,4 +137,116 @@ public class TestPerFieldDocValuesFormat extends BaseDocValuesFormatTestCase {
     ireader.close();
     directory.close();
   }
+
+  public void testMergeCalledOnTwoFormats() throws IOException {
+    MergeRecordingDocValueFormatWrapper dvf1 = new MergeRecordingDocValueFormatWrapper(TestUtil.getDefaultDocValuesFormat());
+    MergeRecordingDocValueFormatWrapper dvf2 = new MergeRecordingDocValueFormatWrapper(TestUtil.getDefaultDocValuesFormat());
+
+    IndexWriterConfig iwc = newIndexWriterConfig();
+    iwc.setCodec(new AssertingCodec() {
+      @Override
+      public DocValuesFormat getDocValuesFormatForField(String field) {
+        switch (field) {
+          case "dv1":
+          case "dv2":
+            return dvf1;
+
+          case "dv3":
+            return dvf2;
+
+          default:
+            return super.getDocValuesFormatForField(field);
+        }
+      }
+    });
+
+    Directory directory = newDirectory();
+
+    IndexWriter iwriter = new IndexWriter(directory, iwc);
+
+    Document doc = new Document();
+    doc.add(new NumericDocValuesField("dv1", 5));
+    doc.add(new NumericDocValuesField("dv2", 42));
+    doc.add(new BinaryDocValuesField("dv3", new BytesRef("hello world")));
+    iwriter.addDocument(doc);
+    iwriter.commit();
+
+    doc = new Document();
+    doc.add(new NumericDocValuesField("dv1", 8));
+    doc.add(new NumericDocValuesField("dv2", 45));
+    doc.add(new BinaryDocValuesField("dv3", new BytesRef("goodbye world")));
+    iwriter.addDocument(doc);
+    iwriter.commit();
+
+    iwriter.forceMerge(1, true);
+    iwriter.close();
+
+    assertEquals(1, dvf1.nbMergeCalls);
+    assertEquals(new HashSet<>(Arrays.asList("dv1", "dv2")), new HashSet<>(dvf1.fieldNames));
+    assertEquals(1, dvf2.nbMergeCalls);
+    assertEquals(Collections.singletonList("dv3"), dvf2.fieldNames);
+
+    directory.close();
+  }
+
+  private static final class MergeRecordingDocValueFormatWrapper extends DocValuesFormat {
+    private final DocValuesFormat delegate;
+    final List<String> fieldNames = new ArrayList<>();
+    volatile int nbMergeCalls = 0;
+
+    MergeRecordingDocValueFormatWrapper(DocValuesFormat delegate) {
+      super(delegate.getName());
+      this.delegate = delegate;
+    }
+
+    @Override
+    public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+      final DocValuesConsumer consumer = delegate.fieldsConsumer(state);
+      return new DocValuesConsumer() {
+        @Override
+        public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
+          consumer.addNumericField(field, values);
+        }
+
+        @Override
+        public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException {
+          consumer.addBinaryField(field, values);
+        }
+
+        @Override
+        public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
+          consumer.addSortedField(field, values, docToOrd);
+        }
+
+        @Override
+        public void addSortedNumericField(FieldInfo field, Iterable<Number> docToValueCount, Iterable<Number> values) throws IOException {
+          consumer.addSortedNumericField(field, docToValueCount, values);
+        }
+
+        @Override
+        public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
+          consumer.addSortedSetField(field, values, docToOrdCount, ords);
+        }
+
+        @Override
+        public void merge(MergeState mergeState) throws IOException {
+          nbMergeCalls++;
+          for (FieldInfo fi : mergeState.mergeFieldInfos) {
+            fieldNames.add(fi.name);
+          }
+          consumer.merge(mergeState);
+        }
+
+        @Override
+        public void close() throws IOException {
+          consumer.close();
+        }
+      };
+    }
+
+    @Override
+    public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
+      return delegate.fieldsProducer(state);
+    }
+  }
 }
lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java:

@@ -18,9 +18,16 @@ package org.apache.lucene.codecs.perfield;
 
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.asserting.AssertingCodec;
 import org.apache.lucene.codecs.blockterms.LuceneVarGapFixedInterval;
@@ -29,14 +36,21 @@ import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LogDocMergePolicy;
+import org.apache.lucene.index.MergeState;
 import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.TermQuery;
@@ -322,4 +336,100 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
     iw.close();
     dir.close(); // checkindex
   }
+
+  @SuppressWarnings("deprecation")
+  public void testMergeCalledOnTwoFormats() throws IOException {
+    MergeRecordingPostingsFormatWrapper pf1 = new MergeRecordingPostingsFormatWrapper(TestUtil.getDefaultPostingsFormat());
+    MergeRecordingPostingsFormatWrapper pf2 = new MergeRecordingPostingsFormatWrapper(TestUtil.getDefaultPostingsFormat());
+
+    IndexWriterConfig iwc = newIndexWriterConfig();
+    iwc.setCodec(new AssertingCodec() {
+      @Override
+      public PostingsFormat getPostingsFormatForField(String field) {
+        switch (field) {
+          case "f1":
+          case "f2":
+            return pf1;
+
+          case "f3":
+          case "f4":
+            return pf2;
+
+          default:
+            return super.getPostingsFormatForField(field);
+        }
+      }
+    });
+
+    Directory directory = newDirectory();
+
+    IndexWriter iwriter = new IndexWriter(directory, iwc);
+
+    Document doc = new Document();
+    doc.add(new StringField("f1", "val1", Field.Store.NO));
+    doc.add(new StringField("f2", "val2", Field.Store.YES));
+    doc.add(new IntPoint("f3", 3)); // Points are not indexed as postings and should not appear in the merge fields
+    doc.add(new StringField("f4", "val4", Field.Store.NO));
+    iwriter.addDocument(doc);
+    iwriter.commit();
+
+    doc = new Document();
+    doc.add(new StringField("f1", "val5", Field.Store.NO));
+    doc.add(new StringField("f2", "val6", Field.Store.YES));
+    doc.add(new IntPoint("f3", 7));
+    doc.add(new StringField("f4", "val8", Field.Store.NO));
+    iwriter.addDocument(doc);
+    iwriter.commit();
+
+    iwriter.forceMerge(1, true);
+    iwriter.close();
+
+    assertEquals(1, pf1.nbMergeCalls);
+    assertEquals(new HashSet<>(Arrays.asList("f1", "f2")), new HashSet<>(pf1.fieldNames));
+    assertEquals(1, pf2.nbMergeCalls);
+    assertEquals(Collections.singletonList("f4"), pf2.fieldNames);
+
+    directory.close();
+  }
+
+  private static final class MergeRecordingPostingsFormatWrapper extends PostingsFormat {
+    private final PostingsFormat delegate;
+    final List<String> fieldNames = new ArrayList<>();
+    int nbMergeCalls = 0;
+
+    MergeRecordingPostingsFormatWrapper(PostingsFormat delegate) {
+      super(delegate.getName());
+      this.delegate = delegate;
+    }
+
+    @Override
+    public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+      final FieldsConsumer consumer = delegate.fieldsConsumer(state);
+      return new FieldsConsumer() {
+        @Override
+        public void write(Fields fields) throws IOException {
+          consumer.write(fields);
+        }
+
+        @Override
+        public void merge(MergeState mergeState) throws IOException {
+          nbMergeCalls++;
+          for (FieldInfo fi : mergeState.mergeFieldInfos) {
+            fieldNames.add(fi.name);
+          }
+          consumer.merge(mergeState);
+        }
+
+        @Override
+        public void close() throws IOException {
+          consumer.close();
+        }
+      };
+    }
+
+    @Override
+    public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+      return delegate.fieldsProducer(state);
+    }
+  }
 }