mirror of https://github.com/apache/lucene.git
LUCENE-5752: merge trunk
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5752@1603107 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
commit
a135294eda
|
@ -282,6 +282,12 @@ Optimizations
|
|||
|
||||
* LUCENE-5751: Speed up MemoryDocValues. (Adrien Grand, Robert Muir)
|
||||
|
||||
* LUCENE-5767: OrdinalMap optimizations that mostly help on low cardinalities.
|
||||
(Martijn van Groningen, Adrien Grand)
|
||||
|
||||
* LUCENE-5769: SingletonSortedSetDocValues now supports random access ordinals.
|
||||
(Robert Muir)
|
||||
|
||||
Bug fixes
|
||||
|
||||
* LUCENE-5738: Ensure NativeFSLock prevents opening the file channel for the
|
||||
|
@ -327,6 +333,9 @@ Bug fixes
|
|||
* LUCENE-5747: Project-specific settings for the eclipse development
|
||||
environment will prevent automatic code reformatting. (Shawn Heisey)
|
||||
|
||||
* LUCENE-5768: Hunspell condition checks containing character classes
|
||||
were buggy. (Clinton Gormley, Robert Muir)
|
||||
|
||||
Test Framework
|
||||
|
||||
* LUCENE-5622: Fail tests if they print over the given limit of bytes to
|
||||
|
|
|
@ -420,7 +420,7 @@ public class Dictionary {
|
|||
|
||||
String condition = ruleArgs.length > 4 ? ruleArgs[4] : ".";
|
||||
// at least the gascon affix file has this issue
|
||||
if (condition.startsWith("[") && !condition.endsWith("]")) {
|
||||
if (condition.startsWith("[") && condition.indexOf(']') == -1) {
|
||||
condition = condition + "]";
|
||||
}
|
||||
// "dash hasn't got special meaning" (we must escape it)
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
package org.apache.lucene.analysis.hunspell;
|
||||
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
public class TestCondition2 extends StemmerTestBase {
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
init("condition2.aff", "condition2.dic");
|
||||
}
|
||||
|
||||
public void testStemming() {
|
||||
assertStemsTo("monopolies", "monopoly");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
SET ISO8859-1
|
||||
TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ'
|
||||
|
||||
SFX S Y 1
|
||||
SFX S y ies [^aeiou]y
|
|
@ -0,0 +1,2 @@
|
|||
1
|
||||
monopoly/S
|
|
@ -39,6 +39,7 @@ import org.apache.lucene.util.ArrayUtil;
|
|||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LongBitSet;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
|
||||
/**
|
||||
* Abstract API that consumes numeric, binary and
|
||||
|
@ -505,6 +506,7 @@ public abstract class DocValuesConsumer implements Closeable {
|
|||
int nextValue;
|
||||
AtomicReader currentReader;
|
||||
Bits currentLiveDocs;
|
||||
LongValues currentMap;
|
||||
boolean nextIsSet;
|
||||
|
||||
@Override
|
||||
|
@ -539,6 +541,7 @@ public abstract class DocValuesConsumer implements Closeable {
|
|||
if (readerUpto < readers.length) {
|
||||
currentReader = readers[readerUpto];
|
||||
currentLiveDocs = currentReader.getLiveDocs();
|
||||
currentMap = map.getGlobalOrds(readerUpto);
|
||||
}
|
||||
docIDUpto = 0;
|
||||
continue;
|
||||
|
@ -547,7 +550,7 @@ public abstract class DocValuesConsumer implements Closeable {
|
|||
if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
|
||||
nextIsSet = true;
|
||||
int segOrd = dvs[readerUpto].getOrd(docIDUpto);
|
||||
nextValue = segOrd == -1 ? -1 : (int) map.getGlobalOrd(readerUpto, segOrd);
|
||||
nextValue = segOrd == -1 ? -1 : (int) currentMap.get(segOrd);
|
||||
docIDUpto++;
|
||||
return true;
|
||||
}
|
||||
|
@ -707,6 +710,7 @@ public abstract class DocValuesConsumer implements Closeable {
|
|||
long nextValue;
|
||||
AtomicReader currentReader;
|
||||
Bits currentLiveDocs;
|
||||
LongValues currentMap;
|
||||
boolean nextIsSet;
|
||||
long ords[] = new long[8];
|
||||
int ordUpto;
|
||||
|
@ -751,6 +755,7 @@ public abstract class DocValuesConsumer implements Closeable {
|
|||
if (readerUpto < readers.length) {
|
||||
currentReader = readers[readerUpto];
|
||||
currentLiveDocs = currentReader.getLiveDocs();
|
||||
currentMap = map.getGlobalOrds(readerUpto);
|
||||
}
|
||||
docIDUpto = 0;
|
||||
continue;
|
||||
|
@ -766,7 +771,7 @@ public abstract class DocValuesConsumer implements Closeable {
|
|||
if (ordLength == ords.length) {
|
||||
ords = ArrayUtil.grow(ords, ordLength+1);
|
||||
}
|
||||
ords[ordLength] = map.getGlobalOrd(readerUpto, ord);
|
||||
ords[ordLength] = currentMap.get(ord);
|
||||
ordLength++;
|
||||
}
|
||||
docIDUpto++;
|
||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.index.MultiTermsEnum.TermsEnumWithSlice;
|
|||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
|
||||
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
|
||||
|
@ -373,7 +374,7 @@ public class MultiDocValues {
|
|||
return new MultiSortedSetDocValues(values, starts, mapping);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** maps per-segment ordinals to/from global ordinal space */
|
||||
// TODO: use more efficient packed ints structures?
|
||||
// TODO: pull this out? it's pretty generic (maps between N ord()-enabled TermsEnums)
|
||||
|
@ -387,8 +388,10 @@ public class MultiDocValues {
|
|||
final MonotonicAppendingLongBuffer globalOrdDeltas;
|
||||
// globalOrd -> first segment container
|
||||
final AppendingPackedLongBuffer firstSegments;
|
||||
// for every segment, segmentOrd -> (globalOrd - segmentOrd)
|
||||
final MonotonicAppendingLongBuffer ordDeltas[];
|
||||
// for every segment, segmentOrd -> globalOrd
|
||||
final LongValues segmentToGlobalOrds[];
|
||||
// ram usage
|
||||
final long ramBytesUsed;
|
||||
|
||||
/**
|
||||
* Creates an ordinal map that allows mapping ords to/from a merged
|
||||
|
@ -398,16 +401,20 @@ public class MultiDocValues {
|
|||
* not be dense (e.g. can be FilteredTermsEnums).
|
||||
* @throws IOException if an I/O error occurred.
|
||||
*/
|
||||
public OrdinalMap(Object owner, TermsEnum subs[]) throws IOException {
|
||||
public OrdinalMap(Object owner, TermsEnum subs[], float acceptableOverheadRatio) throws IOException {
|
||||
// create the ordinal mappings by pulling a termsenum over each sub's
|
||||
// unique terms, and walking a multitermsenum over those
|
||||
this.owner = owner;
|
||||
// even though we accept an overhead ratio, we keep these ones with COMPACT
|
||||
// since they are only used to resolve values given a global ord, which is
|
||||
// slow anyway
|
||||
globalOrdDeltas = new MonotonicAppendingLongBuffer(PackedInts.COMPACT);
|
||||
firstSegments = new AppendingPackedLongBuffer(PackedInts.COMPACT);
|
||||
ordDeltas = new MonotonicAppendingLongBuffer[subs.length];
|
||||
final MonotonicAppendingLongBuffer[] ordDeltas = new MonotonicAppendingLongBuffer[subs.length];
|
||||
for (int i = 0; i < ordDeltas.length; i++) {
|
||||
ordDeltas[i] = new MonotonicAppendingLongBuffer();
|
||||
ordDeltas[i] = new MonotonicAppendingLongBuffer(acceptableOverheadRatio);
|
||||
}
|
||||
long[] ordDeltaBits = new long[subs.length];
|
||||
long segmentOrds[] = new long[subs.length];
|
||||
ReaderSlice slices[] = new ReaderSlice[subs.length];
|
||||
TermsEnumIndex indexes[] = new TermsEnumIndex[slices.length];
|
||||
|
@ -431,6 +438,7 @@ public class MultiDocValues {
|
|||
}
|
||||
// for each per-segment ord, map it back to the global term.
|
||||
while (segmentOrds[segmentIndex] <= segmentOrd) {
|
||||
ordDeltaBits[segmentIndex] |= delta;
|
||||
ordDeltas[segmentIndex].add(delta);
|
||||
segmentOrds[segmentIndex]++;
|
||||
}
|
||||
|
@ -442,14 +450,62 @@ public class MultiDocValues {
|
|||
for (int i = 0; i < ordDeltas.length; ++i) {
|
||||
ordDeltas[i].freeze();
|
||||
}
|
||||
// ordDeltas is typically the bottleneck, so let's see what we can do to make it faster
|
||||
segmentToGlobalOrds = new LongValues[subs.length];
|
||||
long ramBytesUsed = BASE_RAM_BYTES_USED + globalOrdDeltas.ramBytesUsed() + firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds);
|
||||
for (int i = 0; i < ordDeltas.length; ++i) {
|
||||
final MonotonicAppendingLongBuffer deltas = ordDeltas[i];
|
||||
if (ordDeltaBits[i] == 0L) {
|
||||
// segment ords perfectly match global ordinals
|
||||
// likely in case of low cardinalities and large segments
|
||||
segmentToGlobalOrds[i] = LongValues.IDENTITY;
|
||||
} else {
|
||||
final int bitsRequired = ordDeltaBits[i] < 0 ? 64 : PackedInts.bitsRequired(ordDeltaBits[i]);
|
||||
final long monotonicBits = deltas.ramBytesUsed() * 8;
|
||||
final long packedBits = bitsRequired * deltas.size();
|
||||
if (deltas.size() <= Integer.MAX_VALUE
|
||||
&& packedBits <= monotonicBits * (1 + acceptableOverheadRatio)) {
|
||||
// monotonic compression mostly adds overhead, let's keep the mapping in plain packed ints
|
||||
final int size = (int) deltas.size();
|
||||
final PackedInts.Mutable newDeltas = PackedInts.getMutable(size, bitsRequired, acceptableOverheadRatio);
|
||||
final MonotonicAppendingLongBuffer.Iterator it = deltas.iterator();
|
||||
for (int ord = 0; ord < size; ++ord) {
|
||||
newDeltas.set(ord, it.next());
|
||||
}
|
||||
assert !it.hasNext();
|
||||
segmentToGlobalOrds[i] = new LongValues() {
|
||||
@Override
|
||||
public long get(long ord) {
|
||||
return ord + newDeltas.get((int) ord);
|
||||
}
|
||||
};
|
||||
ramBytesUsed += newDeltas.ramBytesUsed();
|
||||
} else {
|
||||
segmentToGlobalOrds[i] = new LongValues() {
|
||||
@Override
|
||||
public long get(long ord) {
|
||||
return ord + deltas.get((int) ord);
|
||||
}
|
||||
};
|
||||
ramBytesUsed += deltas.ramBytesUsed();
|
||||
}
|
||||
ramBytesUsed += RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds[i]);
|
||||
}
|
||||
}
|
||||
this.ramBytesUsed = ramBytesUsed;
|
||||
}
|
||||
|
||||
|
||||
/** Create an {@link OrdinalMap} with the default overhead ratio. */
|
||||
public OrdinalMap(Object owner, TermsEnum subs[]) throws IOException {
|
||||
this(owner, subs, PackedInts.DEFAULT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a segment number and segment ordinal, returns
|
||||
* the corresponding global ordinal.
|
||||
* Given a segment number, return a {@link LongValues} instance that maps
|
||||
* segment ordinals to global ordinals.
|
||||
*/
|
||||
public long getGlobalOrd(int segmentIndex, long segmentOrd) {
|
||||
return segmentOrd + ordDeltas[segmentIndex].get(segmentOrd);
|
||||
public LongValues getGlobalOrds(int segmentIndex) {
|
||||
return segmentToGlobalOrds[segmentIndex];
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -477,11 +533,7 @@ public class MultiDocValues {
|
|||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
long size = BASE_RAM_BYTES_USED + globalOrdDeltas.ramBytesUsed() + firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(ordDeltas);
|
||||
for (int i = 0; i < ordDeltas.length; i++) {
|
||||
size += ordDeltas[i].ramBytesUsed();
|
||||
}
|
||||
return size;
|
||||
return ramBytesUsed;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -499,7 +551,7 @@ public class MultiDocValues {
|
|||
|
||||
/** Creates a new MultiSortedDocValues over <code>values</code> */
|
||||
MultiSortedDocValues(SortedDocValues values[], int docStarts[], OrdinalMap mapping) throws IOException {
|
||||
assert values.length == mapping.ordDeltas.length;
|
||||
assert values.length == mapping.segmentToGlobalOrds.length;
|
||||
assert docStarts.length == values.length + 1;
|
||||
this.values = values;
|
||||
this.docStarts = docStarts;
|
||||
|
@ -510,7 +562,7 @@ public class MultiDocValues {
|
|||
public int getOrd(int docID) {
|
||||
int subIndex = ReaderUtil.subIndex(docID, docStarts);
|
||||
int segmentOrd = values[subIndex].getOrd(docID - docStarts[subIndex]);
|
||||
return segmentOrd == -1 ? segmentOrd : (int) mapping.getGlobalOrd(subIndex, segmentOrd);
|
||||
return segmentOrd == -1 ? segmentOrd : (int) mapping.segmentToGlobalOrds[subIndex].get(segmentOrd);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -541,7 +593,7 @@ public class MultiDocValues {
|
|||
|
||||
/** Creates a new MultiSortedSetDocValues over <code>values</code> */
|
||||
MultiSortedSetDocValues(SortedSetDocValues values[], int docStarts[], OrdinalMap mapping) throws IOException {
|
||||
assert values.length == mapping.ordDeltas.length;
|
||||
assert values.length == mapping.segmentToGlobalOrds.length;
|
||||
assert docStarts.length == values.length + 1;
|
||||
this.values = values;
|
||||
this.docStarts = docStarts;
|
||||
|
@ -554,7 +606,7 @@ public class MultiDocValues {
|
|||
if (segmentOrd == NO_MORE_ORDS) {
|
||||
return segmentOrd;
|
||||
} else {
|
||||
return mapping.getGlobalOrd(currentSubIndex, segmentOrd);
|
||||
return mapping.segmentToGlobalOrds[currentSubIndex].get(segmentOrd);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -25,10 +25,10 @@ import org.apache.lucene.util.BytesRef;
|
|||
* This can be used if you want to have one multi-valued implementation
|
||||
* that works for single or multi-valued types.
|
||||
*/
|
||||
final class SingletonSortedSetDocValues extends SortedSetDocValues {
|
||||
final class SingletonSortedSetDocValues extends RandomAccessOrds {
|
||||
private final SortedDocValues in;
|
||||
private int docID;
|
||||
private boolean set;
|
||||
private long currentOrd;
|
||||
private long ord;
|
||||
|
||||
/** Creates a multi-valued view over the provided SortedDocValues */
|
||||
public SingletonSortedSetDocValues(SortedDocValues in) {
|
||||
|
@ -43,18 +43,14 @@ final class SingletonSortedSetDocValues extends SortedSetDocValues {
|
|||
|
||||
@Override
|
||||
public long nextOrd() {
|
||||
if (set) {
|
||||
return NO_MORE_ORDS;
|
||||
} else {
|
||||
set = true;
|
||||
return in.getOrd(docID);
|
||||
}
|
||||
long v = currentOrd;
|
||||
currentOrd = NO_MORE_ORDS;
|
||||
return v;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setDocument(int docID) {
|
||||
this.docID = docID;
|
||||
set = false;
|
||||
currentOrd = ord = in.getOrd(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -72,4 +68,19 @@ final class SingletonSortedSetDocValues extends SortedSetDocValues {
|
|||
public long lookupTerm(BytesRef key) {
|
||||
return in.lookupTerm(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ordAt(int index) {
|
||||
return ord;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int cardinality() {
|
||||
return (int) (ord >>> 63) ^ 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermsEnum termsEnum() {
|
||||
return in.termsEnum();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,6 +27,16 @@ import org.apache.lucene.util.packed.PackedInts;
|
|||
* @lucene.internal */
|
||||
public abstract class LongValues extends NumericDocValues {
|
||||
|
||||
/** An identity mapping: {@link #get(long)} returns the provided index unchanged. */
|
||||
public static final LongValues IDENTITY = new LongValues() {
|
||||
|
||||
@Override
|
||||
public long get(long index) {
|
||||
return index;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/** Get value at <code>index</code>. */
|
||||
public abstract long get(long index);
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
|
|||
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.RamUsageTester;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
@ -44,6 +45,10 @@ public class TestOrdinalMap extends LuceneTestCase {
|
|||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean accept(Object o) {
|
||||
return o != LongValues.IDENTITY;
|
||||
}
|
||||
};
|
||||
|
||||
public void testRamBytesUsed() throws IOException {
|
||||
|
|
|
@ -40,6 +40,7 @@ import org.apache.lucene.index.ReaderUtil;
|
|||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
|
||||
/** Compute facets counts from previously
|
||||
* indexed {@link SortedSetDocValuesFacetField},
|
||||
|
@ -188,7 +189,8 @@ public class SortedSetDocValuesFacetCounts extends Facets {
|
|||
// temp ram req'ts (sum of number of ords across all
|
||||
// segs)
|
||||
if (ordinalMap != null) {
|
||||
int segOrd = hits.context.ord;
|
||||
final int segOrd = hits.context.ord;
|
||||
final LongValues ordMap = ordinalMap.getGlobalOrds(segOrd);
|
||||
|
||||
int numSegOrds = (int) segValues.getValueCount();
|
||||
|
||||
|
@ -202,7 +204,7 @@ public class SortedSetDocValuesFacetCounts extends Facets {
|
|||
int term = (int) segValues.nextOrd();
|
||||
while (term != SortedSetDocValues.NO_MORE_ORDS) {
|
||||
//System.out.println(" segOrd=" + segOrd + " ord=" + term + " globalOrd=" + ordinalMap.getGlobalOrd(segOrd, term));
|
||||
counts[(int) ordinalMap.getGlobalOrd(segOrd, term)]++;
|
||||
counts[(int) ordMap.get(term)]++;
|
||||
term = (int) segValues.nextOrd();
|
||||
}
|
||||
}
|
||||
|
@ -228,7 +230,7 @@ public class SortedSetDocValuesFacetCounts extends Facets {
|
|||
int count = segCounts[ord];
|
||||
if (count != 0) {
|
||||
//System.out.println(" migrate segOrd=" + segOrd + " ord=" + ord + " globalOrd=" + ordinalMap.getGlobalOrd(segOrd, ord));
|
||||
counts[(int) ordinalMap.getGlobalOrd(segOrd, ord)] += count;
|
||||
counts[(int) ordMap.get(ord)] += count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,6 +39,10 @@ public final class RamUsageTester {
|
|||
return true;
|
||||
}
|
||||
|
||||
public boolean accept(Object o) {
|
||||
return true;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/** A filter that decides what to take into account when measuring RAM usage. */
|
||||
|
@ -47,6 +51,9 @@ public final class RamUsageTester {
|
|||
/** Whether the provided field should be taken into account when measuring RAM usage. */
|
||||
boolean accept(Field field);
|
||||
|
||||
/** Whether the provided object (a field value or an array element) should be taken into account when measuring RAM usage. */
|
||||
boolean accept(Object o);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -119,7 +126,7 @@ public final class RamUsageTester {
|
|||
// Push refs for traversal later.
|
||||
for (int i = len; --i >= 0 ;) {
|
||||
final Object o = Array.get(ob, i);
|
||||
if (o != null && !seen.contains(o)) {
|
||||
if (o != null && !seen.contains(o) && filter.accept(o)) {
|
||||
stack.add(o);
|
||||
}
|
||||
}
|
||||
|
@ -141,7 +148,7 @@ public final class RamUsageTester {
|
|||
if (filter.accept(f)) {
|
||||
// Fast path to eliminate redundancies.
|
||||
final Object o = f.get(ob);
|
||||
if (o != null && !seen.contains(o)) {
|
||||
if (o != null && !seen.contains(o) && filter.accept(o)) {
|
||||
stack.add(o);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -184,6 +184,9 @@ Bug Fixes
|
|||
* SOLR-5426: Fixed a bug in ReverseWildCardFilter that could cause
|
||||
InvalidTokenOffsetsException when highlighting. (Uwe Schindler, Arun Kumar, via hossman)
|
||||
|
||||
* SOLR-6175: DebugComponent throws NPE on shard exceptions when using shards.tolerant.
|
||||
(Tomás Fernández Löbbe via shalin)
|
||||
|
||||
Other Changes
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -206,6 +206,11 @@ public class DebugComponent extends SearchComponent
|
|||
|
||||
for (ShardRequest sreq : rb.finished) {
|
||||
for (ShardResponse srsp : sreq.responses) {
|
||||
if (srsp.getException() != null) {
|
||||
// can't expect the debug content if there was an exception for this request
|
||||
// this should only happen when using shards.tolerant=true
|
||||
continue;
|
||||
}
|
||||
NamedList sdebug = (NamedList)srsp.getSolrResponse().getResponse().get("debug");
|
||||
info = (NamedList)merge(sdebug, info, EXCLUDE_SET);
|
||||
if ((sreq.purpose & ShardRequest.PURPOSE_GET_DEBUG) != 0) {
|
||||
|
@ -257,6 +262,10 @@ public class DebugComponent extends SearchComponent
|
|||
|
||||
private NamedList<String> getTrackResponse(ShardResponse shardResponse) {
|
||||
NamedList<String> namedList = new NamedList<>();
|
||||
if (shardResponse.getException() != null) {
|
||||
namedList.add("Exception", shardResponse.getException().getMessage());
|
||||
return namedList;
|
||||
}
|
||||
NamedList<Object> responseNL = shardResponse.getSolrResponse().getResponse();
|
||||
@SuppressWarnings("unchecked")
|
||||
NamedList<Object> responseHeader = (NamedList<Object>)responseNL.get("responseHeader");
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.lucene.search.DocIdSetIterator;
|
|||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.solr.common.params.FacetParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
@ -247,11 +248,12 @@ public class DocValuesFacets {
|
|||
|
||||
/** accumulates per-segment single-valued facet counts, mapping to global ordinal space on-the-fly */
|
||||
static void accumSingleGeneric(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
|
||||
final LongValues ordmap = map == null ? null : map.getGlobalOrds(subIndex);
|
||||
int doc;
|
||||
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
int term = si.getOrd(doc);
|
||||
if (map != null && term >= 0) {
|
||||
term = (int) map.getGlobalOrd(subIndex, term);
|
||||
term = (int) ordmap.get(term);
|
||||
}
|
||||
int arrIdx = term-startTermIndex;
|
||||
if (arrIdx>=0 && arrIdx<counts.length) counts[arrIdx]++;
|
||||
|
@ -293,6 +295,7 @@ public class DocValuesFacets {
|
|||
|
||||
/** accumulates per-segment multi-valued facet counts, mapping to global ordinal space on-the-fly */
|
||||
static void accumMultiGeneric(int counts[], int startTermIndex, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
|
||||
final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex);
|
||||
int doc;
|
||||
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
si.setDocument(doc);
|
||||
|
@ -307,7 +310,7 @@ public class DocValuesFacets {
|
|||
|
||||
do {
|
||||
if (map != null) {
|
||||
term = (int) map.getGlobalOrd(subIndex, term);
|
||||
term = (int) ordMap.get(term);
|
||||
}
|
||||
int arrIdx = term-startTermIndex;
|
||||
if (arrIdx>=0 && arrIdx<counts.length) counts[arrIdx]++;
|
||||
|
@ -346,6 +349,7 @@ public class DocValuesFacets {
|
|||
|
||||
/** folds counts in segment ordinal space (segCounts) into global ordinal space (counts) */
|
||||
static void migrateGlobal(int counts[], int segCounts[], int subIndex, OrdinalMap map) {
|
||||
final LongValues ordMap = map.getGlobalOrds(subIndex);
|
||||
// missing count
|
||||
counts[0] += segCounts[0];
|
||||
|
||||
|
@ -353,7 +357,7 @@ public class DocValuesFacets {
|
|||
for (int ord = 1; ord < segCounts.length; ord++) {
|
||||
int count = segCounts[ord];
|
||||
if (count != 0) {
|
||||
counts[1+(int) map.getGlobalOrd(subIndex, ord-1)] += count;
|
||||
counts[1+(int) ordMap.get(ord-1)] += count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -35,6 +35,7 @@ import org.apache.lucene.search.Filter;
|
|||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TermRangeQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
import org.apache.solr.handler.component.FieldFacetStats;
|
||||
import org.apache.solr.handler.component.StatsValues;
|
||||
import org.apache.solr.handler.component.StatsValuesFactory;
|
||||
|
@ -161,12 +162,13 @@ public class DocValuesStats {
|
|||
|
||||
/** accumulates per-segment single-valued stats */
|
||||
static void accumSingle(int counts[], int docBase, FieldFacetStats[] facetStats, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
|
||||
final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex);
|
||||
int doc;
|
||||
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
int term = si.getOrd(doc);
|
||||
if (term >= 0) {
|
||||
if (map != null) {
|
||||
term = (int) map.getGlobalOrd(subIndex, term);
|
||||
term = (int) ordMap.get(term);
|
||||
}
|
||||
counts[term]++;
|
||||
for (FieldFacetStats f : facetStats) {
|
||||
|
@ -178,6 +180,7 @@ public class DocValuesStats {
|
|||
|
||||
/** accumulates per-segment multi-valued stats */
|
||||
static void accumMulti(int counts[], int docBase, FieldFacetStats[] facetStats, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
|
||||
final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex);
|
||||
int doc;
|
||||
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
si.setDocument(doc);
|
||||
|
@ -185,7 +188,7 @@ public class DocValuesStats {
|
|||
while ((ord = si.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
|
||||
int term = (int) ord;
|
||||
if (map != null) {
|
||||
term = (int) map.getGlobalOrd(subIndex, term);
|
||||
term = (int) ordMap.get(term);
|
||||
}
|
||||
counts[term]++;
|
||||
for (FieldFacetStats f : facetStats) {
|
||||
|
|
|
@ -7,6 +7,7 @@ import java.util.HashSet;
|
|||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
|
@ -18,7 +19,9 @@ import org.apache.solr.client.solrj.SolrServerException;
|
|||
import org.apache.solr.client.solrj.impl.HttpSolrServer;
|
||||
import org.apache.solr.client.solrj.request.CoreAdminRequest;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.params.ShardParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
@ -96,6 +99,7 @@ public class DistributedDebugComponentTest extends SolrJettyTestBase {
|
|||
collection2 = null;
|
||||
jetty.stop();
|
||||
jetty=null;
|
||||
resetExceptionIgnores();
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -367,6 +371,35 @@ public class DistributedDebugComponentTest extends SolrJettyTestBase {
|
|||
assertSameKeys((NamedList<?>)nonDistribResponse.getDebugMap().get("timing"), (NamedList<?>)distribResponse.getDebugMap().get("timing"));
|
||||
}
|
||||
|
||||
public void testTolerantSearch() throws SolrServerException {
|
||||
String badShard = "[ff01::0083]:3334";
|
||||
SolrQuery query = new SolrQuery();
|
||||
query.setQuery("*:*");
|
||||
query.set("debug", "true");
|
||||
query.set("distrib", "true");
|
||||
query.setFields("id", "text");
|
||||
query.set("shards", shard1 + "," + shard2 + "," + badShard);
|
||||
try {
|
||||
ignoreException("Server refused connection");
|
||||
// verify that the request would fail if shards.tolerant=false
|
||||
collection1.query(query);
|
||||
fail("Expecting exception");
|
||||
} catch (SolrException e) {
|
||||
//expected
|
||||
}
|
||||
query.set(ShardParams.SHARDS_TOLERANT, "true");
|
||||
QueryResponse response = collection1.query(query);
|
||||
assertTrue((Boolean)response.getResponseHeader().get("partialResults"));
|
||||
@SuppressWarnings("unchecked")
|
||||
NamedList<String> badShardTrack = (NamedList<String>) ((NamedList<NamedList<String>>)
|
||||
((NamedList<NamedList<NamedList<String>>>)response.getDebugMap().get("track")).get("EXECUTE_QUERY")).get(badShard);
|
||||
assertEquals("Unexpected response size for shard", 1, badShardTrack.size());
|
||||
Entry<String, String> exception = badShardTrack.iterator().next();
|
||||
assertEquals("Expected key 'Exception' not found", "Exception", exception.getKey());
|
||||
assertTrue("Unexpected exception message", exception.getValue().contains("Server refused connection"));
|
||||
unIgnoreException("Server refused connection");
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares the same section on the two query responses
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue