From e7f81b4d6ccf72b218087527ec3cfc89dd12f16f Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 16 Jun 2014 20:33:44 +0000 Subject: [PATCH 1/5] LUCENE-5767: OrdinalMap optimizations. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1602997 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 + .../lucene/codecs/DocValuesConsumer.java | 9 +- .../apache/lucene/index/MultiDocValues.java | 93 +++++++++++++++---- .../org/apache/lucene/util/LongValues.java | 10 ++ .../apache/lucene/index/TestOrdinalMap.java | 5 + .../SortedSetDocValuesFacetCounts.java | 8 +- .../apache/lucene/util/RamUsageTester.java | 11 ++- .../apache/solr/request/DocValuesFacets.java | 10 +- .../apache/solr/request/DocValuesStats.java | 7 +- 9 files changed, 124 insertions(+), 32 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 733bc7e0341..217c39228e0 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -282,6 +282,9 @@ Optimizations * LUCENE-5751: Speed up MemoryDocValues. (Adrien Grand, Robert Muir) +* LUCENE-5767: OrdinalMap optimizations, that mostly help on low cardinalities. 
+ (Martijn van Groningen, Adrien Grand) + Bug fixes * LUCENE-5738: Ensure NativeFSLock prevents opening the file channel for the diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java index 878c9d9cebf..6e1cd9d71f2 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java @@ -39,6 +39,7 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LongBitSet; +import org.apache.lucene.util.LongValues; /** * Abstract API that consumes numeric, binary and @@ -505,6 +506,7 @@ public abstract class DocValuesConsumer implements Closeable { int nextValue; AtomicReader currentReader; Bits currentLiveDocs; + LongValues currentMap; boolean nextIsSet; @Override @@ -539,6 +541,7 @@ public abstract class DocValuesConsumer implements Closeable { if (readerUpto < readers.length) { currentReader = readers[readerUpto]; currentLiveDocs = currentReader.getLiveDocs(); + currentMap = map.getGlobalOrds(readerUpto); } docIDUpto = 0; continue; @@ -547,7 +550,7 @@ public abstract class DocValuesConsumer implements Closeable { if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) { nextIsSet = true; int segOrd = dvs[readerUpto].getOrd(docIDUpto); - nextValue = segOrd == -1 ? -1 : (int) map.getGlobalOrd(readerUpto, segOrd); + nextValue = segOrd == -1 ? 
-1 : (int) currentMap.get(segOrd); docIDUpto++; return true; } @@ -707,6 +710,7 @@ public abstract class DocValuesConsumer implements Closeable { long nextValue; AtomicReader currentReader; Bits currentLiveDocs; + LongValues currentMap; boolean nextIsSet; long ords[] = new long[8]; int ordUpto; @@ -751,6 +755,7 @@ public abstract class DocValuesConsumer implements Closeable { if (readerUpto < readers.length) { currentReader = readers[readerUpto]; currentLiveDocs = currentReader.getLiveDocs(); + currentMap = map.getGlobalOrds(readerUpto); } docIDUpto = 0; continue; @@ -766,7 +771,7 @@ public abstract class DocValuesConsumer implements Closeable { if (ordLength == ords.length) { ords = ArrayUtil.grow(ords, ordLength+1); } - ords[ordLength] = map.getGlobalOrd(readerUpto, ord); + ords[ordLength] = currentMap.get(ord); ordLength++; } docIDUpto++; diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java index 207d6a0a869..0c21ed426ba 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java @@ -25,6 +25,7 @@ import org.apache.lucene.index.MultiTermsEnum.TermsEnumWithSlice; import org.apache.lucene.util.Accountable; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LongValues; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.packed.AppendingPackedLongBuffer; import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer; @@ -373,7 +374,7 @@ public class MultiDocValues { return new MultiSortedSetDocValues(values, starts, mapping); } } - + /** maps per-segment ordinals to/from global ordinal space */ // TODO: use more efficient packed ints structures? // TODO: pull this out? 
its pretty generic (maps between N ord()-enabled TermsEnums) @@ -387,8 +388,10 @@ public class MultiDocValues { final MonotonicAppendingLongBuffer globalOrdDeltas; // globalOrd -> first segment container final AppendingPackedLongBuffer firstSegments; - // for every segment, segmentOrd -> (globalOrd - segmentOrd) - final MonotonicAppendingLongBuffer ordDeltas[]; + // for every segment, segmentOrd -> globalOrd + final LongValues segmentToGlobalOrds[]; + // ram usage + final long ramBytesUsed; /** * Creates an ordinal map that allows mapping ords to/from a merged @@ -398,16 +401,20 @@ public class MultiDocValues { * not be dense (e.g. can be FilteredTermsEnums}. * @throws IOException if an I/O error occurred. */ - public OrdinalMap(Object owner, TermsEnum subs[]) throws IOException { + public OrdinalMap(Object owner, TermsEnum subs[], float acceptableOverheadRatio) throws IOException { // create the ordinal mappings by pulling a termsenum over each sub's // unique terms, and walking a multitermsenum over those this.owner = owner; + // even though we accept an overhead ratio, we keep these ones with COMPACT + // since they are only used to resolve values given a global ord, which is + // slow anyway globalOrdDeltas = new MonotonicAppendingLongBuffer(PackedInts.COMPACT); firstSegments = new AppendingPackedLongBuffer(PackedInts.COMPACT); - ordDeltas = new MonotonicAppendingLongBuffer[subs.length]; + final MonotonicAppendingLongBuffer[] ordDeltas = new MonotonicAppendingLongBuffer[subs.length]; for (int i = 0; i < ordDeltas.length; i++) { - ordDeltas[i] = new MonotonicAppendingLongBuffer(); + ordDeltas[i] = new MonotonicAppendingLongBuffer(acceptableOverheadRatio); } + long[] ordDeltaBits = new long[subs.length]; long segmentOrds[] = new long[subs.length]; ReaderSlice slices[] = new ReaderSlice[subs.length]; TermsEnumIndex indexes[] = new TermsEnumIndex[slices.length]; @@ -431,6 +438,7 @@ public class MultiDocValues { } // for each per-segment ord, map it back to the 
global term. while (segmentOrds[segmentIndex] <= segmentOrd) { + ordDeltaBits[segmentIndex] |= delta; ordDeltas[segmentIndex].add(delta); segmentOrds[segmentIndex]++; } @@ -442,14 +450,63 @@ public class MultiDocValues { for (int i = 0; i < ordDeltas.length; ++i) { ordDeltas[i].freeze(); } + // ordDeltas is typically the bottleneck, so let's see what we can do to make it faster + segmentToGlobalOrds = new LongValues[subs.length]; + long ramBytesUsed = BASE_RAM_BYTES_USED + globalOrdDeltas.ramBytesUsed() + firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds); + for (int i = 0; i < ordDeltas.length; ++i) { + final MonotonicAppendingLongBuffer deltas = ordDeltas[i]; + if (ordDeltaBits[i] == 0L) { + // segment ords perfectly match global ordinals + // likely in case of low cardinalities and large segments + segmentToGlobalOrds[i] = LongValues.IDENTITY; + } else { + final int bitsRequired = ordDeltaBits[i] < 0 ? 64 : PackedInts.bitsRequired(ordDeltaBits[i]); + final long monotonicBits = deltas.ramBytesUsed() * 8; + final long packedBits = bitsRequired * deltas.size(); + if (deltas.size() <= Integer.MAX_VALUE + && packedBits <= monotonicBits * (1 + acceptableOverheadRatio)) { + // monotonic compression mostly adds overhead, let's keep the mapping in plain packed ints + final int size = (int) deltas.size(); + final PackedInts.Mutable newDeltas = PackedInts.getMutable(size, bitsRequired, acceptableOverheadRatio); + final MonotonicAppendingLongBuffer.Iterator it = deltas.iterator(); + for (int ord = 0; ord < size; ++ord) { + newDeltas.set(ord, it.next()); + } + assert !it.hasNext(); + segmentToGlobalOrds[i] = new LongValues() { + @Override + public long get(long ord) { + return ord + newDeltas.get((int) ord); + } + }; + ramBytesUsed += newDeltas.ramBytesUsed(); + } else { + segmentToGlobalOrds[i] = new LongValues() { + @Override + public long get(long ord) { + return ord + deltas.get(ord); + } + }; + ramBytesUsed +=
deltas.ramBytesUsed(); + } + ramBytesUsed += RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds[i]); + } + } + this.ramBytesUsed = ramBytesUsed; } - + + /** Create an {@link OrdinalMap} with the default overhead ratio. + * @see #OrdinalMap(Object, TermsEnum[], float) */ + public OrdinalMap(Object owner, TermsEnum subs[]) throws IOException { + this(owner, subs, PackedInts.DEFAULT); + } + /** - * Given a segment number and segment ordinal, returns - * the corresponding global ordinal. + * Given a segment number, return a {@link LongValues} instance that maps + * segment ordinals to global ordinals. */ - public long getGlobalOrd(int segmentIndex, long segmentOrd) { - return segmentOrd + ordDeltas[segmentIndex].get(segmentOrd); + public LongValues getGlobalOrds(int segmentIndex) { + return segmentToGlobalOrds[segmentIndex]; } /** @@ -477,11 +534,7 @@ public class MultiDocValues { @Override public long ramBytesUsed() { - long size = BASE_RAM_BYTES_USED + globalOrdDeltas.ramBytesUsed() + firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(ordDeltas); - for (int i = 0; i < ordDeltas.length; i++) { - size += ordDeltas[i].ramBytesUsed(); - } - return size; + return ramBytesUsed; } } @@ -499,7 +552,7 @@ public class MultiDocValues { /** Creates a new MultiSortedDocValues over values */ MultiSortedDocValues(SortedDocValues values[], int docStarts[], OrdinalMap mapping) throws IOException { - assert values.length == mapping.ordDeltas.length; + assert values.length == mapping.segmentToGlobalOrds.length; assert docStarts.length == values.length + 1; this.values = values; this.docStarts = docStarts; @@ -510,7 +563,7 @@ public class MultiDocValues { public int getOrd(int docID) { int subIndex = ReaderUtil.subIndex(docID, docStarts); int segmentOrd = values[subIndex].getOrd(docID - docStarts[subIndex]); - return segmentOrd == -1 ? segmentOrd : (int) mapping.getGlobalOrd(subIndex, segmentOrd); + return segmentOrd == -1 ? 
segmentOrd : (int) mapping.segmentToGlobalOrds[subIndex].get(segmentOrd); } @Override @@ -541,7 +594,7 @@ public class MultiDocValues { /** Creates a new MultiSortedSetDocValues over values */ MultiSortedSetDocValues(SortedSetDocValues values[], int docStarts[], OrdinalMap mapping) throws IOException { - assert values.length == mapping.ordDeltas.length; + assert values.length == mapping.segmentToGlobalOrds.length; assert docStarts.length == values.length + 1; this.values = values; this.docStarts = docStarts; @@ -554,7 +607,7 @@ public class MultiDocValues { if (segmentOrd == NO_MORE_ORDS) { return segmentOrd; } else { - return mapping.getGlobalOrd(currentSubIndex, segmentOrd); + return mapping.segmentToGlobalOrds[currentSubIndex].get(segmentOrd); } } diff --git a/lucene/core/src/java/org/apache/lucene/util/LongValues.java b/lucene/core/src/java/org/apache/lucene/util/LongValues.java index 25435066c9b..138ea55a6ae 100644 --- a/lucene/core/src/java/org/apache/lucene/util/LongValues.java +++ b/lucene/core/src/java/org/apache/lucene/util/LongValues.java @@ -27,6 +27,16 @@ import org.apache.lucene.util.packed.PackedInts; * @lucene.internal */ public abstract class LongValues extends NumericDocValues { + /** An instance that returns the provided value. */ + public static final LongValues IDENTITY = new LongValues() { + + @Override + public long get(long index) { + return index; + } + + }; + /** Get value at index. 
*/ public abstract long get(long index); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestOrdinalMap.java b/lucene/core/src/test/org/apache/lucene/index/TestOrdinalMap.java index 83520e848e8..35006e5a094 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestOrdinalMap.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestOrdinalMap.java @@ -30,6 +30,7 @@ import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues; import org.apache.lucene.index.MultiDocValues.OrdinalMap; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LongValues; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.RamUsageTester; import org.apache.lucene.util.TestUtil; @@ -44,6 +45,10 @@ public class TestOrdinalMap extends LuceneTestCase { } return true; } + + public boolean accept(Object o) { + return o != LongValues.IDENTITY; + } }; public void testRamBytesUsed() throws IOException { diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java index 1e20cccebdc..ccbae2616e2 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java @@ -40,6 +40,7 @@ import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LongValues; /** Compute facets counts from previously * indexed {@link SortedSetDocValuesFacetField}, @@ -188,7 +189,8 @@ public class SortedSetDocValuesFacetCounts extends Facets { // temp ram req'ts (sum of number of ords across all // segs) if (ordinalMap != null) { - int segOrd = hits.context.ord; + final int segOrd = hits.context.ord; + 
final LongValues ordMap = ordinalMap.getGlobalOrds(segOrd); int numSegOrds = (int) segValues.getValueCount(); @@ -202,7 +204,7 @@ public class SortedSetDocValuesFacetCounts extends Facets { int term = (int) segValues.nextOrd(); while (term != SortedSetDocValues.NO_MORE_ORDS) { //System.out.println(" segOrd=" + segOrd + " ord=" + term + " globalOrd=" + ordinalMap.getGlobalOrd(segOrd, term)); - counts[(int) ordinalMap.getGlobalOrd(segOrd, term)]++; + counts[(int) ordMap.get(term)]++; term = (int) segValues.nextOrd(); } } @@ -228,7 +230,7 @@ public class SortedSetDocValuesFacetCounts extends Facets { int count = segCounts[ord]; if (count != 0) { //System.out.println(" migrate segOrd=" + segOrd + " ord=" + ord + " globalOrd=" + ordinalMap.getGlobalOrd(segOrd, ord)); - counts[(int) ordinalMap.getGlobalOrd(segOrd, ord)] += count; + counts[(int) ordMap.get(ord)] += count; } } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java b/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java index 5a3d96dbcc1..bce875485d5 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java @@ -39,6 +39,10 @@ public final class RamUsageTester { return true; } + public boolean accept(Object o) { + return true; + } + }; /** A filter that allows to decide on what to take into account when measuring RAM usage. */ @@ -47,6 +51,9 @@ public final class RamUsageTester { /** Whether the provided field should be taken into account when measuring RAM usage. */ boolean accept(Field field); + /** Whether the provided field value should be taken into account when measuring RAM usage. */ + boolean accept(Object o); + } /** @@ -119,7 +126,7 @@ public final class RamUsageTester { // Push refs for traversal later. 
for (int i = len; --i >= 0 ;) { final Object o = Array.get(ob, i); - if (o != null && !seen.contains(o)) { + if (o != null && !seen.contains(o) && filter.accept(o)) { stack.add(o); } } @@ -141,7 +148,7 @@ public final class RamUsageTester { if (filter.accept(f)) { // Fast path to eliminate redundancies. final Object o = f.get(ob); - if (o != null && !seen.contains(o)) { + if (o != null && !seen.contains(o) && filter.accept(o)) { stack.add(o); } } diff --git a/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java b/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java index 0712a073947..81d1220406e 100644 --- a/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java +++ b/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java @@ -32,6 +32,7 @@ import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Filter; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; +import org.apache.lucene.util.LongValues; import org.apache.lucene.util.UnicodeUtil; import org.apache.solr.common.params.FacetParams; import org.apache.solr.common.util.NamedList; @@ -247,11 +248,12 @@ public class DocValuesFacets { /** accumulates per-segment single-valued facet counts, mapping to global ordinal space on-the-fly */ static void accumSingleGeneric(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { + final LongValues ordmap = map == null ? 
null : map.getGlobalOrds(subIndex); int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { int term = si.getOrd(doc); if (map != null && term >= 0) { - term = (int) map.getGlobalOrd(subIndex, term); + term = (int) ordmap.get(term); } int arrIdx = term-startTermIndex; if (arrIdx>=0 && arrIdx=0 && arrIdx= 0) { if (map != null) { - term = (int) map.getGlobalOrd(subIndex, term); + term = (int) ordMap.get(term); } counts[term]++; for (FieldFacetStats f : facetStats) { @@ -178,6 +180,7 @@ public class DocValuesStats { /** accumulates per-segment multi-valued stats */ static void accumMulti(int counts[], int docBase, FieldFacetStats[] facetStats, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { + final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex); int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { si.setDocument(doc); @@ -185,7 +188,7 @@ public class DocValuesStats { while ((ord = si.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { int term = (int) ord; if (map != null) { - term = (int) map.getGlobalOrd(subIndex, term); + term = (int) ordMap.get(term); } counts[term]++; for (FieldFacetStats f : facetStats) { From dd32343ee2ca8ac14d5093ef1aa0b91a7d8079c2 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 16 Jun 2014 21:56:05 +0000 Subject: [PATCH 2/5] LUCENE-5768: hunspell condition checks with character classes were buggy git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1603007 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 ++ .../lucene/analysis/hunspell/Dictionary.java | 2 +- .../analysis/hunspell/TestCondition2.java | 32 +++++++++++++++++++ .../lucene/analysis/hunspell/condition2.aff | 5 +++ .../lucene/analysis/hunspell/condition2.dic | 2 ++ 5 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCondition2.java create mode 100644 
lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/condition2.aff create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/condition2.dic diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 217c39228e0..8b6aa79cd5d 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -330,6 +330,9 @@ Bug fixes * LUCENE-5747: Project-specific settings for the eclipse development environment will prevent automatic code reformatting. (Shawn Heisey) +* LUCENE-5768: Hunspell condition checks containing character classes + were buggy. (Clinton Gormley, Robert Muir) + Test Framework * LUCENE-5622: Fail tests if they print over the given limit of bytes to diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java index 0651e4dd520..b67a978fab2 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java @@ -420,7 +420,7 @@ public class Dictionary { String condition = ruleArgs.length > 4 ? 
ruleArgs[4] : "."; // at least the gascon affix file has this issue - if (condition.startsWith("[") && !condition.endsWith("]")) { + if (condition.startsWith("[") && condition.indexOf(']') == -1) { condition = condition + "]"; } // "dash hasn't got special meaning" (we must escape it) diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCondition2.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCondition2.java new file mode 100644 index 00000000000..9e7234a6490 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCondition2.java @@ -0,0 +1,32 @@ +package org.apache.lucene.analysis.hunspell; + +import org.junit.BeforeClass; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class TestCondition2 extends StemmerTestBase { + + @BeforeClass + public static void beforeClass() throws Exception { + init("condition2.aff", "condition2.dic"); + } + + public void testStemming() { + assertStemsTo("monopolies", "monopoly"); + } +} diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/condition2.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/condition2.aff new file mode 100644 index 00000000000..8e06a21b2ce --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/condition2.aff @@ -0,0 +1,5 @@ +SET ISO8859-1 +TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ' + +SFX S Y 1 +SFX S y ies [^aeiou]y diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/condition2.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/condition2.dic new file mode 100644 index 00000000000..72a8c3e7cf5 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/condition2.dic @@ -0,0 +1,2 @@ +1 +monopoly/S From 1b080217c29406b179d000518fc17f4ff611d521 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 16 Jun 2014 22:38:52 +0000 Subject: [PATCH 3/5] Remove javadoc @see tag. I can't manage to make it work with precommit. 
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1603014 13f79535-47bb-0310-9956-ffa450edef68 --- .../core/src/java/org/apache/lucene/index/MultiDocValues.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java index 0c21ed426ba..d619e15f126 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java @@ -495,8 +495,7 @@ public class MultiDocValues { this.ramBytesUsed = ramBytesUsed; } - /** Create an {@link OrdinalMap} with the default overhead ratio. - * @see #OrdinalMap(Object, TermsEnum[], float) */ + /** Create an {@link OrdinalMap} with the default overhead ratio. */ public OrdinalMap(Object owner, TermsEnum subs[]) throws IOException { this(owner, subs, PackedInts.DEFAULT); } From 565dfc3e1fb7a5e69eafb83e4f301717b42d3bee Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 17 Jun 2014 02:48:44 +0000 Subject: [PATCH 4/5] LUCENE-5769: SingletonSortedSetDocValues now supports random access ordinals git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1603044 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 ++ .../index/SingletonSortedSetDocValues.java | 33 ++++++++++++------- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 8b6aa79cd5d..ced44db7548 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -285,6 +285,9 @@ Optimizations * LUCENE-5767: OrdinalMap optimizations, that mostly help on low cardinalities. (Martijn van Groningen, Adrien Grand) +* LUCENE-5769: SingletonSortedSetDocValues now supports random access ordinals. 
+ (Robert Muir) + Bug fixes * LUCENE-5738: Ensure NativeFSLock prevents opening the file channel for the diff --git a/lucene/core/src/java/org/apache/lucene/index/SingletonSortedSetDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SingletonSortedSetDocValues.java index 2e45ab1f9b8..b11c53a7093 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SingletonSortedSetDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/SingletonSortedSetDocValues.java @@ -25,10 +25,10 @@ import org.apache.lucene.util.BytesRef; * This can be used if you want to have one multi-valued implementation * that works for single or multi-valued types. */ -final class SingletonSortedSetDocValues extends SortedSetDocValues { +final class SingletonSortedSetDocValues extends RandomAccessOrds { private final SortedDocValues in; - private int docID; - private boolean set; + private long currentOrd; + private long ord; /** Creates a multi-valued view over the provided SortedDocValues */ public SingletonSortedSetDocValues(SortedDocValues in) { @@ -43,18 +43,14 @@ final class SingletonSortedSetDocValues extends SortedSetDocValues { @Override public long nextOrd() { - if (set) { - return NO_MORE_ORDS; - } else { - set = true; - return in.getOrd(docID); - } + long v = currentOrd; + currentOrd = NO_MORE_ORDS; + return v; } @Override public void setDocument(int docID) { - this.docID = docID; - set = false; + currentOrd = ord = in.getOrd(docID); } @Override @@ -72,4 +68,19 @@ final class SingletonSortedSetDocValues extends SortedSetDocValues { public long lookupTerm(BytesRef key) { return in.lookupTerm(key); } + + @Override + public long ordAt(int index) { + return ord; + } + + @Override + public int cardinality() { + return (int) (ord >>> 63) ^ 1; + } + + @Override + public TermsEnum termsEnum() { + return in.termsEnum(); + } } From 2fe64e65ac09f28d3e5614d22e4a94e284cdb94e Mon Sep 17 00:00:00 2001 From: Shalin Shekhar Mangar Date: Tue, 17 Jun 2014 05:45:07 +0000 Subject: [PATCH 
5/5] SOLR-6175: DebugComponent throws NPE on shard exceptions when using shards.tolerant git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1603061 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 3 ++ .../handler/component/DebugComponent.java | 9 +++++ .../DistributedDebugComponentTest.java | 33 +++++++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 5ce100456ee..2568d616f2f 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -184,6 +184,9 @@ Bug Fixes * SOLR-5426: Fixed a bug in ReverseWildCardFilter that could cause InvalidTokenOffsetsException when highlighting. (Uwe Schindler, Arun Kumar, via hossman) +* SOLR-6175: DebugComponent throws NPE on shard exceptions when using shards.tolerant. + (Tomás Fernández Löbbe via shalin) + Other Changes --------------------- diff --git a/solr/core/src/java/org/apache/solr/handler/component/DebugComponent.java b/solr/core/src/java/org/apache/solr/handler/component/DebugComponent.java index aa6ce962c50..6f5f44c1fea 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/DebugComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/DebugComponent.java @@ -206,6 +206,11 @@ public class DebugComponent extends SearchComponent for (ShardRequest sreq : rb.finished) { for (ShardResponse srsp : sreq.responses) { + if (srsp.getException() != null) { + // can't expect the debug content if there was an exception for this request + // this should only happen when using shards.tolerant=true + continue; + } NamedList sdebug = (NamedList)srsp.getSolrResponse().getResponse().get("debug"); info = (NamedList)merge(sdebug, info, EXCLUDE_SET); if ((sreq.purpose & ShardRequest.PURPOSE_GET_DEBUG) != 0) { @@ -257,6 +262,10 @@ public class DebugComponent extends SearchComponent private NamedList getTrackResponse(ShardResponse shardResponse) { NamedList namedList = new NamedList<>(); + if (shardResponse.getException() != null) { + 
namedList.add("Exception", shardResponse.getException().getMessage()); + return namedList; + } NamedList responseNL = shardResponse.getSolrResponse().getResponse(); @SuppressWarnings("unchecked") NamedList responseHeader = (NamedList)responseNL.get("responseHeader"); diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedDebugComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedDebugComponentTest.java index 67fd8594f59..833eb6d4f07 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/DistributedDebugComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedDebugComponentTest.java @@ -7,6 +7,7 @@ import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import org.apache.commons.io.FileUtils; @@ -18,7 +19,9 @@ import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.HttpSolrServer; import org.apache.solr.client.solrj.request.CoreAdminRequest; import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.params.ShardParams; import org.apache.solr.common.util.NamedList; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -96,6 +99,7 @@ public class DistributedDebugComponentTest extends SolrJettyTestBase { collection2 = null; jetty.stop(); jetty=null; + resetExceptionIgnores(); } @Test @@ -367,6 +371,35 @@ public class DistributedDebugComponentTest extends SolrJettyTestBase { assertSameKeys((NamedList)nonDistribResponse.getDebugMap().get("timing"), (NamedList)distribResponse.getDebugMap().get("timing")); } + public void testTolerantSearch() throws SolrServerException { + String badShard = "[ff01::0083]:3334"; + SolrQuery query = new SolrQuery(); + query.setQuery("*:*"); + query.set("debug", "true"); + 
query.set("distrib", "true"); + query.setFields("id", "text"); + query.set("shards", shard1 + "," + shard2 + "," + badShard); + try { + ignoreException("Server refused connection"); + // verify that the request would fail if shards.tolerant=false + collection1.query(query); + fail("Expecting exception"); + } catch (SolrException e) { + //expected + } + query.set(ShardParams.SHARDS_TOLERANT, "true"); + QueryResponse response = collection1.query(query); + assertTrue((Boolean)response.getResponseHeader().get("partialResults")); + @SuppressWarnings("unchecked") + NamedList<String> badShardTrack = (NamedList<String>) ((NamedList<NamedList<String>>) + ((NamedList<NamedList<NamedList<String>>>)response.getDebugMap().get("track")).get("EXECUTE_QUERY")).get(badShard); + assertEquals("Unexpected response size for shard", 1, badShardTrack.size()); + Entry<String, String> exception = badShardTrack.iterator().next(); + assertEquals("Expected key 'Exception' not found", "Exception", exception.getKey()); + assertTrue("Unexpected exception message", exception.getValue().contains("Server refused connection")); + unIgnoreException("Server refused connection"); + } + /** * Compares the same section on the two query responses */