diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index c4b3521b784..d574a8a2861 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -45,6 +45,9 @@ Optimizations that have a facet value, so sparse faceting works as expected (Adrien Grand via Mike McCandless) +* LUCENE-7519: Add optimized APIs to compute browse-only top level + facets (Mike McCandless) + Other * LUCENE-7328: Remove LegacyNumericEncoding from GeoPointField. (Nick Knize) @@ -111,6 +114,10 @@ Improvements * LUCENE-7496: Better toString for SweetSpotSimilarity (janhoy) +* LUCENE-7520: Highlighter's WeightedSpanTermExtractor shouldn't attempt to expand a MultiTermQuery + when its field doesn't match the field the extraction is scoped to. + (Cao Manh Dat via David Smiley) + Optimizations * LUCENE-7501: BKDReader should not store the split dimension explicitly in the diff --git a/lucene/core/src/java/org/apache/lucene/index/LegacySortedNumericDocValuesWrapper.java b/lucene/core/src/java/org/apache/lucene/index/LegacySortedNumericDocValuesWrapper.java index cfb61e3c2d1..a75274e999e 100644 --- a/lucene/core/src/java/org/apache/lucene/index/LegacySortedNumericDocValuesWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/index/LegacySortedNumericDocValuesWrapper.java @@ -77,6 +77,7 @@ public final class LegacySortedNumericDocValuesWrapper extends SortedNumericDocV public boolean advanceExact(int target) throws IOException { docID = target; values.setDocument(docID); + upto = 0; return values.count() != 0; } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java index 7bbe94aaaad..b959d25a0ec 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java @@ -36,7 +36,8 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.util.BytesRef; /** - * Default implementation of {@link SortedSetDocValuesFacetCounts} + * Default implementation of {@link SortedSetDocValuesFacetCounts}. You must ensure the original + * {@link IndexReader} passed to the constructor is not closed whenever you use this class! */ public class DefaultSortedSetDocValuesReaderState extends SortedSetDocValuesReaderState { diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java index 4fff6a637b8..9ba85473d1c 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java @@ -18,6 +18,7 @@ package org.apache.lucene.facet.sortedset; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.List; @@ -33,11 +34,15 @@ import org.apache.lucene.facet.TopOrdAndIntQueue; import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState.OrdRange; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues; import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.search.ConjunctionDISI; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LongValues; @@ -77,6 +82,17 @@ public class SortedSetDocValuesFacetCounts extends Facets { count(hits.getMatchingDocs()); } + /** Returns all facet counts, same result as searching on {@link MatchAllDocsQuery} but faster. */ + public SortedSetDocValuesFacetCounts(SortedSetDocValuesReaderState state) + throws IOException { + this.state = state; + this.field = state.getField(); + dv = state.getDocValues(); + counts = new int[state.getSize()]; + //System.out.println("field=" + field); + countAll(); + } + @Override public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { if (topN <= 0) { @@ -176,7 +192,8 @@ public class SortedSetDocValuesFacetCounts extends Facets { continue; } - DocIdSetIterator docs = hits.bits.iterator(); + DocIdSetIterator it = ConjunctionDISI.intersectIterators(Arrays.asList( + hits.bits.iterator(), segValues)); // TODO: yet another option is to count all segs // first, only in seg-ord space, and then do a @@ -196,16 +213,12 @@ public class SortedSetDocValuesFacetCounts extends Facets { if (hits.totalHits < numSegOrds/10) { //System.out.println(" remap as-we-go"); // Remap every ord to global ord as we iterate: - int doc; - while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - //System.out.println(" doc=" + doc); - if (segValues.advanceExact(doc)) { - int term = (int) segValues.nextOrd(); - while (term != SortedSetDocValues.NO_MORE_ORDS) { - //System.out.println(" segOrd=" + segOrd + " ord=" + term + " globalOrd=" + ordinalMap.getGlobalOrd(segOrd, term)); - counts[(int) ordMap.get(term)]++; - term = (int) segValues.nextOrd(); - } + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { + int term = (int) segValues.nextOrd(); + while (term != SortedSetDocValues.NO_MORE_ORDS) { + //System.out.println(" segOrd=" + segOrd + " ord=" + term + " globalOrd=" + ordinalMap.getGlobalOrd(segOrd, term)); + counts[(int) ordMap.get(term)]++; + term = (int) segValues.nextOrd(); } } } else { @@ -213,16 +226,12 @@ public class SortedSetDocValuesFacetCounts extends Facets { // First count in seg-ord space: final int[] segCounts = new int[numSegOrds]; - int doc; - while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - //System.out.println(" doc=" + doc); - if (segValues.advanceExact(doc)) { - int term = (int) segValues.nextOrd(); - while (term != SortedSetDocValues.NO_MORE_ORDS) { - //System.out.println(" ord=" + term); - segCounts[term]++; - term = (int) segValues.nextOrd(); - } + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { + int term = (int) segValues.nextOrd(); + while (term != SortedSetDocValues.NO_MORE_ORDS) { + //System.out.println(" ord=" + term); + segCounts[term]++; + term = (int) segValues.nextOrd(); } } @@ -238,9 +247,76 @@ public class SortedSetDocValuesFacetCounts extends Facets { } else { // No ord mapping (e.g., single segment index): // just aggregate directly into counts: - int doc; - while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - if (segValues.advanceExact(doc)) { + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { + int term = (int) segValues.nextOrd(); + while (term != SortedSetDocValues.NO_MORE_ORDS) { + counts[term]++; + term = (int) segValues.nextOrd(); + } + } + } + } + } + + /** Does all the "real work" of tallying up the counts. */ + private final void countAll() throws IOException { + //System.out.println("ssdv count"); + + MultiDocValues.OrdinalMap ordinalMap; + + // TODO: is this right? really, we need a way to + // verify that this ordinalMap "matches" the leaves in + // matchingDocs... + if (dv instanceof MultiDocValues.MultiSortedSetDocValues) { + ordinalMap = ((MultiSortedSetDocValues) dv).mapping; + } else { + ordinalMap = null; + } + + IndexReader origReader = state.getOrigReader(); + + for(LeafReaderContext context : origReader.leaves()) { + + LeafReader reader = context.reader(); + + SortedSetDocValues segValues = reader.getSortedSetDocValues(field); + if (segValues == null) { + continue; + } + + Bits liveDocs = reader.getLiveDocs(); + + if (ordinalMap != null) { + final LongValues ordMap = ordinalMap.getGlobalOrds(context.ord); + + int numSegOrds = (int) segValues.getValueCount(); + + // First count in seg-ord space: + final int[] segCounts = new int[numSegOrds]; + int docID; + while ((docID = segValues.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + if (liveDocs == null || liveDocs.get(docID)) { + int term = (int) segValues.nextOrd(); + while (term != SortedSetDocValues.NO_MORE_ORDS) { + segCounts[term]++; + term = (int) segValues.nextOrd(); + } + } + } + + // Then, migrate to global ords: + for(int ord=0;ord matchingDocs) throws IOException { for(MatchingDocs hits : matchingDocs) { BinaryDocValues dv = hits.context.reader().getBinaryDocValues(indexFieldName); @@ -82,4 +96,39 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets { rollup(); } + + private final void countAll(IndexReader reader) throws IOException { + for(LeafReaderContext context : reader.leaves()) { + BinaryDocValues dv = context.reader().getBinaryDocValues(indexFieldName); + if (dv == null) { // this reader does not have DocValues for the requested category list + continue; + } + + Bits liveDocs = context.reader().getLiveDocs(); + + for (int doc = dv.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dv.nextDoc()) { + if (liveDocs != null && liveDocs.get(doc) == false) { + continue; + } + final BytesRef bytesRef = dv.binaryValue(); + byte[] bytes = bytesRef.bytes; + int end = bytesRef.offset + bytesRef.length; + int ord = 0; + int offset = bytesRef.offset; + int prev = 0; + while (offset < end) { + byte b = bytes[offset++]; + if (b >= 0) { + prev = ord = ((ord << 7) | b) + prev; + ++values[ord]; + ord = 0; + } else { + ord = (ord << 7) | (b & 0x7F); + } + } + } + } + + rollup(); + } } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java index d111b442bb6..e1903d122cb 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java @@ -76,7 +76,7 @@ public abstract class TaxonomyFacets extends Facets { protected FacetsConfig.DimConfig verifyDim(String dim) { FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim); if (!dimConfig.indexFieldName.equals(indexFieldName)) { - throw new IllegalArgumentException("dimension \"" + dim + "\" was not indexed into field \"" + indexFieldName); + throw new IllegalArgumentException("dimension \"" + dim + "\" was not indexed into field \"" + indexFieldName + "\""); } return dimConfig; } @@ -102,4 +102,4 @@ public abstract class TaxonomyFacets extends Facets { return results; } -} \ No newline at end of file +} diff --git a/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java b/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java index 60beddd88d0..5aed22b4e84 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java @@ -16,6 +16,7 @@ */ package org.apache.lucene.facet.sortedset; +import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -72,12 +73,8 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase { // Per-top-reader state: SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader()); - - FacetsCollector c = new FacetsCollector(); - searcher.search(new MatchAllDocsQuery(), c); - - SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c); + SortedSetDocValuesFacetCounts facets = getAllFacets(searcher, state); assertEquals("dim=a path=[] value=4 childCount=3\n foo (2)\n bar (1)\n zoo (1)\n", facets.getTopChildren(10, "a").toString()); assertEquals("dim=b path=[] value=1 childCount=1\n baz (1)\n", facets.getTopChildren(10, "b").toString()); @@ -171,9 +168,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase { // Per-top-reader state: SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader()); - FacetsCollector c = new FacetsCollector(); - searcher.search(new MatchAllDocsQuery(), c); - SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c); + SortedSetDocValuesFacetCounts facets = getAllFacets(searcher, state); // Ask for top 10 labels for any dims that have counts: List results = facets.getAllDims(10); @@ -215,9 +210,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase { // Per-top-reader state: SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader()); - FacetsCollector c = new FacetsCollector(); - searcher.search(new MatchAllDocsQuery(), c); - SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c); + SortedSetDocValuesFacetCounts facets = getAllFacets(searcher, state); // Ask for top 10 labels for any dims that have counts: assertEquals("dim=a path=[] value=2 childCount=2\n foo1 (1)\n foo2 (1)\n", facets.getTopChildren(10, "a").toString()); @@ -312,4 +305,14 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase { w.close(); IOUtils.close(searcher.getIndexReader(), indexDir, taxoDir); } + + private static SortedSetDocValuesFacetCounts getAllFacets(IndexSearcher searcher, SortedSetDocValuesReaderState state) throws IOException { + if (random().nextBoolean()) { + FacetsCollector c = new FacetsCollector(); + searcher.search(new MatchAllDocsQuery(), c); + return new SortedSetDocValuesFacetCounts(state, c); + } else { + return new SortedSetDocValuesFacetCounts(state); + } + } } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java index 20bfdb5dc8e..3bb480d1fa9 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java @@ -17,6 +17,7 @@ package org.apache.lucene.facet.taxonomy; import java.io.ByteArrayOutputStream; +import java.io.IOException; import java.io.PrintStream; import java.util.ArrayList; import java.util.HashMap; @@ -102,16 +103,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { // NRT open TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); - // Aggregate the facet counts: - FacetsCollector c = new FacetsCollector(); - - // MatchAllDocsQuery is for "browsing" (counts facets - // for all non-deleted docs in the index); normally - // you'd use a "normal" query, and use MultiCollector to - // wrap collecting the "normal" hits and also facets: - searcher.search(new MatchAllDocsQuery(), c); - - Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, c); + Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, searcher, taxoReader, config); // Retrieve & verify results: assertEquals("dim=Publish Date path=[] value=5 childCount=3\n 2010 (2)\n 2012 (2)\n 1999 (1)\n", facets.getTopChildren(10, "Publish Date").toString()); @@ -120,7 +112,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { // Now user drills down on Publish Date/2010: DrillDownQuery q2 = new DrillDownQuery(config); q2.add("Publish Date", "2010"); - c = new FacetsCollector(); + FacetsCollector c = new FacetsCollector(); searcher.search(q2, c); facets = new FastTaxonomyFacetCounts(taxoReader, config, c); assertEquals("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", facets.getTopChildren(10, "Author").toString()); @@ -185,11 +177,8 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { // NRT open TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); - FacetsCollector c = new FacetsCollector(); - searcher.search(new MatchAllDocsQuery(), c); - - Facets facets = getTaxonomyFacetCounts(taxoReader, new FacetsConfig(), c); - + Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, searcher, taxoReader, config); + // Ask for top 10 labels for any dims that have counts: List results = facets.getAllDims(10); @@ -225,7 +214,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector c = new FacetsCollector(); - searcher.search(new MatchAllDocsQuery(), c); + searcher.search(new MatchAllDocsQuery(), c); // Uses default $facets field: Facets facets; @@ -301,15 +290,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { // NRT open TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); - // Aggregate the facet counts: - FacetsCollector c = new FacetsCollector(); - - // MatchAllDocsQuery is for "browsing" (counts facets - // for all non-deleted docs in the index); normally - // you'd use a "normal" query, and use MultiCollector to - // wrap collecting the "normal" hits and also facets: - searcher.search(new MatchAllDocsQuery(), c); - Facets facets = getTaxonomyFacetCounts(taxoReader, config, c); + Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, searcher, taxoReader, config); expectThrows(IllegalArgumentException.class, () -> { facets.getSpecificValue("a"); @@ -344,10 +325,8 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { // NRT open TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); - FacetsCollector c = new FacetsCollector(); - searcher.search(new MatchAllDocsQuery(), c); + Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, searcher, taxoReader, config); - Facets facets = getTaxonomyFacetCounts(taxoReader, config, c); assertEquals(1, facets.getSpecificValue("dim", "test\u001Fone")); assertEquals(1, facets.getSpecificValue("dim", "test\u001Etwo")); @@ -387,11 +366,8 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { // NRT open TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); - - FacetsCollector c = new FacetsCollector(); - searcher.search(new MatchAllDocsQuery(), c); + Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, searcher, taxoReader, config); - Facets facets = getTaxonomyFacetCounts(taxoReader, config, c); assertEquals(1, facets.getTopChildren(10, "dim").value); assertEquals(1, facets.getTopChildren(10, "dim2").value); assertEquals(1, facets.getTopChildren(10, "dim3").value); @@ -432,15 +408,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { // NRT open TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); - // Aggregate the facet counts: - FacetsCollector c = new FacetsCollector(); - - // MatchAllDocsQuery is for "browsing" (counts facets - // for all non-deleted docs in the index); normally - // you'd use a "normal" query, and use MultiCollector to - // wrap collecting the "normal" hits and also facets: - searcher.search(new MatchAllDocsQuery(), c); - Facets facets = getTaxonomyFacetCounts(taxoReader, config, c); + Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, searcher, taxoReader, config); FacetResult result = facets.getTopChildren(Integer.MAX_VALUE, "dim"); assertEquals(numLabels, result.labelValues.length); @@ -544,9 +512,8 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { DirectoryReader r = DirectoryReader.open(iw); DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); - FacetsCollector sfc = new FacetsCollector(); - newSearcher(r).search(new MatchAllDocsQuery(), sfc); - Facets facets = getTaxonomyFacetCounts(taxoReader, config, sfc); + Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, newSearcher(r), taxoReader, config); + for (FacetResult result : facets.getAllDims(10)) { assertEquals(r.numDocs(), result.value.intValue()); } @@ -572,10 +539,8 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { DirectoryReader r = DirectoryReader.open(iw); DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); - final FacetsCollector sfc = new FacetsCollector(); - newSearcher(r).search(new MatchAllDocsQuery(), sfc); - - Facets facets = getTaxonomyFacetCounts(taxoReader, config, sfc); + Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, newSearcher(r), taxoReader, config); + List res1 = facets.getAllDims(10); List res2 = facets.getAllDims(10); assertEquals("calling getFacetResults twice should return the .equals()=true result", res1, res2); @@ -601,9 +566,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { DirectoryReader r = DirectoryReader.open(iw); DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); - FacetsCollector sfc = new FacetsCollector(); - newSearcher(r).search(new MatchAllDocsQuery(), sfc); - Facets facets = getTaxonomyFacetCounts(taxoReader, config, sfc); + Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, newSearcher(r), taxoReader, config); assertEquals(10, facets.getTopChildren(2, "a").childCount); @@ -754,4 +717,21 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { w.close(); IOUtils.close(tw, searcher.getIndexReader(), tr, indexDir, taxoDir); } + + private static Facets getAllFacets(String indexFieldName, IndexSearcher searcher, TaxonomyReader taxoReader, FacetsConfig config) throws IOException { + if (random().nextBoolean()) { + // Aggregate the facet counts: + FacetsCollector c = new FacetsCollector(); + + // MatchAllDocsQuery is for "browsing" (counts facets + // for all non-deleted docs in the index); normally + // you'd use a "normal" query, and use MultiCollector to + // wrap collecting the "normal" hits and also facets: + searcher.search(new MatchAllDocsQuery(), c); + + return new FastTaxonomyFacetCounts(taxoReader, config, c); + } else { + return new FastTaxonomyFacetCounts(indexFieldName, searcher.getIndexReader(), taxoReader, config); + } + } } diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java index 1b277f18cb5..0e0093b02e3 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java @@ -217,13 +217,14 @@ public class WeightedSpanTermExtractor { } else if (isQueryUnsupported(query.getClass())) { // nothing } else { + if (query instanceof MultiTermQuery && + (!expandMultiTermQuery || !fieldNameComparator(((MultiTermQuery)query).getField()))) { + return; + } Query origQuery = query; final IndexReader reader = getLeafContext().reader(); Query rewritten; if (query instanceof MultiTermQuery) { - if (!expandMultiTermQuery) { - return; - } rewritten = MultiTermQuery.SCORING_BOOLEAN_REWRITE.rewrite(reader, (MultiTermQuery) query); } else { rewritten = origQuery.rewrite(reader); @@ -508,11 +509,7 @@ public class WeightedSpanTermExtractor { */ public Map getWeightedSpanTerms(Query query, float boost, TokenStream tokenStream, String fieldName) throws IOException { - if (fieldName != null) { - this.fieldName = fieldName; - } else { - this.fieldName = null; - } + this.fieldName = fieldName; Map terms = new PositionCheckingMap<>(); this.tokenStream = tokenStream; diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java index fc402bacff5..c37709b5eea 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java @@ -33,6 +33,7 @@ import java.util.StringTokenizer; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.CachingTokenFilter; +import org.apache.lucene.analysis.CannedTokenStream; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockPayloadAnalyzer; import org.apache.lucene.analysis.MockTokenFilter; @@ -1339,6 +1340,22 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte } + public void testNotRewriteMultiTermQuery() throws IOException { + // field "bar": (not the field we ultimately want to extract) + MultiTermQuery mtq = new TermRangeQuery("bar", new BytesRef("aa"), new BytesRef("zz"), true, true) ; + WeightedSpanTermExtractor extractor = new WeightedSpanTermExtractor() { + @Override + protected void extract(Query query, float boost, Map terms) throws IOException { + assertEquals(mtq, query); + super.extract(query, boost, terms); + } + }; + extractor.setExpandMultiTermQuery(true); + extractor.setMaxDocCharsToAnalyze(51200); + extractor.getWeightedSpanTerms( + mtq, 3, new CannedTokenStream(new Token("aa",0,2), new Token("bb", 2,4)), "foo"); // field "foo" + } + public void testGetBestSingleFragmentWithWeights() throws Exception { TestHighlightRunner helper = new TestHighlightRunner() { diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index f455002cf6e..8e6ee7ea19a 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -25,7 +25,8 @@ Upgrading from Solr 6.x SolrHttpClientBuilder rather than an HttpClientConfigurer. * HttpClientUtil now allows configuring HttpClient instances via SolrHttpClientBuilder - rather than an HttpClientConfigurer. + rather than an HttpClientConfigurer. Use of env variable SOLR_AUTHENTICATION_CLIENT_CONFIGURER + no longer works, please use SOLR_AUTHENTICATION_CLIENT_BUILDER * SolrClient implementations now use their own internal configuration for socket timeouts, connect timeouts, and allowing redirects rather than what is set as the default when @@ -56,6 +57,8 @@ Optimizations check on every request and move connection lifecycle management towards the client. (Ryan Zezeski, Mark Miller, Shawn Heisey, Steve Davids) +* SOLR-9255: Rename SOLR_AUTHENTICATION_CLIENT_CONFIGURER -> SOLR_AUTHENTICATION_CLIENT_BUILDER (janhoy) + * SOLR-9579: Make Solr's SchemaField implement Lucene's IndexableFieldType, removing the creation of a Lucene FieldType every time a field is indexed. (John Call, yonik) @@ -95,7 +98,7 @@ Upgrade Notes * The create/deleteCollection methods on MiniSolrCloudCluster have been deprecated. Clients should instead use the CollectionAdminRequest API. In - addition, MiniSolrCloudCluster#uploadConfigSet(File, String) has been + addition, MiniSolrCloudCluster#uploadConfigDir(File, String) has been deprecated in favour of #uploadConfigSet(Path, String) * The bin/solr.in.sh (bin/solr.in.cmd on Windows) is now completely commented by default. Previously, this wasn't so, @@ -154,6 +157,9 @@ New Features * SOLR-9662: New parameter -u in bin/post to pass basicauth credentials (janhoy) +* SOLR-9654: Add "overrequest" parameter to JSON Facet API to control amount of overrequest + on a distributed terms facet. (yonik) + Bug Fixes ---------------------- @@ -202,6 +208,15 @@ Bug Fixes * SOLR-9518: Kerberos Delegation Tokens don't work without a chrooted ZK (Ishan Chattopadhyaya,via noble) +* SOLR-9687: Fixed Interval Facet count issue in cases of open/close intervals on the same values + (Andy Chillrud, Tomás Fernández Löbbe) + +* SOLR-9441: Solr collection backup on HDFS can only be manipulated by the Solr process owner. + (Hrishikesh Gadre via Mark Miller) + +* SOLR-9536: OldBackupDirectory timestamp field needs to be initialized to avoid NPE. + (Hrishikesh Gadre, hossman via Mark Miller) + Optimizations ---------------------- @@ -231,6 +246,7 @@ Optimizations * SOLR-9506: cache IndexFingerprint for each segment (Pushkar Raste, yonik, noble) +* SOLR-7506: Roll over GC logs by default via bin/solr scripts (shalin, janhoy) Other Changes ---------------------- @@ -329,7 +345,10 @@ Other Changes solr.log.X files are rotated, preserving solr.log from last run in solr.log.1, solr.log.1 => solr.log.2 etc solr-*-console.log files are moved into $SOLR_LOGS_DIR/archived/ instead of being overwritten Last JVM garbage collection log solr_gc.log is moved into $SOLR_LOGS_DIR/archived/ - (janhoy) + (janhoy) + +* SOLR-4531: Add tests to ensure that recovery does not fail on corrupted tlogs. + (Simon Scofield, Cao Manh Dat via shalin) ================== 6.2.1 ================== @@ -3283,7 +3302,7 @@ Bug Fixes while accessing other collections. (Shai Erera) * SOLR-7412: Fixed range.facet.other parameter for distributed requests. - (Will Miller, Tomás Fernándes Löbbe) + (Will Miller, Tomás Fernández Löbbe) * SOLR-6087: SolrIndexSearcher makes no DelegatingCollector.finish() call when IndexSearcher throws an expected exception. (Christine Poerschke via shalin) diff --git a/solr/bin/solr b/solr/bin/solr index 6aa57095106..9d55e0a41f8 100755 --- a/solr/bin/solr +++ b/solr/bin/solr @@ -178,9 +178,13 @@ fi # Authentication options if [ "$SOLR_AUTHENTICATION_CLIENT_CONFIGURER" != "" ]; then - AUTHC_CLIENT_CONFIGURER_ARG="-Dsolr.authentication.httpclient.configurer=$SOLR_AUTHENTICATION_CLIENT_CONFIGURER" + echo "WARNING: Found unsupported configuration variable SOLR_AUTHENTICATION_CLIENT_CONFIGURER" + echo " Please start using SOLR_AUTHENTICATION_CLIENT_BUILDER instead" fi -AUTHC_OPTS="$AUTHC_CLIENT_CONFIGURER_ARG $SOLR_AUTHENTICATION_OPTS" +if [ "$SOLR_AUTHENTICATION_CLIENT_BUILDER" != "" ]; then + AUTHC_CLIENT_BUILDER_ARG="-Dsolr.authentication.httpclient.builder=$SOLR_AUTHENTICATION_CLIENT_BUILDER" +fi +AUTHC_OPTS="$AUTHC_CLIENT_BUILDER_ARG $SOLR_AUTHENTICATION_OPTS" # Set the SOLR_TOOL_HOST variable for use when connecting to a running Solr instance if [ "$SOLR_HOST" != "" ]; then @@ -1407,13 +1411,14 @@ if [ -z ${GC_LOG_OPTS+x} ]; then else GC_LOG_OPTS=($GC_LOG_OPTS) fi -# if verbose gc logging enabled, setup the location of the log file + +# if verbose gc logging enabled, setup the location of the log file and rotation if [ "$GC_LOG_OPTS" != "" ]; then gc_log_flag="-Xloggc" if [ "$JAVA_VENDOR" == "IBM J9" ]; then gc_log_flag="-Xverbosegclog" fi - GC_LOG_OPTS+=("$gc_log_flag:$SOLR_LOGS_DIR/solr_gc.log") + GC_LOG_OPTS+=("$gc_log_flag:$SOLR_LOGS_DIR/solr_gc.log" -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=9 -XX:GCLogFileSize=20M) fi # If ZK_HOST is defined, the assume SolrCloud mode diff --git a/solr/bin/solr.cmd b/solr/bin/solr.cmd index 317a7890933..4ab188fe704 100644 --- a/solr/bin/solr.cmd +++ b/solr/bin/solr.cmd @@ -1013,23 +1013,23 @@ IF NOT EXIST "%SOLR_SERVER_DIR%\tmp" ( ) IF "%JAVA_VENDOR%" == "IBM J9" ( - set "GCLOG_OPT=-Xverbosegclog" + set GCLOG_OPT="-Xverbosegclog:!SOLR_LOGS_DIR!\solr_gc.log" -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=9 -XX:GCLogFileSize=20M ) else ( - set "GCLOG_OPT=-Xloggc" + set GCLOG_OPT="-Xloggc:!SOLR_LOGS_DIR!\solr_gc.log" -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=9 -XX:GCLogFileSize=20M ) IF "%FG%"=="1" ( REM run solr in the foreground title "Solr-%SOLR_PORT%" echo %SOLR_PORT%>"%SOLR_TIP%"\bin\solr-%SOLR_PORT%.port - "%JAVA%" %SERVEROPT% %SOLR_JAVA_MEM% %START_OPTS% %GCLOG_OPT%:"!SOLR_LOGS_DIR!/solr_gc.log" ^ + "%JAVA%" %SERVEROPT% %SOLR_JAVA_MEM% %START_OPTS% %GCLOG_OPT% ^ -Dlog4j.configuration="%LOG4J_CONFIG%" -DSTOP.PORT=!STOP_PORT! -DSTOP.KEY=%STOP_KEY% ^ -Dsolr.solr.home="%SOLR_HOME%" -Dsolr.install.dir="%SOLR_TIP%" ^ -Djetty.host=%SOLR_JETTY_HOST% -Djetty.port=%SOLR_PORT% -Djetty.home="%SOLR_SERVER_DIR%" ^ -Djava.io.tmpdir="%SOLR_SERVER_DIR%\tmp" -jar start.jar "%SOLR_JETTY_CONFIG%" ) ELSE ( START /B "Solr-%SOLR_PORT%" /D "%SOLR_SERVER_DIR%" ^ - "%JAVA%" %SERVEROPT% %SOLR_JAVA_MEM% %START_OPTS% %GCLOG_OPT%:"!SOLR_LOGS_DIR!/solr_gc.log" ^ + "%JAVA%" %SERVEROPT% %SOLR_JAVA_MEM% %START_OPTS% %GCLOG_OPT% ^ -Dlog4j.configuration="%LOG4J_CONFIG%" -DSTOP.PORT=!STOP_PORT! -DSTOP.KEY=%STOP_KEY% ^ -Dsolr.log.muteconsole ^ -Dsolr.solr.home="%SOLR_HOME%" -Dsolr.install.dir="%SOLR_TIP%" ^ diff --git a/solr/bin/solr.in.sh b/solr/bin/solr.in.sh index 2fcaabb7a1a..40c59a6f1b9 100644 --- a/solr/bin/solr.in.sh +++ b/solr/bin/solr.in.sh @@ -105,8 +105,8 @@ #SOLR_SSL_CLIENT_TRUST_STORE_PASSWORD= # Settings for authentication -#SOLR_AUTHENTICATION_CLIENT_CONFIGURER= -#SOLR_AUTHENTICATION_OPTS= +#SOLR_AUTHENTICATION_CLIENT_BUILDER= +#SOLR_AUTHENTICATION_OPTS="-Dbasicauth=solr:SolrRocks" # Settings for ZK ACL #SOLR_ZK_CREDS_AND_ACLS="-DzkACLProvider=org.apache.solr.common.cloud.VMParamsAllAndReadonlyDigestZkACLProvider \ diff --git a/solr/core/src/java/org/apache/solr/core/backup/repository/HdfsBackupRepository.java b/solr/core/src/java/org/apache/solr/core/backup/repository/HdfsBackupRepository.java index f12d9fdbb55..f46576527ba 100644 --- a/solr/core/src/java/org/apache/solr/core/backup/repository/HdfsBackupRepository.java +++ b/solr/core/src/java/org/apache/solr/core/backup/repository/HdfsBackupRepository.java @@ -26,6 +26,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; @@ -41,6 +42,8 @@ import org.apache.solr.store.hdfs.HdfsDirectory.HdfsIndexInput; import com.google.common.base.Preconditions; public class HdfsBackupRepository implements BackupRepository { + private static final String HDFS_UMASK_MODE_PARAM = "solr.hdfs.permissions.umask-mode"; + private HdfsDirectoryFactory factory; private Configuration hdfsConfig = null; private FileSystem fileSystem = null; @@ -58,6 +61,12 @@ public class HdfsBackupRepository implements BackupRepository { factory.init(args); this.hdfsConfig = factory.getConf(); + // Configure the umask mode if specified. + if (args.get(HDFS_UMASK_MODE_PARAM) != null) { + String umaskVal = (String)args.get(HDFS_UMASK_MODE_PARAM); + this.hdfsConfig.set(FsPermission.UMASK_LABEL, umaskVal); + } + String hdfsSolrHome = (String) Preconditions.checkNotNull(args.get(HdfsDirectoryFactory.HDFS_HOME), "Please specify " + HdfsDirectoryFactory.HDFS_HOME + " property."); Path path = new Path(hdfsSolrHome); diff --git a/solr/core/src/java/org/apache/solr/handler/OldBackupDirectory.java b/solr/core/src/java/org/apache/solr/handler/OldBackupDirectory.java index 2b19116c926..79c5f09f7e9 100644 --- a/solr/core/src/java/org/apache/solr/handler/OldBackupDirectory.java +++ b/solr/core/src/java/org/apache/solr/handler/OldBackupDirectory.java @@ -32,7 +32,7 @@ class OldBackupDirectory implements Comparable { private URI basePath; private String dirName; - private Optional timestamp; + private Optional timestamp = Optional.empty(); public OldBackupDirectory(URI basePath, String dirName) { this.dirName = Preconditions.checkNotNull(dirName); diff --git a/solr/core/src/java/org/apache/solr/request/IntervalFacets.java b/solr/core/src/java/org/apache/solr/request/IntervalFacets.java index dfe0f77f3af..14bf700bee7 100644 --- a/solr/core/src/java/org/apache/solr/request/IntervalFacets.java +++ b/solr/core/src/java/org/apache/solr/request/IntervalFacets.java @@ -157,7 +157,17 @@ public class IntervalFacets implements Iterable { if (o2.start == null) { return 1; } - return o1.start.compareTo(o2.start); + int startComparison = o1.start.compareTo(o2.start); + if (startComparison == 0) { + if (o1.startOpen != o2.startOpen) { + if (!o1.startOpen) { + return -1; + } else { + return 1; + } + } + } + return startComparison; } }); return sortedIntervals; diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java index 3f8cb0b02fd..c2cf0c20638 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java @@ -29,6 +29,7 @@ import org.apache.solr.schema.SchemaField; abstract class FacetRequestSorted extends FacetRequest { long offset; long limit; + int overrequest = -1; // Number of buckets to request beyond the limit to do internally during distributed search. -1 means default. long mincount; String sortVariable; SortDirection sortDirection; diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldMerger.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldMerger.java index 432e1a7a715..9f9991918d3 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldMerger.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldMerger.java @@ -110,11 +110,11 @@ public class FacetFieldMerger extends FacetRequestSortedMerger { sortBuckets(); - int first = (int)freq.offset; - int end = freq.limit >=0 ? first + (int) freq.limit : Integer.MAX_VALUE; - int last = Math.min(sortedBuckets.size(), end); + long first = freq.offset; + long end = freq.limit >=0 ? first + (int) freq.limit : Integer.MAX_VALUE; + long last = Math.min(sortedBuckets.size(), end); - List resultBuckets = new ArrayList<>(Math.max(0, (last - first))); + List resultBuckets = new ArrayList<>(Math.max(0, (int)(last - first))); /** this only works if there are no filters (like mincount) for (int i=first; i { } final int off = fcontext.isShard() ? 0 : (int) freq.offset; - // add a modest amount of over-request if this is a shard request - final int lim = freq.limit >= 0 ? (fcontext.isShard() ? (int)(freq.limit*1.1+4) : (int)freq.limit) : Integer.MAX_VALUE; + + long effectiveLimit = Integer.MAX_VALUE; // use max-int instead of max-long to avoid overflow + if (freq.limit >= 0) { + effectiveLimit = freq.limit; + if (fcontext.isShard()) { + // add over-request if this is a shard request + if (freq.overrequest == -1) { + effectiveLimit = (long) (effectiveLimit*1.1+4); // default: add 10% plus 4 (to overrequest for very small limits) + } else { + effectiveLimit += freq.overrequest; + } + } + } + final int sortMul = freq.sortDirection.getMultiplier(); - int maxTopVals = (int) (lim >= 0 ? (long) off + lim : Integer.MAX_VALUE - 1); + int maxTopVals = (int) (effectiveLimit >= 0 ? Math.min(off + effectiveLimit, Integer.MAX_VALUE - 1) : Integer.MAX_VALUE - 1); maxTopVals = Math.min(maxTopVals, slotCardinality); final SlotAcc sortAcc = this.sortAcc, indexOrderAcc = this.indexOrderAcc; final BiPredicate orderPredicate; @@ -258,7 +270,7 @@ abstract class FacetFieldProcessor extends FacetProcessor { bottom.slot = slotNum; bottom = queue.updateTop(); } - } else if (lim > 0) { + } else if (effectiveLimit > 0) { // queue not full Slot s = new Slot(); s.slot = slotNum; @@ -304,7 +316,7 @@ abstract class FacetFieldProcessor extends FacetProcessor { // if we are deep paging, we don't have to order the highest "offset" counts. int collectCount = Math.max(0, queue.size() - off); - assert collectCount <= lim; + assert collectCount <= effectiveLimit; int[] sortedSlots = new int[collectCount]; for (int i = collectCount - 1; i >= 0; i--) { sortedSlots[i] = queue.pop().slot; diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java index 76d7d2a4f3f..40ca686482a 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java @@ -586,6 +586,7 @@ class FacetFieldParser extends FacetParser { facet.field = getField(m); facet.offset = getLong(m, "offset", facet.offset); facet.limit = getLong(m, "limit", facet.limit); + facet.overrequest = (int) getLong(m, "overrequest", facet.overrequest); if (facet.limit == 0) facet.offset = 0; // normalize. an offset with a limit of non-zero isn't useful. facet.mincount = getLong(m, "mincount", facet.mincount); facet.missing = getBoolean(m, "missing", facet.missing); diff --git a/solr/core/src/java/org/apache/solr/update/processor/TemplateUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/TemplateUpdateProcessorFactory.java index b791d3b4ffd..c16a0c7aa25 100644 --- a/solr/core/src/java/org/apache/solr/update/processor/TemplateUpdateProcessorFactory.java +++ b/solr/core/src/java/org/apache/solr/update/processor/TemplateUpdateProcessorFactory.java @@ -31,8 +31,14 @@ import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.update.AddUpdateCommand; import org.apache.solr.util.ConcurrentLRUCache; -//Adds new fields to documents based on a template pattern specified via Template.field -// request parameters (multi-valued) or 'field' value specified in initArgs +/** +* Adds new fields to documents based on a template pattern specified via Template.field +* request parameters (multi-valued) or 'field' value specified in initArgs. +*

+* The format of the parameter is <field-name>:<the-template-string>, for example:
+* Template.field=fname:${somefield}some_string${someotherfield} +* +*/ public class TemplateUpdateProcessorFactory extends SimpleUpdateProcessorFactory { private Cache templateCache = new ConcurrentLRUCache<>(1000, 800, 900, 10, false, false, null); @Override diff --git a/solr/core/src/java/org/apache/solr/util/SolrCLI.java b/solr/core/src/java/org/apache/solr/util/SolrCLI.java index 8180c441262..ebaeda808fa 100644 --- a/solr/core/src/java/org/apache/solr/util/SolrCLI.java +++ b/solr/core/src/java/org/apache/solr/util/SolrCLI.java @@ -281,10 +281,10 @@ public class SolrCLI { Class c = Class.forName(builderClassName); SolrHttpClientBuilder builder = (SolrHttpClientBuilder)c.newInstance(); HttpClientUtil.setHttpClientBuilder(builder); - log.info("Set HttpClientConfigurer from: "+builderClassName); + log.info("Set SolrHttpClientBuilder from: "+builderClassName); } catch (Exception ex) { log.error(ex.getMessage()); - throw new RuntimeException("Error during loading of configurer '"+builderClassName+"'.", ex); + throw new RuntimeException("Error during loading of builder '"+builderClassName+"'.", ex); } } @@ -3444,13 +3444,13 @@ public class SolrCLI { Files.createDirectories(archivePath); } List archived = Files.find(archivePath, 1, (f, a) - -> a.isRegularFile() && String.valueOf(f.getFileName()).startsWith("solr_gc_")) + -> a.isRegularFile() && String.valueOf(f.getFileName()).matches("^solr_gc[_.].+")) .collect(Collectors.toList()); for (Path p : archived) { Files.delete(p); } List files = Files.find(logsPath, 1, (f, a) - -> a.isRegularFile() && String.valueOf(f.getFileName()).startsWith("solr_gc_")) + -> a.isRegularFile() && String.valueOf(f.getFileName()).matches("^solr_gc[_.].+")) .collect(Collectors.toList()); if (files.size() > 0) { out("Archiving " + files.size() + " old GC log files to " + archivePath); diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery.java new file mode 100644 index 00000000000..2a7413c41e2 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.cloud; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.commons.io.IOUtils; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.embedded.JettySolrRunner; +import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.cloud.ClusterStateUtil; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.core.SolrCore; +import org.apache.solr.update.DirectUpdateHandler2; +import org.apache.solr.update.UpdateLog; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +public class TestCloudRecovery extends SolrCloudTestCase { + + private static final String COLLECTION = "collection1"; + + @BeforeClass + public static void setupCluster() throws Exception { + System.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory"); + System.setProperty("solr.ulog.numRecordsToKeep", "1000"); + + configureCluster(2) + .addConfig("config", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf")) + .configure(); + + CollectionAdminRequest + .createCollection(COLLECTION, "config", 2, 2) + .setMaxShardsPerNode(2) + .process(cluster.getSolrClient()); + AbstractDistribZkTestBase.waitForRecoveriesToFinish(COLLECTION, cluster.getSolrClient().getZkStateReader(), + false, true, 30); + } + + @Before + public void resetCollection() throws IOException, SolrServerException { + cluster.getSolrClient().deleteByQuery(COLLECTION, "*:*"); + cluster.getSolrClient().commit(COLLECTION); + } + + @Test + public void leaderRecoverFromLogOnStartupTest() throws Exception { + AtomicInteger countReplayLog = new AtomicInteger(0); + DirectUpdateHandler2.commitOnClose = false; + UpdateLog.testing_logReplayFinishHook = countReplayLog::incrementAndGet; + + CloudSolrClient cloudClient = cluster.getSolrClient(); + cloudClient.add(COLLECTION, sdoc("id", "1")); + cloudClient.add(COLLECTION, sdoc("id", "2")); + cloudClient.add(COLLECTION, sdoc("id", "3")); + cloudClient.add(COLLECTION, sdoc("id", "4")); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("q", "*:*"); + QueryResponse resp = cloudClient.query(COLLECTION, params); + assertEquals(0, resp.getResults().getNumFound()); + + ChaosMonkey.stop(cluster.getJettySolrRunners()); + assertTrue("Timeout waiting for all not live", ClusterStateUtil.waitForAllReplicasNotLive(cloudClient.getZkStateReader(), 45000)); + ChaosMonkey.start(cluster.getJettySolrRunners()); + assertTrue("Timeout waiting for all live and active", ClusterStateUtil.waitForAllActiveAndLiveReplicas(cloudClient.getZkStateReader(), COLLECTION, 120000)); + + resp = cloudClient.query(COLLECTION, params); + assertEquals(4, resp.getResults().getNumFound()); + // Make sure all nodes is recover from tlog + assertEquals(4, countReplayLog.get()); + } + + @Test + public void corruptedLogTest() throws Exception { + AtomicInteger countReplayLog = new AtomicInteger(0); + DirectUpdateHandler2.commitOnClose = false; + UpdateLog.testing_logReplayFinishHook = countReplayLog::incrementAndGet; + + CloudSolrClient cloudClient = cluster.getSolrClient(); + cloudClient.add(COLLECTION, sdoc("id", "1000")); + cloudClient.add(COLLECTION, sdoc("id", "1001")); + for (int i = 0; i < 10; i++) { + cloudClient.add(COLLECTION, sdoc("id", String.valueOf(i))); + } + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("q", "*:*"); + QueryResponse resp = cloudClient.query(COLLECTION, params); + assertEquals(0, resp.getResults().getNumFound()); + + int logHeaderSize = Integer.MAX_VALUE; + Map contentFiles = new HashMap<>(); + for (JettySolrRunner solrRunner : cluster.getJettySolrRunners()) { + for (SolrCore solrCore : solrRunner.getCoreContainer().getCores()) { + File tlogFolder = new File(solrCore.getUlogDir(), UpdateLog.TLOG_NAME); + String[] tLogFiles = tlogFolder.list(); + Arrays.sort(tLogFiles); + File lastTLogFile = new File(tlogFolder.getAbsolutePath() + "/" + tLogFiles[tLogFiles.length - 1]); + byte[] tlogBytes = IOUtils.toByteArray(new FileInputStream(lastTLogFile)); + contentFiles.put(lastTLogFile, tlogBytes); + logHeaderSize = Math.min(tlogBytes.length, logHeaderSize); + } + } + + ChaosMonkey.stop(cluster.getJettySolrRunners()); + assertTrue("Timeout waiting for all not live", ClusterStateUtil.waitForAllReplicasNotLive(cloudClient.getZkStateReader(), 45000)); + + for (Map.Entry entry : contentFiles.entrySet()) { + byte[] tlogBytes = entry.getValue(); + + if (tlogBytes.length <= logHeaderSize) continue; + FileOutputStream stream = new FileOutputStream(entry.getKey()); + int skipLastBytes = Math.max(random().nextInt(tlogBytes.length - logHeaderSize), 2); + for (int i = 0; i < entry.getValue().length - skipLastBytes; i++) { + stream.write(tlogBytes[i]); + } + stream.close(); + } + + ChaosMonkey.start(cluster.getJettySolrRunners()); + assertTrue("Timeout waiting for all live and active", ClusterStateUtil.waitForAllActiveAndLiveReplicas(cloudClient.getZkStateReader(), COLLECTION, 120000)); + + resp = cloudClient.query(COLLECTION, params); + // Make sure cluster still healthy + assertTrue(resp.getResults().getNumFound() >= 2); + } + +} diff --git a/solr/core/src/test/org/apache/solr/cloud/TestLeaderRecoverFromLogOnStartup.java b/solr/core/src/test/org/apache/solr/cloud/TestLeaderRecoverFromLogOnStartup.java deleted file mode 100644 index 10de0424938..00000000000 --- a/solr/core/src/test/org/apache/solr/cloud/TestLeaderRecoverFromLogOnStartup.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.solr.cloud; - -import java.util.concurrent.atomic.AtomicInteger; - -import org.apache.solr.client.solrj.response.QueryResponse; -import org.apache.solr.common.cloud.ClusterStateUtil; -import org.apache.solr.common.params.ModifiableSolrParams; -import org.apache.solr.update.DirectUpdateHandler2; -import org.apache.solr.update.UpdateLog; -import org.junit.Test; - -public class TestLeaderRecoverFromLogOnStartup extends AbstractFullDistribZkTestBase { - @Override - public void distribSetUp() throws Exception { - System.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory"); - System.setProperty("solr.ulog.numRecordsToKeep", "1000"); - super.distribSetUp(); - } - - @Test - @ShardsFixed(num = 4) - public void test() throws Exception { - AtomicInteger countReplayLog = new AtomicInteger(0); - DirectUpdateHandler2.commitOnClose = false; - UpdateLog.testing_logReplayFinishHook = new Runnable() { - @Override - public void run() { - countReplayLog.incrementAndGet(); - } - }; - - String testCollectionName = "testCollection"; - createCollection(testCollectionName, 2, 2, 1); - waitForRecoveriesToFinish(false); - - cloudClient.setDefaultCollection(testCollectionName); - cloudClient.add(sdoc("id", "1")); - cloudClient.add(sdoc("id", "2")); - cloudClient.add(sdoc("id", "3")); - cloudClient.add(sdoc("id", "4")); - - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set("q", "*:*"); - QueryResponse resp = cloudClient.query(params); - assertEquals(0, resp.getResults().getNumFound()); - - ChaosMonkey.stop(jettys); - ChaosMonkey.stop(controlJetty); - assertTrue("Timeout waiting for all not live", ClusterStateUtil.waitForAllReplicasNotLive(cloudClient.getZkStateReader(), 45000)); - ChaosMonkey.start(jettys); - ChaosMonkey.start(controlJetty); - assertTrue("Timeout waiting for all live and active", ClusterStateUtil.waitForAllActiveAndLiveReplicas(cloudClient.getZkStateReader(), testCollectionName, 120000)); - - cloudClient.commit(); - resp = cloudClient.query(params); - assertEquals(4, resp.getResults().getNumFound()); - // Make sure all nodes is recover from tlog - assertEquals(4, countReplayLog.get()); - } -} diff --git a/solr/core/src/test/org/apache/solr/handler/TestHdfsBackupRestoreCore.java b/solr/core/src/test/org/apache/solr/handler/TestHdfsBackupRestoreCore.java index 4e8d4ccd584..a07d4919312 100644 --- a/solr/core/src/test/org/apache/solr/handler/TestHdfsBackupRestoreCore.java +++ b/solr/core/src/test/org/apache/solr/handler/TestHdfsBackupRestoreCore.java @@ -27,7 +27,10 @@ import java.util.Map; import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; @@ -88,6 +91,7 @@ public class TestHdfsBackupRestoreCore extends SolrCloudTestCase { " ${solr.hdfs.default.backup.path}\n" + " ${solr.hdfs.home:}\n" + " ${solr.hdfs.confdir:}\n" + + " ${solr.hdfs.permissions.umask-mode:000}\n" + " \n" + " \n" + " \n" + @@ -233,6 +237,13 @@ public class TestHdfsBackupRestoreCore extends SolrCloudTestCase { } //See if restore was successful by checking if all the docs are present again BackupRestoreUtils.verifyDocs(nDocs, masterClient, coreName); + + // Verify the permissions for the backup folder. + FileStatus status = fs.getFileStatus(new org.apache.hadoop.fs.Path("/backup/snapshot."+backupName)); + FsPermission perm = status.getPermission(); + assertEquals(FsAction.ALL, perm.getUserAction()); + assertEquals(FsAction.ALL, perm.getGroupAction()); + assertEquals(FsAction.ALL, perm.getOtherAction()); } } } diff --git a/solr/core/src/test/org/apache/solr/request/TestIntervalFaceting.java b/solr/core/src/test/org/apache/solr/request/TestIntervalFaceting.java index 68eac486495..5a4510f9193 100644 --- a/solr/core/src/test/org/apache/solr/request/TestIntervalFaceting.java +++ b/solr/core/src/test/org/apache/solr/request/TestIntervalFaceting.java @@ -943,6 +943,9 @@ public class TestIntervalFaceting extends SolrTestCaseJ4 { assertIntervalQuery(field, "(0,2]", "2"); assertIntervalQuery(field, "[*,5]", "6"); assertIntervalQuery(field, "[*,3)", "3", "[2,5)", "3", "[6,8)", "2", "[3,*]", "7", "[10,10]", "1", "[10,10]", "1", "[10,10]", "1"); + assertIntervalQuery(field, "(5,*]", "4", "[5,5]", "1", "(*,5)", "5"); + assertIntervalQuery(field, "[5,5]", "1", "(*,5)", "5", "(5,*]", "4"); + assertIntervalQuery(field, "(5,*]", "4", "(*,5)", "5", "[5,5]", "1"); } @@ -955,7 +958,9 @@ public class TestIntervalFaceting extends SolrTestCaseJ4 { assertIntervalQuery(field, "[*,bird)", "2", "[bird,cat)", "1", "[cat,dog)", "2", "[dog,*]", "4"); assertIntervalQuery(field, "[*,*]", "9", "[*,dog)", "5", "[*,dog]", "8", "[dog,*]", "4"); assertIntervalQuery(field, field + ":dog", 3, "[*,*]", "3", "[*,dog)", "0", "[*,dog]", "3", "[dog,*]", "3", "[bird,cat]", "0"); - + assertIntervalQuery(field, "(*,dog)", "5", "[dog, dog]", "3", "(dog,*)", "1"); + assertIntervalQuery(field, "[dog, dog]", "3", "(dog,*)", "1", "(*,dog)", "5"); + assertIntervalQuery(field, "(dog,*)", "1", "(*,dog)", "5", "[dog, dog]", "3"); } /** diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java index c83d308ed02..1c1a343c023 100644 --- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java +++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java @@ -1147,6 +1147,43 @@ public class TestJsonFacets extends SolrTestCaseHS { ); + + if (!client.local()) { + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{" + + "cat0:{type:terms, field:${cat_s}, sort:'count desc', limit:1, overrequest:0}" + + ",cat1:{type:terms, field:${cat_s}, sort:'count desc', limit:1, overrequest:1}" + + ",catDef:{type:terms, field:${cat_s}, sort:'count desc', limit:1, overrequest:-1}" + // -1 is default overrequest + ",catBig:{type:terms, field:${cat_s}, sort:'count desc', offset:1, limit:2147483647, overrequest:2147483647}" + // make sure overflows don't mess us up + "}" + ) + , "facets=={ count:6" + + ", cat0:{ buckets:[ {val:A,count:2} ] }" + // with no overrequest, we incorrectly conclude that A is the top bucket + ", cat1:{ buckets:[ {val:B,count:3} ] }" + + ", catDef:{ buckets:[ {val:B,count:3} ] }" + + ", catBig:{ buckets:[ {val:A,count:2} ] }" + + "}" + ); + } else { + // In non-distrib mode, should still be able to specify overrequest, but it shouldn't matter. + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{" + + "cat0:{type:terms, field:${cat_s}, sort:'count desc', limit:1, overrequest:0}" + + ",cat1:{type:terms, field:${cat_s}, sort:'count desc', limit:1, overrequest:1}" + + ",catDef:{type:terms, field:${cat_s}, sort:'count desc', limit:1, overrequest:-1}" + // -1 is default overrequest + ",catBig:{type:terms, field:${cat_s}, sort:'count desc', offset:1, limit:2147483647, overrequest:2147483647}" + // make sure overflows don't mess us up + "}" + ) + , "facets=={ count:6" + + ", cat0:{ buckets:[ {val:B,count:3} ] }" + // only change from distrib-mode test above + ", cat1:{ buckets:[ {val:B,count:3} ] }" + + ", catDef:{ buckets:[ {val:B,count:3} ] }" + + ", catBig:{ buckets:[ {val:A,count:2} ] }" + + "}" + ); + } + + } diff --git a/solr/core/src/test/org/apache/solr/util/UtilsToolTest.java b/solr/core/src/test/org/apache/solr/util/UtilsToolTest.java index 6b2d31cdeae..0ca65edf108 100644 --- a/solr/core/src/test/org/apache/solr/util/UtilsToolTest.java +++ b/solr/core/src/test/org/apache/solr/util/UtilsToolTest.java @@ -55,6 +55,10 @@ public class UtilsToolTest extends SolrTestCaseJ4 { "solr_log_20160304", "solr-8983-console.log", "solr_gc_log_20160102", + "solr_gcnotremove", + "solr_gc.log", + "solr_gc.log.0", + "solr_gc.log.0.current", "solr_gc_log_2"); @Before @@ -136,7 +140,7 @@ public class UtilsToolTest extends SolrTestCaseJ4 { String[] args = {"utils", "-archive_gc_logs", "-l", dir.toString()}; assertEquals(files.size(), fileCount()); assertEquals(0, runTool(args)); - assertEquals(files.size()-2, fileCount()); + assertEquals(files.size()-5, fileCount()); assertFalse(listFiles().contains("solr_gc_log_2")); assertTrue(Files.exists(dir.resolve("archived").resolve("solr_gc_log_2"))); assertEquals(0, runTool(args));