From 0528621d2fe5ed5ad878ebc9466bb88594bc390e Mon Sep 17 00:00:00 2001 From: Bruno Roustant Date: Thu, 9 Jan 2020 09:46:20 +0100 Subject: [PATCH 1/7] LUCENE-9125: Optimize Automaton.step() with binary search and introduce Automaton.next(). Closes #1160 --- lucene/CHANGES.txt | 2 + .../lucene/util/automaton/Automaton.java | 81 ++++++++++++++++--- .../automaton/MinimizationOperations.java | 5 +- .../lucene/util/automaton/RunAutomaton.java | 5 +- 4 files changed, 78 insertions(+), 15 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index fcc359f7326..db1ff0b1d6c 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -118,6 +118,8 @@ Optimizations * LUCENE-9113: Faster merging of SORTED/SORTED_SET doc values. (Adrien Grand) +* LUCENE-9125: Optimize Automaton.step() with binary search and introduce Automaton.next(). (Bruno Roustant) + Bug Fixes --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java index 6c317ebdff7..a8052dca2de 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java @@ -656,22 +656,77 @@ public class Automaton implements Accountable { * @return destination state, -1 if no matching outgoing transition */ public int step(int state, int label) { + return next(state, 0, label, null); + } + + /** + * Looks for the next transition that matches the provided label, assuming determinism. + *

+ * This method is similar to {@link #step(int, int)} but is used more efficiently + * when iterating over multiple transitions from the same source state. It keeps + * the latest reached transition index in {@code transition.transitionUpto} so + * the next call to this method can continue from there instead of restarting + * from the first transition. + * + * @param transition The transition to start the lookup from (inclusive, using its + * {@link Transition#source} and {@link Transition#transitionUpto}). + * It is updated with the matched transition; + * or with {@link Transition#dest} = -1 if no match. + * @param label The codepoint to look up. + * @return The destination state; or -1 if no matching outgoing transition. + */ + public int next(Transition transition, int label) { + return next(transition.source, transition.transitionUpto, label, transition); + } + + /** + * Looks for the next transition that matches the provided label, assuming determinism. + * + * @param state The source state. + * @param fromTransitionIndex The transition index to start the lookup from (inclusive); negative interpreted as 0. + * @param label The codepoint to look up. + * @param transition The output transition to update with the matching transition; or null for no update. + * @return The destination state; or -1 if no matching outgoing transition. 
+ */ + private int next(int state, int fromTransitionIndex, int label, Transition transition) { assert state >= 0; assert label >= 0; - int trans = states[2*state]; - int limit = trans + 3*states[2*state+1]; - // TODO: we could do bin search; transitions are sorted - while (trans < limit) { - int dest = transitions[trans]; - int min = transitions[trans+1]; - int max = transitions[trans+2]; - if (min <= label && label <= max) { - return dest; - } - trans += 3; - } + int stateIndex = 2 * state; + int firstTransitionIndex = states[stateIndex]; + int numTransitions = states[stateIndex + 1]; - return -1; + // Since transitions are sorted, + // binary search the transition for which label is within [minLabel, maxLabel]. + int low = Math.max(fromTransitionIndex, 0); + int high = numTransitions - 1; + while (low <= high) { + int mid = (low + high) >>> 1; + int transitionIndex = firstTransitionIndex + 3 * mid; + int minLabel = transitions[transitionIndex + 1]; + if (minLabel > label) { + high = mid - 1; + } else { + int maxLabel = transitions[transitionIndex + 2]; + if (maxLabel < label){ + low = mid + 1; + } else { + int destState = transitions[transitionIndex]; + if (transition != null) { + transition.dest = destState; + transition.min = minLabel; + transition.max = maxLabel; + transition.transitionUpto = mid; + } + return destState; + } + } + } + int destState = -1; + if (transition != null) { + transition.dest = destState; + transition.transitionUpto = low; + } + return destState; } /** Records new states and transitions and then {@link diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java b/lucene/core/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java index 979f7c52988..1962731b266 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java @@ -94,12 +94,15 @@ final public class 
MinimizationOperations { } } // find initial partition and reverse edges + Transition transition = new Transition(); for (int q = 0; q < statesLen; q++) { final int j = a.isAccept(q) ? 0 : 1; partition[j].add(q); block[q] = j; + transition.source = q; + transition.transitionUpto = -1; for (int x = 0; x < sigmaLen; x++) { - final ArrayList[] r = reverse[a.step(q, sigma[x])]; + final ArrayList[] r = reverse[a.next(transition, sigma[x])]; if (r[x] == null) { r[x] = new ArrayList<>(); } diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java index a42588759f6..e105ac3aaeb 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java @@ -78,10 +78,13 @@ public abstract class RunAutomaton implements Accountable { accept = new boolean[size]; transitions = new int[size * points.length]; Arrays.fill(transitions, -1); + Transition transition = new Transition(); for (int n=0;n Date: Mon, 13 Jan 2020 11:34:47 +0000 Subject: [PATCH 2/7] SOLR-13934: Improve SimplePostTool & bin/post docs (#1013) Co-Authored-By: RompotiMiranda --- solr/solr-ref-guide/src/post-tool.adoc | 29 +++++++++++++++----------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/solr/solr-ref-guide/src/post-tool.adoc b/solr/solr-ref-guide/src/post-tool.adoc index 3d736b52bab..140c2873612 100644 --- a/solr/solr-ref-guide/src/post-tool.adoc +++ b/solr/solr-ref-guide/src/post-tool.adoc @@ -159,20 +159,25 @@ bin/post -u solr:SolrRocks -c gettingstarted a.pdf == Post Tool Windows Support -`bin/post` exists currently only as a Unix shell script, however it delegates its work to a cross-platform capable Java program. The <> can be run directly in supported environments, including Windows. +`bin/post` is a Unix shell script and as such cannot be used directly on Windows. 
+However, it delegates its work to a cross-platform capable Java program called "SimplePostTool" or `post.jar`, which can be used in Windows environments. -== SimplePostTool - -The `bin/post` script currently delegates to a standalone Java program called `SimplePostTool`. - -This tool, bundled into a executable JAR, can be run directly using `java -jar example/exampledocs/post.jar`. See the help output and take it from there to post files, recurse a website or file system folder, or send direct commands to a Solr server. +The argument syntax differs significantly from `bin/post`, so your first step should be to print the SimplePostTool help text. [source,plain] ---- -$ java -jar example/exampledocs/post.jar -h -SimplePostTool version 5.0.0 -Usage: java [SystemProperties] -jar post.jar [-h|-] [ [...]] -. -. -. +$ java -jar example\exampledocs\post.jar -h ---- + +This command prints information about all the arguments and System properties available to SimplePostTool users. +There are also examples showing how to post files, crawl a website or file system folder, and send update commands (deletes, etc.) directly to Solr. + +Most usage involves passing both Java System properties and program arguments on the command line. Consider the example below: + +[source,plain] +---- +$ java -jar -Dc=gettingstarted -Dauto example\exampledocs\post.jar example\exampledocs\* +---- + +This indexes the contents of the `exampledocs` directory into a collection called `gettingstarted`. +The `-Dauto` System property governs whether or not SimplePostTool sends the document type to Solr during extraction. From 6e4756fd48076abcecc94446ce3defeb3f907cc1 Mon Sep 17 00:00:00 2001 From: Jason Gerlowski Date: Mon, 13 Jan 2020 06:43:21 -0500 Subject: [PATCH 3/7] SOLR-13890: Add "top-level" DV "terms" implementation (#1151) {!terms} queries have a docValues-based implementation that uses per-segment DV structures. 
This does well with a small to moderate (a few hundred) number of query terms, but doesn't scale well beyond that due to repetitive seeks done on each segment. This commit introduces an implementation that uses a "top-level" docValues structure, which scales much better to very large {!terms} queries (many hundreds, thousands of terms). --- solr/CHANGES.txt | 5 + .../solr/search/TermsQParserPlugin.java | 142 +++++++++++++-- .../solr/search/TestTermsQParserPlugin.java | 163 ++++++++++++++++++ solr/solr-ref-guide/src/other-parsers.adoc | 16 +- 4 files changed, 312 insertions(+), 14 deletions(-) create mode 100644 solr/core/src/test/org/apache/solr/search/TestTermsQParserPlugin.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index aa2081c969d..c39d8051479 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -193,6 +193,11 @@ Improvements * SOLR-14154: Return correct isolation level when retrieving it from the SQL Connection (Nick Vercammen, Kevin Risden) +* SOLR-13890: Add "top-level" DV implementation for {!terms} queries. This approach tends to be more efficient for + queries with very large numbers of terms. The new implementation is used by default for method=docValuesTermsFilter + terms queries that are searching more than 700 terms. Users wishing to ignore this heuristic can choose the + docValuesTermsFilterTopLevel or docValuesTermsFilterPerSegment methods directly. 
(Jason Gerlowski, Joel Bernstein, David Smiley) + Optimizations --------------------- (No changes) diff --git a/solr/core/src/java/org/apache/solr/search/TermsQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/TermsQParserPlugin.java index 805cca33b06..1d92b7caa60 100644 --- a/solr/core/src/java/org/apache/solr/search/TermsQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/TermsQParserPlugin.java @@ -16,29 +16,31 @@ */ package org.apache.solr.search; +import java.io.IOException; +import java.lang.invoke.MethodHandles; import java.util.Arrays; import java.util.Locale; import java.util.regex.Pattern; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PrefixCodedTerms; +import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.Term; -import org.apache.lucene.search.AutomatonQuery; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.ConstantScoreQuery; -import org.apache.lucene.search.DocValuesTermsQuery; -import org.apache.lucene.search.MatchNoDocsQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermInSetQuery; -import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.*; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.LongBitSet; import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; +import org.apache.solr.common.SolrException; import org.apache.solr.common.params.SolrParams; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.PointField; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Finds documents whose specified field has any of the specified values. 
It's like @@ -52,6 +54,7 @@ import org.apache.solr.schema.PointField; * Note that if no values are specified then the query matches no documents. */ public class TermsQParserPlugin extends QParserPlugin { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); public static final String NAME = "terms"; /** The separator to use in the underlying suggester */ @@ -88,10 +91,29 @@ public class TermsQParserPlugin extends QParserPlugin { docValuesTermsFilter {//on 4x this is FieldCacheTermsFilter but we use the 5x name any way @Override Query makeFilter(String fname, BytesRef[] byteRefs) { - return new DocValuesTermsQuery(fname, byteRefs);//constant scores + // TODO Further tune this heuristic number + return (byteRefs.length > 700) ? docValuesTermsFilterTopLevel.makeFilter(fname, byteRefs) : docValuesTermsFilterPerSegment.makeFilter(fname, byteRefs); + } + }, + docValuesTermsFilterTopLevel { + @Override + Query makeFilter(String fname, BytesRef[] byteRefs) { + return disableCacheByDefault(new TopLevelDocValuesTermsQuery(fname, byteRefs)); + } + }, + docValuesTermsFilterPerSegment { + @Override + Query makeFilter(String fname, BytesRef[] byteRefs) { + return disableCacheByDefault(new DocValuesTermsQuery(fname, byteRefs)); } }; + private static Query disableCacheByDefault(Query q) { + final WrappedQuery wrappedQuery = new WrappedQuery(q); + wrappedQuery.setCache(false); + return wrappedQuery; + } + abstract Query makeFilter(String fname, BytesRef[] byteRefs); } @@ -101,7 +123,7 @@ public class TermsQParserPlugin extends QParserPlugin { @Override public Query parse() throws SyntaxError { String fname = localParams.get(QueryParsing.F); - FieldType ft = req.getSchema().getFieldTypeNoEx(fname); + FieldType ft = req.getSchema().getFieldType(fname); String separator = localParams.get(SEPARATOR, ","); String qstr = localParams.get(QueryParsing.V);//never null Method method = Method.valueOf(localParams.get(METHOD, 
Method.termsFilter.name())); @@ -119,7 +141,7 @@ public class TermsQParserPlugin extends QParserPlugin { if (ft.isPointField()) { if (localParams.get(METHOD) != null) { - throw new IllegalArgumentException( + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, String.format(Locale.ROOT, "Method '%s' not supported in TermsQParser when using PointFields", localParams.get(METHOD))); } return ((PointField)ft).getSetQuery(this, req.getSchema().getField(fname), Arrays.asList(splitVals)); @@ -142,4 +164,100 @@ public class TermsQParserPlugin extends QParserPlugin { } }; } + + private static class TopLevelDocValuesTermsQuery extends DocValuesTermsQuery { + private final String fieldName; + private SortedSetDocValues topLevelDocValues; + private LongBitSet topLevelTermOrdinals; + private boolean matchesAtLeastOneTerm = false; + + + public TopLevelDocValuesTermsQuery(String field, BytesRef... terms) { + super(field, terms); + this.fieldName = field; + } + + public Weight createWeight(IndexSearcher searcher, final ScoreMode scoreMode, float boost) throws IOException { + if (! 
(searcher instanceof SolrIndexSearcher)) { + log.debug("Falling back to DocValuesTermsQuery because searcher [{}] is not the required SolrIndexSearcher", searcher); + return super.createWeight(searcher, scoreMode, boost); + } + + topLevelDocValues = DocValues.getSortedSet(((SolrIndexSearcher)searcher).getSlowAtomicReader(), fieldName); + topLevelTermOrdinals = new LongBitSet(topLevelDocValues.getValueCount()); + PrefixCodedTerms.TermIterator iterator = getTerms().iterator(); + + long lastTermOrdFound = 0; + for(BytesRef term = iterator.next(); term != null; term = iterator.next()) { + long currentTermOrd = lookupTerm(topLevelDocValues, term, lastTermOrdFound); + if (currentTermOrd >= 0L) { + matchesAtLeastOneTerm = true; + topLevelTermOrdinals.set(currentTermOrd); + lastTermOrdFound = currentTermOrd; + } + } + + return new ConstantScoreWeight(this, boost) { + public Scorer scorer(LeafReaderContext context) throws IOException { + if (! matchesAtLeastOneTerm) { + return null; + } + + SortedSetDocValues segmentDocValues = context.reader().getSortedSetDocValues(fieldName); + if (segmentDocValues == null) { + return null; + } + + final int docBase = context.docBase; + return new ConstantScoreScorer(this, this.score(), scoreMode, new TwoPhaseIterator(segmentDocValues) { + public boolean matches() throws IOException { + topLevelDocValues.advanceExact(docBase + approximation.docID()); + for(long ord = topLevelDocValues.nextOrd(); ord != -1L; ord = topLevelDocValues.nextOrd()) { + if (topLevelTermOrdinals.get(ord)) { + return true; + } + } + + return false; + } + + public float matchCost() { + return 10.0F; + } + }); + + } + + public boolean isCacheable(LeafReaderContext ctx) { + return DocValues.isCacheable(ctx, new String[]{fieldName}); + } + }; + } + + /* + * Same binary-search based implementation as SortedSetDocValues.lookupTerm(BytesRef), but with an + * optimization to narrow the search space where possible by providing a startOrd instead of beginning each search + * 
at 0. + */ + private long lookupTerm(SortedSetDocValues docValues, BytesRef key, long startOrd) throws IOException { + long low = startOrd; + long high = docValues.getValueCount()-1; + + while (low <= high) { + long mid = (low + high) >>> 1; + final BytesRef term = docValues.lookupOrd(mid); + int cmp = term.compareTo(key); + + if (cmp < 0) { + low = mid + 1; + } else if (cmp > 0) { + high = mid - 1; + } else { + return mid; // key found + } + } + + return -(low + 1); // key not found. + } + } } diff --git a/solr/core/src/test/org/apache/solr/search/TestTermsQParserPlugin.java b/solr/core/src/test/org/apache/solr/search/TestTermsQParserPlugin.java new file mode 100644 index 00000000000..5f5250c57c7 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/search/TestTermsQParserPlugin.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.search; + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.junit.BeforeClass; +import org.junit.Test; + +public class TestTermsQParserPlugin extends SolrTestCaseJ4 { + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig.xml", "schema.xml"); + + assertU(adoc("id","1", "author_s", "Lev Grossman", "t_title", "The Magicians", "cat_s", "fantasy", "pubyear_i", "2009")); + assertU(adoc("id", "2", "author_s", "Robert Jordan", "t_title", "The Eye of the World", "cat_s", "fantasy", "cat_s", "childrens", "pubyear_i", "1990")); + assertU(adoc("id", "3", "author_s", "Robert Jordan", "t_title", "The Great Hunt", "cat_s", "fantasy", "cat_s", "childrens", "pubyear_i", "1990")); + assertU(adoc("id", "4", "author_s", "N.K. Jemisin", "t_title", "The Fifth Season", "cat_s", "fantasy", "pubyear_i", "2015")); + assertU(commit()); + assertU(adoc("id", "5", "author_s", "Ursula K. Le Guin", "t_title", "The Dispossessed", "cat_s", "scifi", "pubyear_i", "1974")); + assertU(adoc("id", "6", "author_s", "Ursula K. 
Le Guin", "t_title", "The Left Hand of Darkness", "cat_s", "scifi", "pubyear_i", "1969")); + assertU(adoc("id", "7", "author_s", "Isaac Asimov", "t_title", "Foundation", "cat_s", "scifi", "pubyear_i", "1951")); + assertU(commit()); + } + + @Test + public void testTextTermsQuery() { + // Single term value + ModifiableSolrParams params = new ModifiableSolrParams(); + params.add("q", "{!terms f=t_title}left"); + params.add("sort", "id asc"); + assertQ(req(params, "indent", "on"), "*[count(//doc)=1]", + "//result/doc[1]/str[@name='id'][.='6']" + ); + + // Multiple term values + params = new ModifiableSolrParams(); + params.add("q", "{!terms f=t_title}left,hunt"); + params.add("sort", "id asc"); + assertQ(req(params, "indent", "on"), "*[count(//doc)=2]", + "//result/doc[1]/str[@name='id'][.='3']", + "//result/doc[2]/str[@name='id'][.='6']" + ); + } + + @Test + public void testTermsUsingNonDefaultSeparator() { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.add("q", "{!terms f=cat_s separator=|}childrens|scifi"); + params.add("sort", "id asc"); + assertQ(req(params, "indent", "on"), "*[count(//doc)=5]", + "//result/doc[1]/str[@name='id'][.='2']", + "//result/doc[2]/str[@name='id'][.='3']", + "//result/doc[3]/str[@name='id'][.='5']", + "//result/doc[4]/str[@name='id'][.='6']", + "//result/doc[5]/str[@name='id'][.='7']" + ); + } + + class TermsParams { + public String method; + public boolean cache; + + public TermsParams(String method, boolean cache) { + this.method = method; + this.cache = cache; + } + + + public String buildQuery(String fieldName, String commaDelimitedTerms) { + return "{!terms f=" + fieldName + " method=" + method + " cache=" + cache + "}" + commaDelimitedTerms; + } + } + + @Test + public void testTermsMethodEquivalency() { + // Run queries with a variety of 'method' and postfilter options. 
+ final TermsParams[] methods = new TermsParams[] { + new TermsParams("termsFilter", true), + new TermsParams("termsFilter", false), + new TermsParams("booleanQuery", true), + new TermsParams("booleanQuery", false), + new TermsParams("automaton", true), + new TermsParams("automaton", false), + new TermsParams("docValuesTermsFilter", true), + new TermsParams("docValuesTermsFilter", false), + new TermsParams("docValuesTermsFilterTopLevel", true), + new TermsParams("docValuesTermsFilterTopLevel", false), + new TermsParams("docValuesTermsFilterPerSegment", true), + new TermsParams("docValuesTermsFilterPerSegment", false) + }; + + for (TermsParams method : methods) { + // Single-valued field, single term value + ModifiableSolrParams params = new ModifiableSolrParams(); + params.add("q", method.buildQuery("author_s", "Robert Jordan")); + params.add("sort", "id asc"); + assertQ(req(params, "indent", "on"), "*[count(//doc)=2]", + "//result/doc[1]/str[@name='id'][.='2']", + "//result/doc[2]/str[@name='id'][.='3']" + ); + + // Single-valued field, multiple term values + params = new ModifiableSolrParams(); + params.add("q", method.buildQuery("author_s", "Robert Jordan,Isaac Asimov")); + params.add("sort", "id asc"); + assertQ(req(params, "indent", "on"), "*[count(//doc)=3]", + "//result/doc[1]/str[@name='id'][.='2']", + "//result/doc[2]/str[@name='id'][.='3']", + "//result/doc[3]/str[@name='id'][.='7']" + ); + + // Multi-valued field, single term value + params = new ModifiableSolrParams(); + params.add("q", method.buildQuery("cat_s", "childrens")); + params.add("sort", "id asc"); + assertQ(req(params, "indent", "on"), "*[count(//doc)=2]", + "//result/doc[1]/str[@name='id'][.='2']", + "//result/doc[2]/str[@name='id'][.='3']" + ); + + // Multi-valued field, multiple term values + params = new ModifiableSolrParams(); + params.add("q", method.buildQuery("cat_s", "childrens,scifi")); + params.add("sort", "id asc"); + assertQ(req(params, "indent", "on"), "*[count(//doc)=5]", + 
"//result/doc[1]/str[@name='id'][.='2']", + "//result/doc[2]/str[@name='id'][.='3']", + "//result/doc[3]/str[@name='id'][.='5']", + "//result/doc[4]/str[@name='id'][.='6']", + "//result/doc[5]/str[@name='id'][.='7']" + ); + + // Numeric field + params = new ModifiableSolrParams(); + params.add("q", method.buildQuery("pubyear_i", "2009")); + params.add("sort", "id asc"); + + // Test schema randomizes between Trie and Point. "terms" is supported for "trie" but not "Point" + final String numericFieldType = System.getProperty("solr.tests.IntegerFieldType"); + if (numericFieldType.contains("Point")) { + assertQEx("Expected 'terms' query on PointField to fail", req(params, "indent", "on"), 400); + } else { + assertQ(req(params, "indent", "on"), "*[count(//doc)=1]", "//result/doc[1]/str[@name='id'][.='1']"); + } + } + } +} diff --git a/solr/solr-ref-guide/src/other-parsers.adoc b/solr/solr-ref-guide/src/other-parsers.adoc index 389138c1bb5..fccc7a41fde 100644 --- a/solr/solr-ref-guide/src/other-parsers.adoc +++ b/solr/solr-ref-guide/src/other-parsers.adoc @@ -188,7 +188,7 @@ A list of queries that *must not* appear in matching documents. A list of queries *should* appear in matching documents. For a BooleanQuery with no `must` queries, one or more `should` queries must match a document for the BooleanQuery to match. `filter`:: -A list of queries that *must* appear in matching documents. However, unlike `must`, the score of filter queries is ignored. Also, these queries are cached in filter cache. To avoid caching add either `cache=false` as local parameter, or `"cache":"false"` property to underneath Query DLS Object. +A list of queries that *must* appear in matching documents. However, unlike `must`, the score of filter queries is ignored. Also, these queries are cached in filter cache. To avoid caching add either `cache=false` as local parameter, or `"cache":"false"` property to underneath Query DLS Object. *Examples* @@ -1031,7 +1031,19 @@ The field on which to search. 
This parameter is required. Separator to use when parsing the input. If set to " " (a single blank space), will trim additional white space from the input terms. Defaults to a comma (`,`). `method`:: -The internal query-building implementation: `termsFilter`, `booleanQuery`, `automaton`, or `docValuesTermsFilter`. Defaults to `termsFilter`. +An optional parameter used to determine which of several query implementations should be used by Solr. Options are restricted to: `termsFilter`, `booleanQuery`, `automaton`, `docValuesTermsFilterPerSegment`, `docValuesTermsFilterTopLevel` or `docValuesTermsFilter`. If unspecified, the default value is `termsFilter`. Each implementation has its own performance characteristics, and users are encouraged to experiment to determine which implementation is most performant for their use-case. Heuristics are given below. ++ +`booleanQuery` creates a `BooleanQuery` representing the request. Scales well with index size, but poorly with the number of terms being searched for. ++ +`termsFilter` the default `method`. Uses a `BooleanQuery` or a `TermInSetQuery` depending on the number of terms. Scales well with index size, but only moderately with the number of query terms. ++ +`docValuesTermsFilter` can only be used on fields with docValues data. The `cache` parameter is false by default. Chooses between the `docValuesTermsFilterTopLevel` and `docValuesTermsFilterPerSegment` methods using the number of query terms as a rough heuristic. Users should typically use this method instead of using `docValuesTermsFilterTopLevel` or `docValuesTermsFilterPerSegment` directly, unless they've done performance testing to validate one of the methods on queries of all sizes. Depending on the implementation picked, this method may rely on expensive data structures which are lazily populated after each commit. 
If you commit frequently and your use-case can tolerate a static warming query, consider adding one to `solrconfig.xml` so that this work is done as a part of the commit itself and not attached directly to user requests. ++ +`docValuesTermsFilterTopLevel` can only be used on fields with docValues data. The `cache` parameter is false by default. Uses top-level docValues data structures to find results. These data structures are more efficient as the number of query terms grows high (over several hundred). But they are also expensive to build and need to be populated lazily after each commit, causing a sometimes-noticeable slowdown on the first query after each commit. If you commit frequently and your use-case can tolerate a static warming query, consider adding one to `solrconfig.xml` so that this work is done as a part of the commit itself and not attached directly to user requests. ++ +`docValuesTermsFilterPerSegment` can only be used on fields with docValues data. The `cache` parameter is false by default. It is more efficient than the "top-level" alternative with small to medium (~500) numbers of query terms, and doesn't suffer a slowdown on queries immediately following a commit (as `docValuesTermsFilterTopLevel` does - see above). But it is less performant on very large numbers of query terms. ++ +`automaton` creates an `AutomatonQuery` representing the request with each term forming a union. Scales well with index size and moderately with the number of query terms. 
*Examples* From 5a73ad0178b5f65fecc57e49402c02dede65a6a0 Mon Sep 17 00:00:00 2001 From: 0xflotus <0xflotus@gmail.com> Date: Mon, 13 Jan 2020 15:22:04 +0100 Subject: [PATCH 4/7] Two minor Javadoc cleanups (#1002) --- .../java/org/apache/lucene/search/DisjunctionMaxQuery.java | 4 ++-- .../src/java/org/apache/solr/search/facet/FacetRange.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java index 4ffd3264fab..d934847484e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java @@ -262,9 +262,9 @@ public final class DisjunctionMaxQuery extends Query implements Iterable return buffer.toString(); } - /** Return true iff we represent the same query as o + /** Return true if we represent the same query as other * @param other another object - * @return true iff o is a DisjunctionMaxQuery with the same boost and the same subqueries, in the same order, as us + * @return true if other is a DisjunctionMaxQuery with the same boost and the same subqueries, in the same order, as us */ @Override public boolean equals(Object other) { diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java index b5f152188a0..5aaa0391500 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java @@ -113,7 +113,7 @@ class FacetRangeProcessor extends FacetProcessor { *

To demonstrate these, try something like: -
> java -cp lucene-core.jar:lucene-demo.jar:lucene-analyzers-common.jar org.apache.lucene.demo.IndexFiles -index index -docs rec.food.recipes/soups +
> java -cp lucene-core.jar:lucene-demo.jar:lucene-analyzers-common.jar org.apache.lucene.demo.IndexFiles -index index -docs rec.food.recipes/soups
adding rec.food.recipes/soups/abalone-chowder
  [ ... ] -

> java -cp lucene-core.jar:lucene-demo.jar:lucene-queryparser.jar:lucene-analyzers-common.jar org.apache.lucene.demo.SearchFiles +

> java -cp lucene-core.jar:lucene-demo.jar:lucene-queryparser.jar:lucene-analyzers-common.jar org.apache.lucene.demo.SearchFiles
Query: chowder
Searching for: chowder
34 total matching documents diff --git a/lucene/misc/src/java/overview.html b/lucene/misc/src/java/overview.html index 1423edd618d..8962cfcb8e2 100644 --- a/lucene/misc/src/java/overview.html +++ b/lucene/misc/src/java/overview.html @@ -49,7 +49,7 @@ Steps to build:

  • cd lucene/misc/ -
  • To compile NativePosixUtil.cpp -> libNativePosixUtil.so, run ant build-native-unix. +
  • To compile NativePosixUtil.cpp -> libNativePosixUtil.so, run ant build-native-unix.
  • libNativePosixUtil.so will be located in the lucene/build/native/ folder