mirror of https://github.com/apache/lucene.git
Merge remote-tracking branch 'origin/master' into gradle-master
This commit is contained in:
commit
3008dd9526
|
@ -118,6 +118,8 @@ Optimizations
|
|||
|
||||
* LUCENE-9113: Faster merging of SORTED/SORTED_SET doc values. (Adrien Grand)
|
||||
|
||||
* LUCENE-9125: Optimize Automaton.step() with binary search and introduce Automaton.next(). (Bruno Roustant)
|
||||
|
||||
Bug Fixes
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -148,7 +148,7 @@ algorithm.
|
|||
String[] tracer = new String[] { "A", "B", "C", "D", "E" };
|
||||
String[] data = new String[] { "HAT", "HUT", "H\u00C5T", "H\u00D8T", "HOT" };
|
||||
String[] sortedTracerOrder = new String[] { "A", "E", "B", "D", "C" };
|
||||
for (int i = 0 ; i < data.length ; ++i) {
|
||||
for (int i = 0 ; i < data.length ; ++i) {
|
||||
Document doc = new Document();
|
||||
doc.add(new Field("tracer", tracer[i], Field.Store.YES, Field.Index.NO));
|
||||
doc.add(new Field("contents", data[i], Field.Store.NO, Field.Index.ANALYZED));
|
||||
|
@ -160,7 +160,7 @@ algorithm.
|
|||
sort.setSort(new SortField("contents", SortField.STRING));
|
||||
Query query = new MatchAllDocsQuery();
|
||||
ScoreDoc[] result = searcher.search(query, null, 1000, sort).scoreDocs;
|
||||
for (int i = 0 ; i < result.length ; ++i) {
|
||||
for (int i = 0 ; i < result.length ; ++i) {
|
||||
Document doc = searcher.doc(result[i].doc);
|
||||
assertEquals(sortedTracerOrder[i], doc.getValues("tracer")[0]);
|
||||
}
|
||||
|
|
|
@ -33,7 +33,7 @@ import java.util.Locale;
|
|||
* files, roughly following the conventions of IPADIC. JapaneseTokenizer uses dictionaries built
|
||||
* with this tool. Note that the input files required by this build generally must be generated from
|
||||
* a corpus of real text using tools that are not part of Lucene. </p>
|
||||
* @lucene.experimenal
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class DictionaryBuilder {
|
||||
|
||||
|
|
|
@ -202,7 +202,7 @@
|
|||
<property name="javadoc.noindex" value="true"/>
|
||||
|
||||
<!---TODO: Fix accessibility (order of H1/H2/H3 headings), see https://issues.apache.org/jira/browse/LUCENE-8729 -->
|
||||
<property name="javadoc.doclint.args" value="-Xdoclint:all -Xdoclint:-missing -Xdoclint:-accessibility"/>
|
||||
<property name="javadoc.doclint.args" value="-Xdoclint:all,-missing,-accessibility,-html"/>
|
||||
<!---proc:none was added because of LOG4J2-1925 / JDK-8186647 -->
|
||||
<property name="javac.doclint.args" value="-Xdoclint:all/protected -Xdoclint:-missing -Xdoclint:-accessibility -proc:none"/>
|
||||
|
||||
|
|
|
@ -262,9 +262,9 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
|
|||
return buffer.toString();
|
||||
}
|
||||
|
||||
/** Return true iff we represent the same query as o
|
||||
/** Return true if we represent the same query as other
|
||||
* @param other another object
|
||||
* @return true iff o is a DisjunctionMaxQuery with the same boost and the same subqueries, in the same order, as us
|
||||
* @return true if other is a DisjunctionMaxQuery with the same boost and the same subqueries, in the same order, as us
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
|
|
|
@ -656,22 +656,77 @@ public class Automaton implements Accountable {
|
|||
* @return destination state, -1 if no matching outgoing transition
|
||||
*/
|
||||
public int step(int state, int label) {
|
||||
return next(state, 0, label, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks for the next transition that matches the provided label, assuming determinism.
|
||||
* <p>
|
||||
* This method is similar to {@link #step(int, int)} but is used more efficiently
|
||||
* when iterating over multiple transitions from the same source state. It keeps
|
||||
* the latest reached transition index in {@code transition.transitionUpto} so
|
||||
* the next call to this method can continue from there instead of restarting
|
||||
* from the first transition.
|
||||
*
|
||||
* @param transition The transition to start the lookup from (inclusive, using its
|
||||
* {@link Transition#source} and {@link Transition#transitionUpto}).
|
||||
* It is updated with the matched transition;
|
||||
* or with {@link Transition#dest} = -1 if no match.
|
||||
* @param label The codepoint to look up.
|
||||
* @return The destination state; or -1 if no matching outgoing transition.
|
||||
*/
|
||||
public int next(Transition transition, int label) {
|
||||
return next(transition.source, transition.transitionUpto, label, transition);
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks for the next transition that matches the provided label, assuming determinism.
|
||||
*
|
||||
* @param state The source state.
|
||||
* @param fromTransitionIndex The transition index to start the lookup from (inclusive); negative interpreted as 0.
|
||||
* @param label The codepoint to look up.
|
||||
* @param transition The output transition to update with the matching transition; or null for no update.
|
||||
* @return The destination state; or -1 if no matching outgoing transition.
|
||||
*/
|
||||
private int next(int state, int fromTransitionIndex, int label, Transition transition) {
|
||||
assert state >= 0;
|
||||
assert label >= 0;
|
||||
int trans = states[2*state];
|
||||
int limit = trans + 3*states[2*state+1];
|
||||
// TODO: we could do bin search; transitions are sorted
|
||||
while (trans < limit) {
|
||||
int dest = transitions[trans];
|
||||
int min = transitions[trans+1];
|
||||
int max = transitions[trans+2];
|
||||
if (min <= label && label <= max) {
|
||||
return dest;
|
||||
}
|
||||
trans += 3;
|
||||
}
|
||||
int stateIndex = 2 * state;
|
||||
int firstTransitionIndex = states[stateIndex];
|
||||
int numTransitions = states[stateIndex + 1];
|
||||
|
||||
return -1;
|
||||
// Since transitions are sorted,
|
||||
// binary search the transition for which label is within [minLabel, maxLabel].
|
||||
int low = Math.max(fromTransitionIndex, 0);
|
||||
int high = numTransitions - 1;
|
||||
while (low <= high) {
|
||||
int mid = (low + high) >>> 1;
|
||||
int transitionIndex = firstTransitionIndex + 3 * mid;
|
||||
int minLabel = transitions[transitionIndex + 1];
|
||||
if (minLabel > label) {
|
||||
high = mid - 1;
|
||||
} else {
|
||||
int maxLabel = transitions[transitionIndex + 2];
|
||||
if (maxLabel < label){
|
||||
low = mid + 1;
|
||||
} else {
|
||||
int destState = transitions[transitionIndex];
|
||||
if (transition != null) {
|
||||
transition.dest = destState;
|
||||
transition.min = minLabel;
|
||||
transition.max = maxLabel;
|
||||
transition.transitionUpto = mid;
|
||||
}
|
||||
return destState;
|
||||
}
|
||||
}
|
||||
}
|
||||
int destState = -1;
|
||||
if (transition != null) {
|
||||
transition.dest = destState;
|
||||
transition.transitionUpto = low;
|
||||
}
|
||||
return destState;
|
||||
}
|
||||
|
||||
/** Records new states and transitions and then {@link
|
||||
|
|
|
@ -94,12 +94,15 @@ final public class MinimizationOperations {
|
|||
}
|
||||
}
|
||||
// find initial partition and reverse edges
|
||||
Transition transition = new Transition();
|
||||
for (int q = 0; q < statesLen; q++) {
|
||||
final int j = a.isAccept(q) ? 0 : 1;
|
||||
partition[j].add(q);
|
||||
block[q] = j;
|
||||
transition.source = q;
|
||||
transition.transitionUpto = -1;
|
||||
for (int x = 0; x < sigmaLen; x++) {
|
||||
final ArrayList<Integer>[] r = reverse[a.step(q, sigma[x])];
|
||||
final ArrayList<Integer>[] r = reverse[a.next(transition, sigma[x])];
|
||||
if (r[x] == null) {
|
||||
r[x] = new ArrayList<>();
|
||||
}
|
||||
|
|
|
@ -78,10 +78,13 @@ public abstract class RunAutomaton implements Accountable {
|
|||
accept = new boolean[size];
|
||||
transitions = new int[size * points.length];
|
||||
Arrays.fill(transitions, -1);
|
||||
Transition transition = new Transition();
|
||||
for (int n=0;n<size;n++) {
|
||||
accept[n] = a.isAccept(n);
|
||||
transition.source = n;
|
||||
transition.transitionUpto = -1;
|
||||
for (int c = 0; c < points.length; c++) {
|
||||
int dest = a.step(n, points[c]);
|
||||
int dest = a.next(transition, points[c]);
|
||||
assert dest == -1 || dest < size;
|
||||
transitions[n * points.length + c] = dest;
|
||||
}
|
||||
|
|
|
@ -49,7 +49,7 @@ to check if the results are what we expect):</p>
|
|||
ScoreDoc[] hits = isearcher.search(query, 10).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
// Iterate through the results:
|
||||
for (int i = 0; i < hits.length; i++) {
|
||||
for (int i = 0; i < hits.length; i++) {
|
||||
Document hitDoc = isearcher.doc(hits[i].doc);
|
||||
assertEquals("This is the text to be indexed.", hitDoc.get("fieldname"));
|
||||
}
|
||||
|
@ -147,11 +147,11 @@ index for all the files contained in a directory.</li>
|
|||
queries and searches an index.</li>
|
||||
</ul>
|
||||
To demonstrate these, try something like:
|
||||
<blockquote><tt>> <b>java -cp lucene-core.jar:lucene-demo.jar:lucene-analyzers-common.jar org.apache.lucene.demo.IndexFiles -index index -docs rec.food.recipes/soups</b></tt>
|
||||
<blockquote><tt>> <b>java -cp lucene-core.jar:lucene-demo.jar:lucene-analyzers-common.jar org.apache.lucene.demo.IndexFiles -index index -docs rec.food.recipes/soups</b></tt>
|
||||
<br><tt>adding rec.food.recipes/soups/abalone-chowder</tt>
|
||||
<br><tt> </tt>[ ... ]
|
||||
|
||||
<p><tt>> <b>java -cp lucene-core.jar:lucene-demo.jar:lucene-queryparser.jar:lucene-analyzers-common.jar org.apache.lucene.demo.SearchFiles</b></tt>
|
||||
<p><tt>> <b>java -cp lucene-core.jar:lucene-demo.jar:lucene-queryparser.jar:lucene-analyzers-common.jar org.apache.lucene.demo.SearchFiles</b></tt>
|
||||
<br><tt>Query: <b>chowder</b></tt>
|
||||
<br><tt>Searching for: chowder</tt>
|
||||
<br><tt>34 total matching documents</tt>
|
||||
|
|
|
@ -49,7 +49,7 @@ Steps to build:
|
|||
<ul>
|
||||
<li> <tt>cd lucene/misc/</tt>
|
||||
|
||||
<li> To compile NativePosixUtil.cpp -> libNativePosixUtil.so, run<tt> ant build-native-unix</tt>.
|
||||
<li> To compile NativePosixUtil.cpp -> libNativePosixUtil.so, run<tt> ant build-native-unix</tt>.
|
||||
|
||||
<li><tt>libNativePosixUtil.so</tt> will be located in the <tt>lucene/build/native/</tt> folder
|
||||
|
||||
|
|
|
@ -77,6 +77,10 @@ Upgrade Notes
|
|||
|
||||
* SOLR-14092: Deprecated BlockJoinFacetComponent and BlockJoinDocSetFacetComponent are removed
|
||||
Users are encouraged to migrate to uniqueBlock() in JSON Facet API. (Mikhail Khludnev)
|
||||
|
||||
* SOLR-13985: Solr's Jetty now binds to localhost network interface by default for better out of the box security.
|
||||
Administrators that need Solr exposed more broadly can change the SOLR_JETTY_HOST property in their Solr include
|
||||
(solr.in.sh/solr.in.cmd) file. (Jason Gerlowski, David Smiley, Robert Muir)
|
||||
|
||||
Improvements
|
||||
----------------------
|
||||
|
@ -193,6 +197,11 @@ Improvements
|
|||
|
||||
* SOLR-14154: Return correct isolation level when retrieving it from the SQL Connection (Nick Vercammen, Kevin Risden)
|
||||
|
||||
* SOLR-13890: Add "top-level" DV implementation for {!terms} queries. This approach tends to be more efficient for
|
||||
queries with very large numbers of terms. The new implementation is used by default for method=docValuesTermsFilter
|
||||
terms queries that are searching 500 or more terms. Users wishing to ignore this heuristic can choose the
|
||||
docValuesTermsFilterTopLevel or docValuesTermsFilterPerSegment methods directly. (Jason Gerlowski, Joel Bernstein, David Smiley)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
(No changes)
|
||||
|
|
|
@ -1818,6 +1818,9 @@ fi
|
|||
|
||||
if [ "$SOLR_HOST" != "" ]; then
|
||||
SOLR_HOST_ARG=("-Dhost=$SOLR_HOST")
|
||||
elif [[ -z "$SOLR_JETTY_HOST" || "$SOLR_JETTY_HOST" == "127.0.0.1" ]]; then
|
||||
# Jetty will only bind on localhost interface, so nodes must advertise themselves with localhost
|
||||
SOLR_HOST_ARG=("-Dhost=localhost")
|
||||
else
|
||||
SOLR_HOST_ARG=()
|
||||
fi
|
||||
|
@ -1878,6 +1881,10 @@ if [ -z "$SOLR_PORT" ]; then
|
|||
SOLR_PORT=8983
|
||||
fi
|
||||
|
||||
if [ -n "$SOLR_JETTY_HOST" ]; then
|
||||
SOLR_OPTS+=("-Dsolr.jetty.host=$SOLR_JETTY_HOST")
|
||||
fi
|
||||
|
||||
if [ -z "$STOP_PORT" ]; then
|
||||
STOP_PORT=`expr $SOLR_PORT - 1000`
|
||||
fi
|
||||
|
|
4150
solr/bin/solr.cmd
4150
solr/bin/solr.cmd
File diff suppressed because it is too large
Load Diff
|
@ -105,13 +105,17 @@ REM set SOLR_LOG_PRESTART_ROTATION=false
|
|||
REM Enables jetty request log for all requests
|
||||
REM set SOLR_REQUESTLOG_ENABLED=false
|
||||
|
||||
REM Set the host interface to listen on. Jetty will listen on all interfaces (0.0.0.0) by default.
|
||||
REM This must be an IPv4 ("a.b.c.d") or bracketed IPv6 ("[x::y]") address, not a hostname!
|
||||
REM set SOLR_JETTY_HOST=0.0.0.0
|
||||
|
||||
REM Sets the port Solr binds to, default is 8983
|
||||
REM set SOLR_PORT=8983
|
||||
|
||||
REM Sets the network interface the Solr binds to. To prevent administrators from
|
||||
REM accidentally exposing Solr more widely than intended, this defaults to 127.0.0.1.
|
||||
REM Administrators should think carefully about their deployment environment and
|
||||
REM set this value as narrowly as required before going to production. In
|
||||
REM environments where security is not a concern, 0.0.0.0 can be used to allow
|
||||
REM Solr to accept connections on all network interfaces.
|
||||
REM set SOLR_JETTY_HOST=127.0.0.1
|
||||
|
||||
REM Restrict access to solr by IP address.
|
||||
REM Specify a comma-separated list of addresses or networks, for example:
|
||||
REM 127.0.0.1, 192.168.0.0/24, [::1], [2000:123:4:5::]/64
|
||||
|
|
|
@ -139,6 +139,14 @@
|
|||
# 127.0.0.1, 192.168.0.0/24, [::1], [2000:123:4:5::]/64
|
||||
#SOLR_IP_BLACKLIST=
|
||||
|
||||
# Sets the network interface the Solr binds to. To prevent administrators from
|
||||
# accidentally exposing Solr more widely than intended, this defaults to 127.0.0.1.
|
||||
# Administrators should think carefully about their deployment environment and
|
||||
# set this value as narrowly as required before going to production. In
|
||||
# environments where security is not a concern, 0.0.0.0 can be used to allow
|
||||
# Solr to accept connections on all network interfaces.
|
||||
#SOLR_JETTY_HOST="127.0.0.1"
|
||||
|
||||
# Enables HTTPS. It is implictly true if you set SOLR_SSL_KEY_STORE. Use this config
|
||||
# to enable https module with custom jetty configuration.
|
||||
#SOLR_SSL_ENABLED=true
|
||||
|
|
|
@ -16,29 +16,31 @@
|
|||
*/
|
||||
package org.apache.solr.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.Arrays;
|
||||
import java.util.Locale;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.PrefixCodedTerms;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.AutomatonQuery;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.DocValuesTermsQuery;
|
||||
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermInSetQuery;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.LongBitSet;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.schema.PointField;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Finds documents whose specified field has any of the specified values. It's like
|
||||
|
@ -52,6 +54,7 @@ import org.apache.solr.schema.PointField;
|
|||
* Note that if no values are specified then the query matches no documents.
|
||||
*/
|
||||
public class TermsQParserPlugin extends QParserPlugin {
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
public static final String NAME = "terms";
|
||||
|
||||
/** The separator to use in the underlying suggester */
|
||||
|
@ -88,10 +91,29 @@ public class TermsQParserPlugin extends QParserPlugin {
|
|||
docValuesTermsFilter {//on 4x this is FieldCacheTermsFilter but we use the 5x name any way
|
||||
@Override
|
||||
Query makeFilter(String fname, BytesRef[] byteRefs) {
|
||||
return new DocValuesTermsQuery(fname, byteRefs);//constant scores
|
||||
// TODO Further tune this heuristic number
|
||||
return (byteRefs.length > 700) ? docValuesTermsFilterTopLevel.makeFilter(fname, byteRefs) : docValuesTermsFilterPerSegment.makeFilter(fname, byteRefs);
|
||||
}
|
||||
},
|
||||
docValuesTermsFilterTopLevel {
|
||||
@Override
|
||||
Query makeFilter(String fname, BytesRef[] byteRefs) {
|
||||
return disableCacheByDefault(new TopLevelDocValuesTermsQuery(fname, byteRefs));
|
||||
}
|
||||
},
|
||||
docValuesTermsFilterPerSegment {
|
||||
@Override
|
||||
Query makeFilter(String fname, BytesRef[] byteRefs) {
|
||||
return disableCacheByDefault(new DocValuesTermsQuery(fname, byteRefs));
|
||||
}
|
||||
};
|
||||
|
||||
private static Query disableCacheByDefault(Query q) {
|
||||
final WrappedQuery wrappedQuery = new WrappedQuery(q);
|
||||
wrappedQuery.setCache(false);
|
||||
return wrappedQuery;
|
||||
}
|
||||
|
||||
abstract Query makeFilter(String fname, BytesRef[] byteRefs);
|
||||
}
|
||||
|
||||
|
@ -101,7 +123,7 @@ public class TermsQParserPlugin extends QParserPlugin {
|
|||
@Override
|
||||
public Query parse() throws SyntaxError {
|
||||
String fname = localParams.get(QueryParsing.F);
|
||||
FieldType ft = req.getSchema().getFieldTypeNoEx(fname);
|
||||
FieldType ft = req.getSchema().getFieldType(fname);
|
||||
String separator = localParams.get(SEPARATOR, ",");
|
||||
String qstr = localParams.get(QueryParsing.V);//never null
|
||||
Method method = Method.valueOf(localParams.get(METHOD, Method.termsFilter.name()));
|
||||
|
@ -119,7 +141,7 @@ public class TermsQParserPlugin extends QParserPlugin {
|
|||
|
||||
if (ft.isPointField()) {
|
||||
if (localParams.get(METHOD) != null) {
|
||||
throw new IllegalArgumentException(
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||
String.format(Locale.ROOT, "Method '%s' not supported in TermsQParser when using PointFields", localParams.get(METHOD)));
|
||||
}
|
||||
return ((PointField)ft).getSetQuery(this, req.getSchema().getField(fname), Arrays.asList(splitVals));
|
||||
|
@ -142,4 +164,100 @@ public class TermsQParserPlugin extends QParserPlugin {
|
|||
}
|
||||
};
|
||||
}
|
||||
|
||||
private static class TopLevelDocValuesTermsQuery extends DocValuesTermsQuery {
|
||||
private final String fieldName;
|
||||
private SortedSetDocValues topLevelDocValues;
|
||||
private LongBitSet topLevelTermOrdinals;
|
||||
private boolean matchesAtLeastOneTerm = false;
|
||||
|
||||
|
||||
public TopLevelDocValuesTermsQuery(String field, BytesRef... terms) {
|
||||
super(field, terms);
|
||||
this.fieldName = field;
|
||||
}
|
||||
|
||||
public Weight createWeight(IndexSearcher searcher, final ScoreMode scoreMode, float boost) throws IOException {
|
||||
if (! (searcher instanceof SolrIndexSearcher)) {
|
||||
log.debug("Falling back to DocValuesTermsQuery because searcher [{}] is not the required SolrIndexSearcher", searcher);
|
||||
return super.createWeight(searcher, scoreMode, boost);
|
||||
}
|
||||
|
||||
topLevelDocValues = DocValues.getSortedSet(((SolrIndexSearcher)searcher).getSlowAtomicReader(), fieldName);
|
||||
topLevelTermOrdinals = new LongBitSet(topLevelDocValues.getValueCount());
|
||||
PrefixCodedTerms.TermIterator iterator = getTerms().iterator();
|
||||
|
||||
long lastTermOrdFound = 0;
|
||||
for(BytesRef term = iterator.next(); term != null; term = iterator.next()) {
|
||||
long currentTermOrd = lookupTerm(topLevelDocValues, term, lastTermOrdFound);
|
||||
if (currentTermOrd >= 0L) {
|
||||
matchesAtLeastOneTerm = true;
|
||||
topLevelTermOrdinals.set(currentTermOrd);
|
||||
lastTermOrdFound = currentTermOrd;
|
||||
}
|
||||
}
|
||||
|
||||
return new ConstantScoreWeight(this, boost) {
|
||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
if (! matchesAtLeastOneTerm) {
|
||||
return null;
|
||||
}
|
||||
|
||||
SortedSetDocValues segmentDocValues = context.reader().getSortedSetDocValues(fieldName);
|
||||
if (segmentDocValues == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
final int docBase = context.docBase;
|
||||
return new ConstantScoreScorer(this, this.score(), scoreMode, new TwoPhaseIterator(segmentDocValues) {
|
||||
public boolean matches() throws IOException {
|
||||
topLevelDocValues.advanceExact(docBase + approximation.docID());
|
||||
for(long ord = topLevelDocValues.nextOrd(); ord != -1L; ord = topLevelDocValues.nextOrd()) {
|
||||
if (topLevelTermOrdinals.get(ord)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
public float matchCost() {
|
||||
return 10.0F;
|
||||
}
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
public boolean isCacheable(LeafReaderContext ctx) {
|
||||
return DocValues.isCacheable(ctx, new String[]{fieldName});
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/*
|
||||
* Same binary-search based implementation as SortedSetDocValues.lookupTerm(BytesRef), but with an
|
||||
* optimization to narrow the search space where possible by providing a startOrd instead of begining each search
|
||||
* at 0.
|
||||
*/
|
||||
private long lookupTerm(SortedSetDocValues docValues, BytesRef key, long startOrd) throws IOException {
|
||||
long low = startOrd;
|
||||
long high = docValues.getValueCount()-1;
|
||||
|
||||
while (low <= high) {
|
||||
long mid = (low + high) >>> 1;
|
||||
final BytesRef term = docValues.lookupOrd(mid);
|
||||
int cmp = term.compareTo(key);
|
||||
|
||||
if (cmp < 0) {
|
||||
low = mid + 1;
|
||||
} else if (cmp > 0) {
|
||||
high = mid - 1;
|
||||
} else {
|
||||
return mid; // key found
|
||||
}
|
||||
}
|
||||
|
||||
return -(low + 1); // key not found.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -113,7 +113,7 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
|
|||
* <ul>
|
||||
* <li>If this is a phase#1 shard request, then {@link #createRangeList} will set this value (non null)
|
||||
* if and only if it is needed for refinement (ie: <code>hardend:false</code> & <code>other</code>
|
||||
* that requres an end value low/high value calculation). And it wil be included in the response</li>
|
||||
* that requires an end value low/high value calculation). And it wil be included in the response</li>
|
||||
* <li>If this is a phase#2 refinement request, this variable will be used
|
||||
* {@link #getOrComputeActualEndForRefinement} to track the value sent with the refinement request
|
||||
* -- or to cache a recomputed value if the request omitted it -- for use in refining the
|
||||
|
|
|
@ -0,0 +1,163 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.search;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestTermsQParserPlugin extends SolrTestCaseJ4 {
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig.xml", "schema.xml");
|
||||
|
||||
assertU(adoc("id","1", "author_s", "Lev Grossman", "t_title", "The Magicians", "cat_s", "fantasy", "pubyear_i", "2009"));
|
||||
assertU(adoc("id", "2", "author_s", "Robert Jordan", "t_title", "The Eye of the World", "cat_s", "fantasy", "cat_s", "childrens", "pubyear_i", "1990"));
|
||||
assertU(adoc("id", "3", "author_s", "Robert Jordan", "t_title", "The Great Hunt", "cat_s", "fantasy", "cat_s", "childrens", "pubyear_i", "1990"));
|
||||
assertU(adoc("id", "4", "author_s", "N.K. Jemisin", "t_title", "The Fifth Season", "cat_s", "fantasy", "pubyear_i", "2015"));
|
||||
assertU(commit());
|
||||
assertU(adoc("id", "5", "author_s", "Ursula K. Le Guin", "t_title", "The Dispossessed", "cat_s", "scifi", "pubyear_i", "1974"));
|
||||
assertU(adoc("id", "6", "author_s", "Ursula K. Le Guin", "t_title", "The Left Hand of Darkness", "cat_s", "scifi", "pubyear_i", "1969"));
|
||||
assertU(adoc("id", "7", "author_s", "Isaac Asimov", "t_title", "Foundation", "cat_s", "scifi", "pubyear_i", "1951"));
|
||||
assertU(commit());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTextTermsQuery() {
|
||||
// Single term value
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add("q", "{!terms f=t_title}left");
|
||||
params.add("sort", "id asc");
|
||||
assertQ(req(params, "indent", "on"), "*[count(//doc)=1]",
|
||||
"//result/doc[1]/str[@name='id'][.='6']"
|
||||
);
|
||||
|
||||
// Multiple term values
|
||||
params = new ModifiableSolrParams();
|
||||
params.add("q", "{!terms f=t_title}left,hunt");
|
||||
params.add("sort", "id asc");
|
||||
assertQ(req(params, "indent", "on"), "*[count(//doc)=2]",
|
||||
"//result/doc[1]/str[@name='id'][.='3']",
|
||||
"//result/doc[2]/str[@name='id'][.='6']"
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTermsUsingNonDefaultSeparator() {
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add("q", "{!terms f=cat_s separator=|}childrens|scifi");
|
||||
params.add("sort", "id asc");
|
||||
assertQ(req(params, "indent", "on"), "*[count(//doc)=5]",
|
||||
"//result/doc[1]/str[@name='id'][.='2']",
|
||||
"//result/doc[2]/str[@name='id'][.='3']",
|
||||
"//result/doc[3]/str[@name='id'][.='5']",
|
||||
"//result/doc[4]/str[@name='id'][.='6']",
|
||||
"//result/doc[5]/str[@name='id'][.='7']"
|
||||
);
|
||||
}
|
||||
|
||||
class TermsParams {
|
||||
public String method;
|
||||
public boolean cache;
|
||||
|
||||
public TermsParams(String method, boolean cache) {
|
||||
this.method = method;
|
||||
this.cache = cache;
|
||||
}
|
||||
|
||||
|
||||
public String buildQuery(String fieldName, String commaDelimitedTerms) {
|
||||
return "{!terms f=" + fieldName + " method=" + method + " cache=" + cache + "}" + commaDelimitedTerms;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTermsMethodEquivalency() {
|
||||
// Run queries with a variety of 'method' and postfilter options.
|
||||
final TermsParams[] methods = new TermsParams[] {
|
||||
new TermsParams("termsFilter", true),
|
||||
new TermsParams("termsFilter", false),
|
||||
new TermsParams("booleanQuery", true),
|
||||
new TermsParams("booleanQuery", false),
|
||||
new TermsParams("automaton", true),
|
||||
new TermsParams("automaton", false),
|
||||
new TermsParams("docValuesTermsFilter", true),
|
||||
new TermsParams("docValuesTermsFilter", false),
|
||||
new TermsParams("docValuesTermsFilterTopLevel", true),
|
||||
new TermsParams("docValuesTermsFilterTopLevel", false),
|
||||
new TermsParams("docValuesTermsFilterPerSegment", true),
|
||||
new TermsParams("docValuesTermsFilterPerSegment", false)
|
||||
};
|
||||
|
||||
for (TermsParams method : methods) {
|
||||
// Single-valued field, single term value
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add("q", method.buildQuery("author_s", "Robert Jordan"));
|
||||
params.add("sort", "id asc");
|
||||
assertQ(req(params, "indent", "on"), "*[count(//doc)=2]",
|
||||
"//result/doc[1]/str[@name='id'][.='2']",
|
||||
"//result/doc[2]/str[@name='id'][.='3']"
|
||||
);
|
||||
|
||||
// Single-valued field, multiple term values
|
||||
params = new ModifiableSolrParams();
|
||||
params.add("q", method.buildQuery("author_s", "Robert Jordan,Isaac Asimov"));
|
||||
params.add("sort", "id asc");
|
||||
assertQ(req(params, "indent", "on"), "*[count(//doc)=3]",
|
||||
"//result/doc[1]/str[@name='id'][.='2']",
|
||||
"//result/doc[2]/str[@name='id'][.='3']",
|
||||
"//result/doc[3]/str[@name='id'][.='7']"
|
||||
);
|
||||
|
||||
// Multi-valued field, single term value
|
||||
params = new ModifiableSolrParams();
|
||||
params.add("q", method.buildQuery("cat_s", "childrens"));
|
||||
params.add("sort", "id asc");
|
||||
assertQ(req(params, "indent", "on"), "*[count(//doc)=2]",
|
||||
"//result/doc[1]/str[@name='id'][.='2']",
|
||||
"//result/doc[2]/str[@name='id'][.='3']"
|
||||
);
|
||||
|
||||
// Multi-valued field, multiple term values
|
||||
params = new ModifiableSolrParams();
|
||||
params.add("q", method.buildQuery("cat_s", "childrens,scifi"));
|
||||
params.add("sort", "id asc");
|
||||
assertQ(req(params, "indent", "on"), "*[count(//doc)=5]",
|
||||
"//result/doc[1]/str[@name='id'][.='2']",
|
||||
"//result/doc[2]/str[@name='id'][.='3']",
|
||||
"//result/doc[3]/str[@name='id'][.='5']",
|
||||
"//result/doc[4]/str[@name='id'][.='6']",
|
||||
"//result/doc[5]/str[@name='id'][.='7']"
|
||||
);
|
||||
|
||||
// Numeric field
|
||||
params = new ModifiableSolrParams();
|
||||
params.add("q", method.buildQuery("pubyear_i", "2009"));
|
||||
params.add("sort", "id asc");
|
||||
|
||||
// Test schema randomizes between Trie and Point. "terms" is supported for "trie" but not "Point"
|
||||
final String numericFieldType = System.getProperty("solr.tests.IntegerFieldType");
|
||||
if (numericFieldType.contains("Point")) {
|
||||
assertQEx("Expected 'terms' query on PointField to fail", req(params, "indent", "on"), 400);
|
||||
} else {
|
||||
assertQ(req(params, "indent", "on"), "*[count(//doc)=1]", "//result/doc[1]/str[@name='id'][.='1']");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -38,7 +38,7 @@
|
|||
</Item>
|
||||
</Array>
|
||||
</Arg>
|
||||
<Set name="host"><Property name="jetty.host" /></Set>
|
||||
<Set name="host"><Property name="solr.jetty.host" default="127.0.0.1"/></Set>
|
||||
<Set name="port"><Property name="jetty.port" default="8983" /></Set>
|
||||
<Set name="idleTimeout"><Property name="solr.jetty.http.idleTimeout" default="120000"/></Set>
|
||||
<Set name="acceptorPriorityDelta"><Property name="solr.jetty.http.acceptorPriorityDelta" default="0"/></Set>
|
||||
|
|
|
@ -63,7 +63,7 @@
|
|||
</Item>
|
||||
</Array>
|
||||
</Arg>
|
||||
<Set name="host"><Property name="solr.jetty.host" /></Set>
|
||||
<Set name="host"><Property name="solr.jetty.host" default="127.0.0.1"/></Set>
|
||||
<Set name="port"><Property name="solr.jetty.https.port" default="8983" /></Set>
|
||||
<Set name="idleTimeout"><Property name="solr.jetty.https.timeout" default="120000"/></Set>
|
||||
<Set name="acceptorPriorityDelta"><Property name="solr.jetty.ssl.acceptorPriorityDelta" default="0"/></Set>
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
</Item>
|
||||
</Array>
|
||||
</Arg>
|
||||
<Set name="host"><Property name="solr.jetty.host" /></Set>
|
||||
<Set name="host"><Property name="solr.jetty.host" default="127.0.0.1" /></Set>
|
||||
<Set name="port"><Property name="solr.jetty.https.port" default="8983" /></Set>
|
||||
<Set name="idleTimeout"><Property name="solr.jetty.https.timeout" default="120000"/></Set>
|
||||
<Set name="acceptorPriorityDelta"><Property name="solr.jetty.ssl.acceptorPriorityDelta" default="0"/></Set>
|
||||
|
|
|
@ -31,6 +31,11 @@ In this section you will learn how to start a SolrCloud cluster using startup sc
|
|||
This tutorial assumes that you're already familiar with the basics of using Solr. If you need a refresher, please see the <<getting-started.adoc#getting-started,Getting Started section>> to get a grounding in Solr concepts. If you load documents as part of that exercise, you should start over with a fresh Solr installation for these SolrCloud tutorials.
|
||||
====
|
||||
|
||||
[WARNING]
|
||||
====
|
||||
For security reasons, Solr nodes only accept connections from localhost by default. Administrators setting up SolrCloud deployments with multiple nodes must override this setting. For more details see <<securing-solr.adoc#network-configuration,here>>.
|
||||
====
|
||||
|
||||
== SolrCloud Example
|
||||
|
||||
=== Interactive Startup
|
||||
|
|
|
@ -188,7 +188,7 @@ A list of queries that *must not* appear in matching documents.
|
|||
A list of queries that *should* appear in matching documents. For a BooleanQuery with no `must` queries, one or more `should` queries must match a document for the BooleanQuery to match.
|
||||
|
||||
`filter`::
|
||||
A list of queries that *must* appear in matching documents. However, unlike `must`, the score of filter queries is ignored. Also, these queries are cached in filter cache. To avoid caching add either `cache=false` as local parameter, or `"cache":"false"` property to underneath Query DSL Object.
|
||||
A list of queries that *must* appear in matching documents. However, unlike `must`, the score of filter queries is ignored. Also, these queries are cached in filter cache. To avoid caching add either `cache=false` as local parameter, or `"cache":"false"` property to underneath Query DSL Object.
|
||||
|
||||
*Examples*
|
||||
|
||||
|
@ -1031,7 +1031,19 @@ The field on which to search. This parameter is required.
|
|||
Separator to use when parsing the input. If set to " " (a single blank space), will trim additional white space from the input terms. Defaults to a comma (`,`).
|
||||
|
||||
`method`::
|
||||
The internal query-building implementation: `termsFilter`, `booleanQuery`, `automaton`, or `docValuesTermsFilter`. Defaults to `termsFilter`.
|
||||
An optional parameter used to determine which of several query implementations should be used by Solr. Options are restricted to: `termsFilter`, `booleanQuery`, `automaton`, `docValuesTermsFilterPerSegment`, `docValuesTermsFilterTopLevel` or `docValuesTermsFilter`. If unspecified, the default value is `termsFilter`. Each implementation has its own performance characteristics, and users are encouraged to experiment to determine which implementation is most performant for their use-case. Heuristics are given below.
|
||||
+
|
||||
`booleanQuery` creates a `BooleanQuery` representing the request. Scales well with index size, but poorly with the number of terms being searched for.
|
||||
+
|
||||
`termsFilter` is the default `method`. Uses a `BooleanQuery` or a `TermInSetQuery` depending on the number of terms. Scales well with index size, but only moderately with the number of query terms.
|
||||
+
|
||||
`docValuesTermsFilter` can only be used on fields with docValues data. The `cache` parameter is false by default. Chooses between the `docValuesTermsFilterTopLevel` and `docValuesTermsFilterPerSegment` methods using the number of query terms as a rough heuristic. Users should typically use this method instead of using `docValuesTermsFilterTopLevel` or `docValuesTermsFilterPerSegment` directly, unless they've done performance testing to validate one of the methods on queries of all sizes. Depending on the implementation picked, this method may rely on expensive data structures which are lazily populated after each commit. If you commit frequently and your use-case can tolerate a static warming query, consider adding one to `solrconfig.xml` so that this work is done as a part of the commit itself and not attached directly to user requests.
|
||||
+
|
||||
`docValuesTermsFilterTopLevel` can only be used on fields with docValues data. The `cache` parameter is false by default. Uses top-level docValues data structures to find results. These data structures are more efficient as the number of query terms grows high (over several hundred). But they are also expensive to build and need to be populated lazily after each commit, causing a sometimes-noticeable slowdown on the first query after each commit. If you commit frequently and your use-case can tolerate a static warming query, consider adding one to `solrconfig.xml` so that this work is done as a part of the commit itself and not attached directly to user requests.
|
||||
+
|
||||
`docValuesTermsFilterPerSegment` can only be used on fields with docValues data. The `cache` parameter is false by default. It is more efficient than the "top-level" alternative with small to medium (~500) numbers of query terms, and doesn't suffer a slowdown on queries immediately following a commit (as `docValuesTermsFilterTopLevel` does - see above). But it is less performant on very large numbers of query terms.
|
||||
+
|
||||
`automaton` creates an `AutomatonQuery` representing the request with each term forming a union. Scales well with index size and moderately with the number of query terms.
|
||||
|
||||
*Examples*
|
||||
|
||||
|
|
|
@ -159,20 +159,25 @@ bin/post -u solr:SolrRocks -c gettingstarted a.pdf
|
|||
|
||||
== Post Tool Windows Support
|
||||
|
||||
`bin/post` exists currently only as a Unix shell script, however it delegates its work to a cross-platform capable Java program. The <<SimplePostTool>> can be run directly in supported environments, including Windows.
|
||||
`bin/post` is a Unix shell script and as such cannot be used directly on Windows.
|
||||
However it delegates its work to a cross-platform capable Java program called "SimplePostTool" or `post.jar`, that can be used in Windows environments.
|
||||
|
||||
== SimplePostTool
|
||||
|
||||
The `bin/post` script currently delegates to a standalone Java program called `SimplePostTool`.
|
||||
|
||||
This tool, bundled into an executable JAR, can be run directly using `java -jar example/exampledocs/post.jar`. See the help output and take it from there to post files, recurse a website or file system folder, or send direct commands to a Solr server.
|
||||
The argument syntax differs significantly from `bin/post`, so your first step should be to print the SimplePostTool help text.
|
||||
|
||||
[source,plain]
|
||||
----
|
||||
$ java -jar example/exampledocs/post.jar -h
|
||||
SimplePostTool version 5.0.0
|
||||
Usage: java [SystemProperties] -jar post.jar [-h|-] [<file|folder|url|arg> [<file|folder|url|arg>...]]
|
||||
.
|
||||
.
|
||||
.
|
||||
$ java -jar example\exampledocs\post.jar -h
|
||||
----
|
||||
|
||||
This command prints information about all the arguments and System properties available to SimplePostTool users.
|
||||
There are also examples showing how to post files, crawl a website or file system folder, and send update commands (deletes, etc.) directly to Solr.
|
||||
|
||||
Most usage involves passing both Java System properties and program arguments on the command line. Consider the example below:
|
||||
|
||||
[source,plain]
|
||||
----
|
||||
$ java -jar -Dc=gettingstarted -Dauto example\exampledocs\post.jar example\exampledocs\*
|
||||
----
|
||||
|
||||
This indexes the contents of the `exampledocs` directory into a collection called `gettingstarted`.
|
||||
The `-Dauto` System property governs whether or not Solr sends the document type to Solr during extraction.
|
||||
|
|
|
@ -79,6 +79,22 @@ SOLR_IP_BLACKLIST="192.168.0.3, 192.168.0.4"
|
|||
ZooKeeper is a central and important part of a SolrCloud cluster and understanding how to secure
|
||||
its content is covered in the <<zookeeper-access-control.adoc#zookeeper-access-control,ZooKeeper Access Control>> page.
|
||||
|
||||
|
||||
== Network Configuration
|
||||
|
||||
// tag::security-network-binding-1[]
|
||||
Administrators should consider their security setup carefully as an important step in moving to production. Solr provides a number of features out of the box to meet the security needs of users: authentication and authorization can be configured using a range of security plugins, privacy can be bolstered by enabling SSL/TLS, and (in SolrCloud) ZooKeeper data can be protected with ACL rules to prevent unauthorized reads and writes.
|
||||
|
||||
Even if these measures or others are taken, it is strongly recommended that Solr always be protected by a firewall. Solr is not designed to be exposed on the open internet.
|
||||
|
||||
It is also strongly recommended that Solr listen to only those network interfaces that are strictly required. To prevent administrators from unintentionally exposing Solr more broadly, Solr only listens on the loopback interface ("127.0.0.1") by default. Most deployments will need to change this value to something less restrictive so that it can be reached from other boxes. This can be done by setting a `SOLR_JETTY_HOST` value in your environment's "include script" (`solr.in.sh` or `solr.in.cmd`):
|
||||
|
||||
[source,bash]
|
||||
----
|
||||
SOLR_JETTY_HOST="0.0.0.0"
|
||||
----
|
||||
// end::security-network-binding-1[]
|
||||
|
||||
== Enable Security Manager
|
||||
|
||||
Solr can run in a Java Security Manager sandbox by setting `SOLR_SECURITY_MANAGER_ENABLED=true` via environment variable or in `solr.in.sh`/`solr.in.cmd`. This feature is incompatible with Hadoop.
|
||||
|
|
|
@ -302,6 +302,10 @@ Check these limits every time you upgrade your kernel or operating system. These
|
|||
If these limits are exceeded, the problems reported by Solr vary depending on the specific operation responsible for exceeding the limit. Errors such as "too many open files", "connection error", and "max processes exceeded" have been reported, as well as SolrCloud recovery failures.
|
||||
====
|
||||
|
||||
== Security Considerations
|
||||
|
||||
include::securing-solr.adoc[tag=security-network-binding-1]
|
||||
|
||||
== Running Multiple Solr Nodes per Host
|
||||
|
||||
The `bin/solr` script is capable of running multiple instances on one machine, but for a *typical* installation, this is not a recommended setup. Extra CPU and memory resources are required for each additional instance. A single instance is easily capable of handling multiple indexes.
|
||||
|
|
Loading…
Reference in New Issue