mirror of https://github.com/apache/lucene.git
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/lucene-solr
commit 390f3a1025

@@ -118,6 +118,12 @@ Bug Fixes
 * LUCENE-7279: JapaneseTokenizer throws ArrayIndexOutOfBoundsException
   on some valid inputs (Mike McCandless)
 
+* LUCENE-7284: GapSpans needs to implement positionsCost(). (Daniel Bigham, Alan
+  Woodward)
+
+* LUCENE-7231: WeightedSpanTermExtractor didn't deal correctly with single-term
+  phrase queries. (Eva Popenda, Alan Woodward)
+
 Documentation
 
 * LUCENE-7223: Improve XXXPoint javadocs to make it clear that you
@@ -386,7 +386,7 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
 
     @Override
     public float positionsCost() {
-      throw new UnsupportedOperationException();
+      return 0;
    }
  }
 
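For context: GapSpans is the Spans implementation behind SpanNearQuery.Builder.addGap(), so before this fix asking a gap clause for its positionsCost() threw UnsupportedOperationException. A minimal sketch of the builder API involved, assuming a Lucene 6.x classpath with analyzers-common; the directory choice, field name, and document text are illustrative, not part of the commit:

    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field.Store;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.spans.SpanNearQuery;
    import org.apache.lucene.search.spans.SpanQuery;
    import org.apache.lucene.search.spans.SpanTermQuery;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    public class GapQueryDemo {
      public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new WhitespaceAnalyzer()))) {
          Document doc = new Document();
          doc.add(new TextField("body", "go to webpage", Store.NO));
          writer.addDocument(doc);
        }
        // "go", a one-position gap (covering "to" here), then "webpage";
        // matching this query drives the GapSpans fixed above.
        SpanQuery q = SpanNearQuery.newOrderedNearQuery("body")
            .addClause(new SpanTermQuery(new Term("body", "go")))
            .addGap(1)
            .addClause(new SpanTermQuery(new Term("body", "webpage")))
            .build();
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
          IndexSearcher searcher = new IndexSearcher(reader);
          System.out.println(searcher.search(q, 10).totalHits); // expect 1
        }
      }
    }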
@@ -29,6 +29,7 @@ import org.apache.lucene.search.CheckHits;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.Weight;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
@@ -72,7 +73,8 @@ public class TestNearSpansOrdered extends LuceneTestCase {
     "w1 xx w2 yy w3",
     "w1 w3 xx w2 yy w3 zz",
     "t1 t2 t2 t1",
-    "g x x g g x x x g g x x g"
+    "g x x g g x x x g g x x g",
+    "go to webpage"
   };
 
   protected SpanNearQuery makeQuery(String s1, String s2, String s3,
@@ -292,6 +294,23 @@ public class TestNearSpansOrdered extends LuceneTestCase {
     assertFinished(spans);
   }
 
+  public void testNestedGaps() throws Exception {
+    SpanQuery q = SpanNearQuery.newOrderedNearQuery(FIELD)
+        .addClause(new SpanOrQuery(
+            new SpanTermQuery(new Term(FIELD, "open")),
+            SpanNearQuery.newOrderedNearQuery(FIELD)
+                .addClause(new SpanTermQuery(new Term(FIELD, "go")))
+                .addGap(1)
+                .build()
+        ))
+        .addClause(new SpanTermQuery(new Term(FIELD, "webpage")))
+        .build();
+
+    TopDocs topDocs = searcher.search(q, 1);
+    assertEquals(6, topDocs.scoreDocs[0].doc);
+
+  }
+
   /*
   protected String[] docFields = {
     "w1 w2 w3 w4 w5",
@@ -299,7 +318,8 @@ public class TestNearSpansOrdered extends LuceneTestCase {
     "w1 xx w2 yy w3",
     "w1 w3 xx w2 yy w3 zz",
     "t1 t2 t2 t1",
-    "g x x g g x x x g g x x g"
+    "g x x g g x x x g g x x g",
+    "go to webpage"
   };
   */
 }
@@ -115,24 +115,29 @@ public class WeightedSpanTermExtractor {
     } else if (query instanceof PhraseQuery) {
       PhraseQuery phraseQuery = ((PhraseQuery) query);
       Term[] phraseQueryTerms = phraseQuery.getTerms();
-      SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
-      for (int i = 0; i < phraseQueryTerms.length; i++) {
-        clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
-      }
-
-      // sum position increments beyond 1
-      int positionGaps = 0;
-      int[] positions = phraseQuery.getPositions();
-      if (positions.length >= 2) {
-        // positions are in increasing order. max(0,...) is just a safeguard.
-        positionGaps = Math.max(0, positions[positions.length-1] - positions[0] - positions.length + 1);
-      }
-
-      //if original slop is 0 then require inOrder
-      boolean inorder = (phraseQuery.getSlop() == 0);
-
-      SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder);
-      extractWeightedSpanTerms(terms, sp, boost);
+      if (phraseQueryTerms.length == 1) {
+        extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
+      }
+      else {
+        SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
+        for (int i = 0; i < phraseQueryTerms.length; i++) {
+          clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
+        }
+
+        // sum position increments beyond 1
+        int positionGaps = 0;
+        int[] positions = phraseQuery.getPositions();
+        if (positions.length >= 2) {
+          // positions are in increasing order. max(0,...) is just a safeguard.
+          positionGaps = Math.max(0, positions[positions.length - 1] - positions[0] - positions.length + 1);
+        }
+
+        //if original slop is 0 then require inOrder
+        boolean inorder = (phraseQuery.getSlop() == 0);
+
+        SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder);
+        extractWeightedSpanTerms(terms, sp, boost);
+      }
     } else if (query instanceof TermQuery) {
       extractWeightedTerms(terms, query, boost);
     } else if (query instanceof SpanQuery) {
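The positionGaps arithmetic retained above is easiest to check with concrete numbers; a worked example (the terms and positions are illustrative):

    // A two-term phrase with a one-position hole, e.g. built as:
    //   new PhraseQuery.Builder()
    //       .add(new Term("f", "go"), 0)
    //       .add(new Term("f", "webpage"), 2)   // position 1 deliberately skipped
    //       .build();
    int[] positions = {0, 2};
    int positionGaps = Math.max(0, positions[positions.length - 1] - positions[0] - positions.length + 1);
    // = max(0, 2 - 0 - 2 + 1) = 1, so the extractor builds the SpanNearQuery with
    // slop = phraseQuery.getSlop() + 1 and the span still matches "go to webpage".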
@@ -16,6 +16,8 @@
  */
 package org.apache.lucene.search.highlight;
 
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
@@ -28,9 +30,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.StringTokenizer;
 
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CachingTokenFilter;
@@ -41,13 +40,14 @@ import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.ngram.NGramTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.document.IntPoint;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.IntPoint;
 import org.apache.lucene.document.StoredField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DirectoryReader;
@@ -93,6 +93,7 @@ import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.automaton.Automata;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.apache.lucene.util.automaton.RegExp;
+import org.junit.Test;
 import org.w3c.dom.Element;
 import org.w3c.dom.NodeList;
 
@@ -1560,6 +1561,32 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatter {
     helper.start();
   }
 
+  @Test
+  public void testHighlighterWithPhraseQuery() throws IOException, InvalidTokenOffsetsException {
+
+    final Analyzer analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        return new TokenStreamComponents(new NGramTokenizer(4, 4));
+      }
+    };
+    final String fieldName = "substring";
+
+    final List<BytesRef> list = new ArrayList<>();
+    list.add(new BytesRef("uchu"));
+    final PhraseQuery query = new PhraseQuery(fieldName, list.toArray(new BytesRef[list.size()]));
+
+    final QueryScorer fragmentScorer = new QueryScorer(query, fieldName);
+    final SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
+
+    final Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
+    highlighter.setTextFragmenter(new SimpleFragmenter(100));
+    final String fragment = highlighter.getBestFragment(analyzer, fieldName, "Buchung");
+
+    assertEquals("B<b>uchu</b>ng",fragment);
+
+  }
+
   public void testUnRewrittenQuery() throws Exception {
     final TestHighlightRunner helper = new TestHighlightRunner() {
 
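The expected fragment in the new test follows from how NGramTokenizer(4, 4) slices the input; a standalone sketch, not part of the commit:

    import java.io.StringReader;
    import org.apache.lucene.analysis.ngram.NGramTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class NGramDemo {
      public static void main(String[] args) throws Exception {
        try (NGramTokenizer tokenizer = new NGramTokenizer(4, 4)) {
          tokenizer.setReader(new StringReader("Buchung"));
          CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
          tokenizer.reset();
          while (tokenizer.incrementToken()) {
            System.out.println(term.toString()); // Buch, uchu, chun, hung
          }
          tokenizer.end();
        }
      }
    }

Since "uchu" is one of those grams, the one-term PhraseQuery now reaches the extractor as a plain SpanTermQuery (the LUCENE-7231 change above) and the highlighter marks exactly that slice: B<b>uchu</b>ng.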
@@ -224,6 +224,8 @@ Bug Fixes
 * SOLR-9116: Race condition causing occasional SolrIndexSearcher leak when SolrCore is reloaded.
   (Jessica Cheng Mallet via shalin)
 
+* SOLR-9128: Fix error handling issues in Streaming classes (Joel Bernstein)
+
 Optimizations
 ----------------------
 * SOLR-8722: Don't force a full ZkStateReader refresh on every Overseer operation.
@@ -113,6 +113,7 @@ public class GraphHandler extends RequestHandlerBase implements SolrCoreAware, PermissionNameProvider {
         .withFunctionName("update", UpdateStream.class)
         .withFunctionName("jdbc", JDBCStream.class)
         .withFunctionName("intersect", IntersectStream.class)
+        .withFunctionName("select", SelectStream.class)
         .withFunctionName("complement", ComplementStream.class)
         .withFunctionName("daemon", DaemonStream.class)
         .withFunctionName("topic", TopicStream.class)
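For context, these withFunctionName(...) registrations are what let the streaming-expression parser resolve function names to stream classes; a minimal sketch of the same pattern, with an illustrative collection name and zkHost (not from this commit):

    import org.apache.solr.client.solrj.io.stream.CloudSolrStream;
    import org.apache.solr.client.solrj.io.stream.SelectStream;
    import org.apache.solr.client.solrj.io.stream.TupleStream;
    import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;

    public class RegistryDemo {
      public static void main(String[] args) throws Exception {
        StreamFactory factory = new StreamFactory()
            .withCollectionZkHost("collection1", "localhost:9983")
            .withFunctionName("search", CloudSolrStream.class)
            .withFunctionName("select", SelectStream.class);
        // With "select" registered, both functions in this expression resolve:
        TupleStream stream = factory.constructStream(
            "select(search(collection1, q=\"*:*\", fl=\"id,a_s\", sort=\"id asc\"), id, a_s as name)");
        // Opening and reading the stream would require a running SolrCloud cluster.
        System.out.println(stream);
      }
    }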
@@ -275,7 +275,7 @@ public class GatherNodesStream extends TupleStream implements Expressible {
     // collection
     expression.addParameter(collection);
 
-    if(includeStreams){
+    if(includeStreams && !(tupleStream instanceof NodeStream)){
       if(tupleStream instanceof Expressible){
         expression.addParameter(((Expressible)tupleStream).toExpression(factory));
       }
@@ -311,7 +311,14 @@ public class GatherNodesStream extends TupleStream implements Expressible {
     if(maxDocFreq > -1) {
       expression.addParameter(new StreamExpressionNamedParameter("maxDocFreq", Integer.toString(maxDocFreq)));
     }
-    expression.addParameter(new StreamExpressionNamedParameter("walk", traverseFrom+"->"+traverseTo));
+    if(tupleStream instanceof NodeStream) {
+      NodeStream nodeStream = (NodeStream)tupleStream;
+      expression.addParameter(new StreamExpressionNamedParameter("walk", nodeStream.toString() + "->" + traverseTo));
+
+    } else {
+      expression.addParameter(new StreamExpressionNamedParameter("walk", traverseFrom + "->" + traverseTo));
+    }
+
     expression.addParameter(new StreamExpressionNamedParameter("trackTraversal", Boolean.toString(trackTraversal)));
 
     StringBuilder buf = new StringBuilder();
@@ -641,6 +648,19 @@ public class GatherNodesStream extends TupleStream implements Expressible {
         return new Tuple(map);
       }
     }
 
+    public String toString() {
+      StringBuilder builder = new StringBuilder();
+      boolean comma = false;
+      for(String s : ids) {
+        if(comma) {
+          builder.append(",");
+        }
+        builder.append(s);
+        comma = true;
+      }
+      return builder.toString();
+    }
+
     @Override
     public Explanation toExplanation(StreamFactory factory) throws IOException {
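Taken together, the three GatherNodesStream changes close a serialization round trip: when the stream's source is a NodeStream (a literal list of root node ids), toExpression() re-emits those ids via the comma-joining toString() above instead of the generic traverseFrom form. A small standalone sketch of the resulting walk parameter (the ids and field name are hypothetical):

    import java.util.Arrays;
    import java.util.List;

    public class WalkParamDemo {
      public static void main(String[] args) {
        List<String> ids = Arrays.asList("john@email.com", "sue@email.com"); // hypothetical roots
        String traverseTo = "from_address";

        // Same comma-joining logic as the toString() added above.
        StringBuilder builder = new StringBuilder();
        boolean comma = false;
        for (String s : ids) {
          if (comma) {
            builder.append(",");
          }
          builder.append(s);
          comma = true;
        }

        // The named parameter the new NodeStream branch emits:
        System.out.println("walk=\"" + builder + "->" + traverseTo + "\"");
        // prints: walk="john@email.com,sue@email.com->from_address"
      }
    }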
@@ -125,17 +125,17 @@ public class CloudSolrStream extends TupleStream implements Expressible {
     List<StreamExpressionNamedParameter> namedParams = factory.getNamedOperands(expression);
     StreamExpressionNamedParameter aliasExpression = factory.getNamedOperand(expression, "aliases");
     StreamExpressionNamedParameter zkHostExpression = factory.getNamedOperand(expression, "zkHost");
-
-    // Collection Name
-    if(null == collectionName){
-      throw new IOException(String.format(Locale.ROOT,"invalid expression %s - collectionName expected as first operand",expression));
-    }
 
     // Validate there are no unknown parameters - zkHost and alias are namedParameter so we don't need to count it twice
     if(expression.getParameters().size() != 1 + namedParams.size()){
       throw new IOException(String.format(Locale.ROOT,"invalid expression %s - unknown operands found",expression));
     }
 
+    // Collection Name
+    if(null == collectionName){
+      throw new IOException(String.format(Locale.ROOT,"invalid expression %s - collectionName expected as first operand",expression));
+    }
+
     // Named parameters - passed directly to solr as solrparams
     if(0 == namedParams.size()){
       throw new IOException(String.format(Locale.ROOT,"invalid expression %s - at least one named parameter expected. eg. 'q=*:*'",expression));
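The operand-count validation above is easier to see with numbers: the first operand is the collection name and everything else must be a named parameter, so size() must equal 1 + namedParams.size(). A small sketch using the parser (the expressions are hypothetical):

    import java.util.List;

    import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
    import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionNamedParameter;
    import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParser;
    import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;

    public class OperandCountDemo {
      public static void main(String[] args) {
        StreamFactory factory = new StreamFactory();
        // "stray" is neither the leading collection name nor a named parameter:
        StreamExpression expression = StreamExpressionParser.parse(
            "search(collection1, stray, q=\"*:*\")");
        List<StreamExpressionNamedParameter> namedParams = factory.getNamedOperands(expression);
        // 3 parameters vs 1 + 1 -> the check above reports "unknown operands found".
        System.out.println(expression.getParameters().size() + " vs " + (1 + namedParams.size()));
      }
    }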
@@ -257,15 +257,20 @@ public class CloudSolrStream extends TupleStream implements Expressible {
       // If the comparator is null then it was not explicitly set so we will create one using the sort parameter
       // of the query. While doing this we will also take into account any aliases such that if we are sorting on
       // fieldA but fieldA is aliased to alias.fieldA then the comparater will be against alias.fieldA.
-      String fls = String.join(",", params.getParams("fl"));
-      if (fls == null) {
-        throw new IOException("fl param expected for a stream");
-      }
-
-      String sorts = String.join(",", params.getParams("sort"));
-      if (sorts == null) {
-        throw new IOException("sort param expected for a stream");
-      }
+      if (params.get("q") == null) {
+        throw new IOException("q param expected for search function");
+      }
+
+      if (params.getParams("fl") == null) {
+        throw new IOException("fl param expected for search function");
+      }
+      String fls = String.join(",", params.getParams("fl"));
+
+      if (params.getParams("sort") == null) {
+        throw new IOException("sort param expected for search function");
+      }
+      String sorts = String.join(",", params.getParams("sort"));
+
       this.comp = parseComp(sorts, fls);
     }
 
@@ -477,6 +477,9 @@ public class FacetStream extends TupleStream implements Expressible {
 
     String bucketName = _buckets[level].toString();
     NamedList nl = (NamedList)facets.get(bucketName);
+    if(nl == null) {
+      return;
+    }
     List allBuckets = (List)nl.get("buckets");
     for(int b=0; b<allBuckets.size(); b++) {
       NamedList bucket = (NamedList)allBuckets.get(b);
@@ -129,6 +129,24 @@ public class StreamExpressionTest extends SolrCloudTestCase {
     assertOrder(tuples, 0, 3, 4);
     assertLong(tuples.get(1), "a_i", 3);
 
+    try {
+      expression = StreamExpressionParser.parse("search(" + COLLECTION + ", fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\")");
+      stream = new CloudSolrStream(expression, factory);
+      tuples = getTuples(stream);
+      throw new Exception("Should be an exception here");
+    } catch(Exception e) {
+      assertTrue(e.getMessage().contains("q param expected for search function"));
+    }
+
+    try {
+      expression = StreamExpressionParser.parse("search(" + COLLECTION + ", q=\"blah\", sort=\"a_f asc, a_i asc\")");
+      stream = new CloudSolrStream(expression, factory);
+      tuples = getTuples(stream);
+      throw new Exception("Should be an exception here");
+    } catch(Exception e) {
+      assertTrue(e.getMessage().contains("fl param expected for search function"));
+    }
+
   }
 
   @Test
@@ -2131,6 +2149,27 @@ public class StreamExpressionTest extends SolrCloudTestCase {
     assertTrue(avgf.doubleValue() == 5.5D);
     assertTrue(count.doubleValue() == 2);
 
+    //Test zero result facets
+    clause = "facet("
+        + "collection1, "
+        + "q=\"blahhh\", "
+        + "fl=\"a_s,a_i,a_f\", "
+        + "sort=\"a_s asc\", "
+        + "buckets=\"a_s\", "
+        + "bucketSorts=\"a_s asc\", "
+        + "bucketSizeLimit=100, "
+        + "sum(a_i), sum(a_f), "
+        + "min(a_i), min(a_f), "
+        + "max(a_i), max(a_f), "
+        + "avg(a_i), avg(a_f), "
+        + "count(*)"
+        + ")";
+
+    stream = factory.constructStream(clause);
+    tuples = getTuples(stream);
+
+    assert(tuples.size() == 0);
+
   }
 
   @Test