diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 73a4c21f320..5eabdb42f10 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -118,6 +118,12 @@ Bug Fixes
 * LUCENE-7279: JapaneseTokenizer throws ArrayIndexOutOfBoundsException on some
   valid inputs (Mike McCandless)
 
+* LUCENE-7284: GapSpans needs to implement positionsCost(). (Daniel Bigham, Alan
+  Woodward)
+
+* LUCENE-7231: WeightedSpanTermExtractor didn't deal correctly with single-term
+  phrase queries. (Eva Popenda, Alan Woodward)
+
 Documentation
 
 * LUCENE-7223: Improve XXXPoint javadocs to make it clear that you
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
index 167a7d1fb0e..d542227b37f 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
@@ -386,7 +386,7 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
 
     @Override
     public float positionsCost() {
-      throw new UnsupportedOperationException();
+      return 0;
     }
 
   }
diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java
index f297f33676d..d9c003a9002 100644
--- a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java
+++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java
@@ -29,6 +29,7 @@ import org.apache.lucene.search.CheckHits;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
@@ -72,7 +73,8 @@ public class TestNearSpansOrdered extends LuceneTestCase {
     "w1 xx w2 yy w3",
     "w1 w3 xx w2 yy w3 zz",
     "t1 t2 t2 t1",
-    "g x x g g x x x g g x x g"
+    "g x x g g x x x g g x x g",
+    "go to webpage"
   };
 
   protected SpanNearQuery makeQuery(String s1, String s2, String s3,
@@ -292,6 +294,23 @@ public class TestNearSpansOrdered extends LuceneTestCase {
     assertFinished(spans);
   }
 
+  public void testNestedGaps() throws Exception {
+    SpanQuery q = SpanNearQuery.newOrderedNearQuery(FIELD)
+        .addClause(new SpanOrQuery(
+            new SpanTermQuery(new Term(FIELD, "open")),
+            SpanNearQuery.newOrderedNearQuery(FIELD)
+                .addClause(new SpanTermQuery(new Term(FIELD, "go")))
+                .addGap(1)
+                .build()
+        ))
+        .addClause(new SpanTermQuery(new Term(FIELD, "webpage")))
+        .build();
+
+    TopDocs topDocs = searcher.search(q, 1);
+    assertEquals(6, topDocs.scoreDocs[0].doc);
+
+  }
+
   /*
   protected String[] docFields = {
     "w1 w2 w3 w4 w5",
@@ -299,7 +318,8 @@ public class TestNearSpansOrdered extends LuceneTestCase {
     "w1 xx w2 yy w3",
     "w1 w3 xx w2 yy w3 zz",
     "t1 t2 t2 t1",
-    "g x x g g x x x g g x x g"
+    "g x x g g x x x g g x x g",
+    "go to webpage"
   };
   */
 }
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
index 16b1d7bba85..89cbd11cba9 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
@@ -115,24 +115,29 @@ public class WeightedSpanTermExtractor {
     } else if (query instanceof PhraseQuery) {
       PhraseQuery phraseQuery = ((PhraseQuery) query);
       Term[] phraseQueryTerms = phraseQuery.getTerms();
-      SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
-      for (int i = 0; i < phraseQueryTerms.length; i++) {
-        clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
+      if (phraseQueryTerms.length == 1) {
+        extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
       }
+      else {
+        SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
+        for (int i = 0; i < phraseQueryTerms.length; i++) {
+          clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
+        }
 
-      // sum position increments beyond 1
-      int positionGaps = 0;
-      int[] positions = phraseQuery.getPositions();
-      if (positions.length >= 2) {
-        // positions are in increasing order. max(0,...) is just a safeguard.
-        positionGaps = Math.max(0, positions[positions.length-1] - positions[0] - positions.length + 1);
+        // sum position increments beyond 1
+        int positionGaps = 0;
+        int[] positions = phraseQuery.getPositions();
+        if (positions.length >= 2) {
+          // positions are in increasing order. max(0,...) is just a safeguard.
+          positionGaps = Math.max(0, positions[positions.length - 1] - positions[0] - positions.length + 1);
+        }
+
+        //if original slop is 0 then require inOrder
+        boolean inorder = (phraseQuery.getSlop() == 0);
+
+        SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder);
+        extractWeightedSpanTerms(terms, sp, boost);
       }
-
-      //if original slop is 0 then require inOrder
-      boolean inorder = (phraseQuery.getSlop() == 0);
-
-      SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder);
-      extractWeightedSpanTerms(terms, sp, boost);
     } else if (query instanceof TermQuery) {
       extractWeightedTerms(terms, query, boost);
     } else if (query instanceof SpanQuery) {
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
index 936d121fc69..0a034f1ac36 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
@@ -16,6 +16,8 @@
  */
 package org.apache.lucene.search.highlight;
 
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
@@ -28,9 +30,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.StringTokenizer;
 
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CachingTokenFilter;
@@ -41,13 +40,14 @@ import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.ngram.NGramTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.document.IntPoint;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.IntPoint;
 import org.apache.lucene.document.StoredField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DirectoryReader;
@@ -93,6 +93,7 @@ import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.automaton.Automata;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.apache.lucene.util.automaton.RegExp;
+import org.junit.Test;
 
 import org.w3c.dom.Element;
 import org.w3c.dom.NodeList;
@@ -1560,6 +1561,32 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     helper.start();
   }
 
+  @Test
+  public void testHighlighterWithPhraseQuery() throws IOException, InvalidTokenOffsetsException {
+
+    final Analyzer analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        return new TokenStreamComponents(new NGramTokenizer(4, 4));
+      }
+    };
+    final String fieldName = "substring";
+
+    final List<BytesRef> list = new ArrayList<>();
+    list.add(new BytesRef("uchu"));
+    final PhraseQuery query = new PhraseQuery(fieldName, list.toArray(new BytesRef[list.size()]));
+
+    final QueryScorer fragmentScorer = new QueryScorer(query, fieldName);
+    final SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
+
+    final Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
+    highlighter.setTextFragmenter(new SimpleFragmenter(100));
+    final String fragment = highlighter.getBestFragment(analyzer, fieldName, "Buchung");
+
+    assertEquals("B<b>uchu</b>ng",fragment);
+
+  }
+
   public void testUnRewrittenQuery() throws Exception {
     final TestHighlightRunner helper = new TestHighlightRunner() {
 
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index c065374bfd7..92211e3e821 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -224,6 +224,8 @@ Bug Fixes
 * SOLR-9116: Race condition causing occasional SolrIndexSearcher leak when SolrCore
   is reloaded. (Jessica Cheng Mallet via shalin)
 
+* SOLR-9128: Fix error handling issues in Streaming classes (Joel Bernstein)
+
 Optimizations
 ----------------------
 * SOLR-8722: Don't force a full ZkStateReader refresh on every Overseer operation.
diff --git a/solr/core/src/java/org/apache/solr/handler/GraphHandler.java b/solr/core/src/java/org/apache/solr/handler/GraphHandler.java
index a6e2ce149bf..a203a4fac30 100644
--- a/solr/core/src/java/org/apache/solr/handler/GraphHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/GraphHandler.java
@@ -113,6 +113,7 @@ public class GraphHandler extends RequestHandlerBase implements SolrCoreAware, P
       .withFunctionName("update", UpdateStream.class)
       .withFunctionName("jdbc", JDBCStream.class)
       .withFunctionName("intersect", IntersectStream.class)
+      .withFunctionName("select", SelectStream.class)
      .withFunctionName("complement", ComplementStream.class)
       .withFunctionName("daemon", DaemonStream.class)
       .withFunctionName("topic", TopicStream.class)
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/graph/GatherNodesStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/graph/GatherNodesStream.java
index 10bd6340378..52a6a1ed392 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/graph/GatherNodesStream.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/graph/GatherNodesStream.java
@@ -275,7 +275,7 @@ public class GatherNodesStream extends TupleStream implements Expressible {
     // collection
     expression.addParameter(collection);
 
-    if(includeStreams){
+    if(includeStreams && !(tupleStream instanceof NodeStream)){
       if(tupleStream instanceof Expressible){
         expression.addParameter(((Expressible)tupleStream).toExpression(factory));
       }
@@ -311,7 +311,14 @@ public class GatherNodesStream extends TupleStream implements Expressible {
     if(maxDocFreq > -1) {
       expression.addParameter(new StreamExpressionNamedParameter("maxDocFreq", Integer.toString(maxDocFreq)));
     }
-    expression.addParameter(new StreamExpressionNamedParameter("walk", traverseFrom+"->"+traverseTo));
+    if(tupleStream instanceof NodeStream) {
+      NodeStream nodeStream = (NodeStream)tupleStream;
+      expression.addParameter(new StreamExpressionNamedParameter("walk", nodeStream.toString() + "->" + traverseTo));
+
+    } else {
+      expression.addParameter(new StreamExpressionNamedParameter("walk", traverseFrom + "->" + traverseTo));
+    }
+
     expression.addParameter(new StreamExpressionNamedParameter("trackTraversal", Boolean.toString(trackTraversal)));
 
     StringBuilder buf = new StringBuilder();
@@ -641,6 +648,19 @@ public class GatherNodesStream extends TupleStream implements Expressible {
         return new Tuple(map);
       }
     }
+
+    public String toString() {
+      StringBuilder builder = new StringBuilder();
+      boolean comma = false;
+      for(String s : ids) {
+        if(comma) {
+          builder.append(",");
+        }
+        builder.append(s);
+        comma = true;
+      }
+      return builder.toString();
+    }
 
     @Override
     public Explanation toExplanation(StreamFactory factory) throws IOException {
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CloudSolrStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CloudSolrStream.java
index dd02175e963..8aba89c2e66 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CloudSolrStream.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CloudSolrStream.java
@@ -125,17 +125,17 @@ public class CloudSolrStream extends TupleStream implements Expressible {
     List<StreamExpressionNamedParameter> namedParams = factory.getNamedOperands(expression);
     StreamExpressionNamedParameter aliasExpression = factory.getNamedOperand(expression, "aliases");
     StreamExpressionNamedParameter zkHostExpression = factory.getNamedOperand(expression, "zkHost");
-    
+
+    // Collection Name
+    if(null == collectionName){
+      throw new IOException(String.format(Locale.ROOT,"invalid expression %s - collectionName expected as first operand",expression));
+    }
+
     // Validate there are no unknown parameters - zkHost and alias are namedParameter so we don't need to count it twice
     if(expression.getParameters().size() != 1 + namedParams.size()){
       throw new IOException(String.format(Locale.ROOT,"invalid expression %s - unknown operands found",expression));
     }
 
-    // Collection Name
-    if(null == collectionName){
-      throw new IOException(String.format(Locale.ROOT,"invalid expression %s - collectionName expected as first operand",expression));
-    }
-
     // Named parameters - passed directly to solr as solrparams
     if(0 == namedParams.size()){
       throw new IOException(String.format(Locale.ROOT,"invalid expression %s - at least one named parameter expected. eg. 'q=*:*'",expression));
@@ -257,15 +257,20 @@ public class CloudSolrStream extends TupleStream implements Expressible {
     // If the comparator is null then it was not explicitly set so we will create one using the sort parameter
     // of the query. While doing this we will also take into account any aliases such that if we are sorting on
     // fieldA but fieldA is aliased to alias.fieldA then the comparater will be against alias.fieldA.
-    String fls = String.join(",", params.getParams("fl"));
-    if (fls == null) {
-      throw new IOException("fl param expected for a stream");
+
+    if (params.get("q") == null) {
+      throw new IOException("q param expected for search function");
     }
-    String sorts = String.join(",", params.getParams("sort"));
-    if (sorts == null) {
-      throw new IOException("sort param expected for a stream");
+    if (params.getParams("fl") == null) {
+      throw new IOException("fl param expected for search function");
     }
+    String fls = String.join(",", params.getParams("fl"));
+
+    if (params.getParams("sort") == null) {
+      throw new IOException("sort param expected for search function");
+    }
+    String sorts = String.join(",", params.getParams("sort"));
 
     this.comp = parseComp(sorts, fls);
   }
 
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/FacetStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/FacetStream.java
index ae04a85970c..86124dedf3c 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/FacetStream.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/FacetStream.java
@@ -477,6 +477,9 @@
     String bucketName = _buckets[level].toString();
     NamedList nl = (NamedList)facets.get(bucketName);
+    if(nl == null) {
+      return;
+    }
     List allBuckets = (List)nl.get("buckets");
     for(int b=0; b