This commit is contained in:
Karl Wright 2016-05-19 05:09:09 -04:00
commit 390f3a1025
11 changed files with 164 additions and 36 deletions

View File

@ -118,6 +118,12 @@ Bug Fixes
* LUCENE-7279: JapaneseTokenizer throws ArrayIndexOutOfBoundsException
on some valid inputs (Mike McCandless)
* LUCENE-7284: GapSpans needs to implement positionsCost(). (Daniel Bigham, Alan
Woodward)
* LUCENE-7231: WeightedSpanTermExtractor didn't deal correctly with single-term
phrase queries. (Eva Popenda, Alan Woodward)
Documentation
* LUCENE-7223: Improve XXXPoint javadocs to make it clear that you

View File

@ -386,7 +386,7 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
@Override
public float positionsCost() {
  // LUCENE-7284: GapSpans must implement positionsCost() instead of throwing
  // UnsupportedOperationException. A gap matches at every position, so it
  // contributes no positional-matching cost; 0 is the cheapest possible value.
  return 0;
}
}

View File

@ -29,6 +29,7 @@ import org.apache.lucene.search.CheckHits;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.Weight;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@ -72,7 +73,8 @@ public class TestNearSpansOrdered extends LuceneTestCase {
"w1 xx w2 yy w3",
"w1 w3 xx w2 yy w3 zz",
"t1 t2 t2 t1",
"g x x g g x x x g g x x g"
"g x x g g x x x g g x x g",
"go to webpage"
};
protected SpanNearQuery makeQuery(String s1, String s2, String s3,
@ -292,6 +294,23 @@ public class TestNearSpansOrdered extends LuceneTestCase {
assertFinished(spans);
}
public void testNestedGaps() throws Exception {
  // Inner ordered query: "go" followed by a one-position gap.
  SpanQuery goThenGap = SpanNearQuery.newOrderedNearQuery(FIELD)
      .addClause(new SpanTermQuery(new Term(FIELD, "go")))
      .addGap(1)
      .build();
  // First clause matches either the literal term "open" or the gapped query above.
  SpanQuery openOrGoGap = new SpanOrQuery(
      new SpanTermQuery(new Term(FIELD, "open")),
      goThenGap);
  // Full query: (open | go _gap_) immediately followed by "webpage", in order.
  SpanQuery q = SpanNearQuery.newOrderedNearQuery(FIELD)
      .addClause(openOrGoGap)
      .addClause(new SpanTermQuery(new Term(FIELD, "webpage")))
      .build();
  // Doc 6 is "go to webpage": the gap absorbs "to", so the query must match it.
  TopDocs topDocs = searcher.search(q, 1);
  assertEquals(6, topDocs.scoreDocs[0].doc);
}
/*
protected String[] docFields = {
"w1 w2 w3 w4 w5",
@ -299,7 +318,8 @@ public class TestNearSpansOrdered extends LuceneTestCase {
"w1 xx w2 yy w3",
"w1 w3 xx w2 yy w3 zz",
"t1 t2 t2 t1",
"g x x g g x x x g g x x g"
"g x x g g x x x g g x x g",
"go to webpage"
};
*/
}

View File

@ -115,24 +115,29 @@ public class WeightedSpanTermExtractor {
} else if (query instanceof PhraseQuery) {
PhraseQuery phraseQuery = ((PhraseQuery) query);
Term[] phraseQueryTerms = phraseQuery.getTerms();
SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
for (int i = 0; i < phraseQueryTerms.length; i++) {
clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
if (phraseQueryTerms.length == 1) {
extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
}
else {
SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
for (int i = 0; i < phraseQueryTerms.length; i++) {
clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
}
// sum position increments beyond 1
int positionGaps = 0;
int[] positions = phraseQuery.getPositions();
if (positions.length >= 2) {
// positions are in increasing order. max(0,...) is just a safeguard.
positionGaps = Math.max(0, positions[positions.length-1] - positions[0] - positions.length + 1);
// sum position increments beyond 1
int positionGaps = 0;
int[] positions = phraseQuery.getPositions();
if (positions.length >= 2) {
// positions are in increasing order. max(0,...) is just a safeguard.
positionGaps = Math.max(0, positions[positions.length - 1] - positions[0] - positions.length + 1);
}
//if original slop is 0 then require inOrder
boolean inorder = (phraseQuery.getSlop() == 0);
SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder);
extractWeightedSpanTerms(terms, sp, boost);
}
//if original slop is 0 then require inOrder
boolean inorder = (phraseQuery.getSlop() == 0);
SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder);
extractWeightedSpanTerms(terms, sp, boost);
} else if (query instanceof TermQuery) {
extractWeightedTerms(terms, query, boost);
} else if (query instanceof SpanQuery) {

View File

@ -16,6 +16,8 @@
*/
package org.apache.lucene.search.highlight;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
@ -28,9 +30,6 @@ import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CachingTokenFilter;
@ -41,13 +40,14 @@ import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ngram.NGramTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
@ -93,6 +93,7 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
import org.junit.Test;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
@ -1560,6 +1561,32 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
helper.start();
}
// LUCENE-7231 regression test: WeightedSpanTermExtractor previously mishandled
// single-term phrase queries, so highlighting a one-term PhraseQuery produced
// no highlighted fragment.
@Test
public void testHighlighterWithPhraseQuery() throws IOException, InvalidTokenOffsetsException {
// Tokenize input into fixed-size 4-grams, so "Buchung" yields the gram "uchu".
final Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
return new TokenStreamComponents(new NGramTokenizer(4, 4));
}
};
final String fieldName = "substring";
final List<BytesRef> list = new ArrayList<>();
list.add(new BytesRef("uchu"));
// A phrase query with exactly one term — the case the extractor used to drop.
final PhraseQuery query = new PhraseQuery(fieldName, list.toArray(new BytesRef[list.size()]));
final QueryScorer fragmentScorer = new QueryScorer(query, fieldName);
final SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
final Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
highlighter.setTextFragmenter(new SimpleFragmenter(100));
// The matching 4-gram inside "Buchung" must be wrapped in <b>...</b>.
final String fragment = highlighter.getBestFragment(analyzer, fieldName, "Buchung");
assertEquals("B<b>uchu</b>ng",fragment);
}
public void testUnRewrittenQuery() throws Exception {
final TestHighlightRunner helper = new TestHighlightRunner() {

View File

@ -224,6 +224,8 @@ Bug Fixes
* SOLR-9116: Race condition causing occasional SolrIndexSearcher leak when SolrCore is reloaded.
(Jessica Cheng Mallet via shalin)
* SOLR-9128: Fix error handling issues in Streaming classes (Joel Bernstein)
Optimizations
----------------------
* SOLR-8722: Don't force a full ZkStateReader refresh on every Overseer operation.

View File

@ -113,6 +113,7 @@ public class GraphHandler extends RequestHandlerBase implements SolrCoreAware, P
.withFunctionName("update", UpdateStream.class)
.withFunctionName("jdbc", JDBCStream.class)
.withFunctionName("intersect", IntersectStream.class)
.withFunctionName("select", SelectStream.class)
.withFunctionName("complement", ComplementStream.class)
.withFunctionName("daemon", DaemonStream.class)
.withFunctionName("topic", TopicStream.class)

View File

@ -275,7 +275,7 @@ public class GatherNodesStream extends TupleStream implements Expressible {
// collection
expression.addParameter(collection);
if(includeStreams){
if(includeStreams && !(tupleStream instanceof NodeStream)){
if(tupleStream instanceof Expressible){
expression.addParameter(((Expressible)tupleStream).toExpression(factory));
}
@ -311,7 +311,14 @@ public class GatherNodesStream extends TupleStream implements Expressible {
if(maxDocFreq > -1) {
expression.addParameter(new StreamExpressionNamedParameter("maxDocFreq", Integer.toString(maxDocFreq)));
}
expression.addParameter(new StreamExpressionNamedParameter("walk", traverseFrom+"->"+traverseTo));
if(tupleStream instanceof NodeStream) {
NodeStream nodeStream = (NodeStream)tupleStream;
expression.addParameter(new StreamExpressionNamedParameter("walk", nodeStream.toString() + "->" + traverseTo));
} else {
expression.addParameter(new StreamExpressionNamedParameter("walk", traverseFrom + "->" + traverseTo));
}
expression.addParameter(new StreamExpressionNamedParameter("trackTraversal", Boolean.toString(trackTraversal)));
StringBuilder buf = new StringBuilder();
@ -641,6 +648,19 @@ public class GatherNodesStream extends TupleStream implements Expressible {
return new Tuple(map);
}
}
/**
 * Renders the node ids as a comma-separated list, e.g. {@code "id1,id2,id3"}.
 * Used by the enclosing stream's {@code toExpression} to reconstruct the
 * {@code walk} parameter. An empty id set yields the empty string.
 */
@Override
public String toString() {
  // String.join produces exactly the same output as the previous manual
  // StringBuilder/comma-flag loop, for both empty and non-empty id sets.
  return String.join(",", ids);
}
@Override
public Explanation toExplanation(StreamFactory factory) throws IOException {

View File

@ -125,17 +125,17 @@ public class CloudSolrStream extends TupleStream implements Expressible {
List<StreamExpressionNamedParameter> namedParams = factory.getNamedOperands(expression);
StreamExpressionNamedParameter aliasExpression = factory.getNamedOperand(expression, "aliases");
StreamExpressionNamedParameter zkHostExpression = factory.getNamedOperand(expression, "zkHost");
// Collection Name
if(null == collectionName){
throw new IOException(String.format(Locale.ROOT,"invalid expression %s - collectionName expected as first operand",expression));
}
// Validate there are no unknown parameters - zkHost and alias are namedParameter so we don't need to count it twice
if(expression.getParameters().size() != 1 + namedParams.size()){
throw new IOException(String.format(Locale.ROOT,"invalid expression %s - unknown operands found",expression));
}
// Collection Name
if(null == collectionName){
throw new IOException(String.format(Locale.ROOT,"invalid expression %s - collectionName expected as first operand",expression));
}
// Named parameters - passed directly to solr as solrparams
if(0 == namedParams.size()){
throw new IOException(String.format(Locale.ROOT,"invalid expression %s - at least one named parameter expected. eg. 'q=*:*'",expression));
@ -257,15 +257,20 @@ public class CloudSolrStream extends TupleStream implements Expressible {
// If the comparator is null then it was not explicitly set so we will create one using the sort parameter
// of the query. While doing this we will also take into account any aliases such that if we are sorting on
// fieldA but fieldA is aliased to alias.fieldA then the comparater will be against alias.fieldA.
String fls = String.join(",", params.getParams("fl"));
if (fls == null) {
throw new IOException("fl param expected for a stream");
if (params.get("q") == null) {
throw new IOException("q param expected for search function");
}
String sorts = String.join(",", params.getParams("sort"));
if (sorts == null) {
throw new IOException("sort param expected for a stream");
if (params.getParams("fl") == null) {
throw new IOException("fl param expected for search function");
}
String fls = String.join(",", params.getParams("fl"));
if (params.getParams("sort") == null) {
throw new IOException("sort param expected for search function");
}
String sorts = String.join(",", params.getParams("sort"));
this.comp = parseComp(sorts, fls);
}

View File

@ -477,6 +477,9 @@ public class FacetStream extends TupleStream implements Expressible {
String bucketName = _buckets[level].toString();
NamedList nl = (NamedList)facets.get(bucketName);
if(nl == null) {
return;
}
List allBuckets = (List)nl.get("buckets");
for(int b=0; b<allBuckets.size(); b++) {
NamedList bucket = (NamedList)allBuckets.get(b);

View File

@ -129,6 +129,24 @@ public class StreamExpressionTest extends SolrCloudTestCase {
assertOrder(tuples, 0, 3, 4);
assertLong(tuples.get(1), "a_i", 3);
try {
expression = StreamExpressionParser.parse("search(" + COLLECTION + ", fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\")");
stream = new CloudSolrStream(expression, factory);
tuples = getTuples(stream);
throw new Exception("Should be an exception here");
} catch(Exception e) {
assertTrue(e.getMessage().contains("q param expected for search function"));
}
try {
expression = StreamExpressionParser.parse("search(" + COLLECTION + ", q=\"blah\", sort=\"a_f asc, a_i asc\")");
stream = new CloudSolrStream(expression, factory);
tuples = getTuples(stream);
throw new Exception("Should be an exception here");
} catch(Exception e) {
assertTrue(e.getMessage().contains("fl param expected for search function"));
}
}
@Test
@ -2131,6 +2149,27 @@ public class StreamExpressionTest extends SolrCloudTestCase {
assertTrue(avgf.doubleValue() == 5.5D);
assertTrue(count.doubleValue() == 2);
//Test zero result facets
clause = "facet("
+ "collection1, "
+ "q=\"blahhh\", "
+ "fl=\"a_s,a_i,a_f\", "
+ "sort=\"a_s asc\", "
+ "buckets=\"a_s\", "
+ "bucketSorts=\"a_s asc\", "
+ "bucketSizeLimit=100, "
+ "sum(a_i), sum(a_f), "
+ "min(a_i), min(a_f), "
+ "max(a_i), max(a_f), "
+ "avg(a_i), avg(a_f), "
+ "count(*)"
+ ")";
stream = factory.constructStream(clause);
tuples = getTuples(stream);
assert(tuples.size() == 0);
}
@Test