SOLR-2606: Fixed sort parsing of fields containing punctuation

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1152653 13f79535-47bb-0310-9956-ffa450edef68
2011-07-31 22:08:43 +00:00 · 2011-07-31 22:08:43 +00:00 · 2b7362de2b
parent 865098ae7e
commit 2b7362de2b
5 changed files with 133 additions and 16 deletions
--- a/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java
+++ b/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java
@ -281,14 +281,19 @@ public class _TestUtil {
    0x2A6DF, 0x2B73F, 0x2FA1F, 0xE007F, 0xE01EF, 0xFFFFF, 0x10FFFF
  };
  
-  /** Returns random string, all codepoints within the same unicode block. */
+  /** Returns random string of length between 0-20 codepoints, all codepoints within the same unicode block. */
  public static String randomRealisticUnicodeString(Random r) {
    return randomRealisticUnicodeString(r, 20);
  }
  
-  /** Returns random string, all codepoints within the same unicode block. */
+  /** Returns random string of length up to maxLength codepoints , all codepoints within the same unicode block. */
  public static String randomRealisticUnicodeString(Random r, int maxLength) {
-    final int end = r.nextInt(maxLength);
+    return randomRealisticUnicodeString(r, 0, 20);
+  }
+
+  /** Returns random string of length between min and max codepoints, all codepoints within the same unicode block. */
+  public static String randomRealisticUnicodeString(Random r, int minLength, int maxLength) {
+    final int end = minLength + r.nextInt(maxLength);
    final int block = r.nextInt(blockStarts.length);
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < end; i++)
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -406,6 +406,10 @@ Bug Fixes
  in servlet containers such as WebSphere that do not use a default list
  (Jay R. Jaeger, hossman)

+* SOLR-2606: Fixed sort parsing of fields containing punctuation that 
+  failed due to sort by function changes introduced in SOLR-1297
+  (Mitsu Hadeishi, hossman)
+
 Other Changes
 ----------------------

--- a/solr/core/src/java/org/apache/solr/search/QueryParsing.java
+++ b/solr/core/src/java/org/apache/solr/search/QueryParsing.java
@ -247,10 +247,11 @@ public class QueryParsing {

        // short circuit test for a really simple field name
        String field = sp.getId(null);
-        ParseException qParserException = null;
+        Exception qParserException = null;

-        if (field == null || sp.ch() != ' ') {
+        if (field == null || !Character.isWhitespace(sp.peekChar())) {
          // let's try it as a function instead
+          field = null;
          String funcStr = sp.val.substring(start);

          QParser parser = QParser.getParser(funcStr, FunctionQParserPlugin.NAME, req);
@ -297,7 +298,9 @@ public class QueryParsing {
              }
              continue;
            }
-          } catch (ParseException e) {
+          } catch (IOException ioe) {
+            throw ioe;
+          } catch (Exception e) {
            // hang onto this in case the string isn't a full field name either
            qParserException = e;
          }
--- a/solr/core/src/test/org/apache/solr/search/TestSort.java
+++ b/solr/core/src/test/org/apache/solr/search/TestSort.java
@ -24,21 +24,28 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.search.*;
+import org.apache.lucene.search.SortField.Type;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.OpenBitSet;
-import org.apache.solr.util.AbstractSolrTestCase;
+import org.apache.lucene.util._TestUtil;
+
+import org.apache.solr.request.SolrQueryRequest;
+
+import org.apache.solr.SolrTestCaseJ4;
+
+import org.junit.BeforeClass;

 import java.io.IOException;
 import java.util.*;

-public class TestSort extends AbstractSolrTestCase {
-  @Override
-  public String getSchemaFile() { return null; }
-  @Override
-  public String getSolrConfigFile() { return null; }
+public class TestSort extends SolrTestCaseJ4 {
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    initCore("solrconfig.xml","schema-minimal.xml");
+  }

-  Random r = random;
+  final Random r = random;

  int ndocs = 77;
  int iter = 50;
@ -57,6 +64,92 @@ public class TestSort extends AbstractSolrTestCase {
    }
  }

+  public void testRandomFieldNameSorts() throws Exception {
+    SolrQueryRequest req = lrf.makeRequest("q", "*:*");
+
+    final int iters = atLeast(5000);
+    int numberOfOddities = 0;
+
+    for (int i = 0; i < iters; i++) {
+      final StringBuilder input = new StringBuilder();
+      final String[] names = new String[_TestUtil.nextInt(r,1,10)];
+      final boolean[] reverse = new boolean[names.length];
+      for (int j = 0; j < names.length; j++) {
+        names[j] = _TestUtil.randomRealisticUnicodeString(r, 1, 20);
+
+        // reduce the likelyhood that the random str is a valid query or func 
+        names[j] = names[j].replaceFirst("\\{","\\{\\{");
+        names[j] = names[j].replaceFirst("\\(","\\(\\(");
+        names[j] = names[j].replaceFirst("(\\\"|\\')","$1$1");
+        names[j] = names[j].replaceFirst("(\\d)","$1x");
+
+        // eliminate pesky problem chars
+        names[j] = names[j].replaceAll("\\p{Cntrl}|\\p{javaWhitespace}","");
+
+        if (0 == names[j].length()) {
+          numberOfOddities++;
+          // screw it, i'm taking my toys and going home
+          names[j] = "last_ditch_i_give_up";
+        }
+        reverse[j] = r.nextBoolean();
+
+        input.append(r.nextBoolean() ? " " : "");
+        input.append(names[j]);
+        input.append(" ");
+        input.append(reverse[j] ? "desc," : "asc,");
+      }
+      input.deleteCharAt(input.length()-1);
+      SortField[] sorts = null;
+      try {
+        sorts = QueryParsing.parseSort(input.toString(), req).getSort();
+      } catch (RuntimeException e) {
+        throw new RuntimeException("Failed to parse sort: " + input, e);
+      }
+      assertEquals("parsed sorts had unexpected size", 
+                   names.length, sorts.length);
+      for (int j = 0; j < names.length; j++) {
+        assertEquals("sorts["+j+"] had unexpected reverse: " + input,
+                     reverse[j], sorts[j].getReverse());
+
+        final Type type = sorts[j].getType();
+
+        if (Type.SCORE.equals(type)) {
+          numberOfOddities++;
+          assertEquals("sorts["+j+"] is (unexpectedly) type score : " + input,
+                       "score", names[j]);
+        } else if (Type.DOC.equals(type)) {
+          numberOfOddities++;
+          assertEquals("sorts["+j+"] is (unexpectedly) type doc : " + input,
+                       "_docid_", names[j]);
+        } else if (Type.CUSTOM.equals(type) || Type.REWRITEABLE.equals(type)) {
+          numberOfOddities++;
+
+          // our orig string better be parsable as a func/query
+          QParser qp = 
+            QParser.getParser(names[j], FunctionQParserPlugin.NAME, req);
+          try { 
+            Query q = qp.getQuery();
+            assertNotNull("sorts["+j+"] had type " + type + 
+                          " but parsed to null func/query: " + input, q);
+          } catch (Exception e) {
+            assertNull("sorts["+j+"] had type " + type + 
+                       " but errored parsing as func/query: " + input, e);
+          }
+        } else {
+          assertEquals("sorts["+j+"] had unexpected field: " + input,
+                       names[j], sorts[j].getField());
+        }
+      }
+    }
+
+    assertTrue("Over 0.2% oddities in test: " +
+               numberOfOddities + "/" + iters +
+               " have func/query parsing semenatics gotten broader?",
+               numberOfOddities < 0.002 * iters);
+  }
+
+
+
  public void testSort() throws Exception {
    Directory dir = new RAMDirectory();
    Field f = new Field("f","0", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS);
--- a/solr/core/src/test/org/apache/solr/search/function/TestFunctionQuery.java
+++ b/solr/core/src/test/org/apache/solr/search/function/TestFunctionQuery.java
@ -434,9 +434,15 @@ public class TestFunctionQuery extends SolrTestCaseJ4 {

  @Test
  public void testSortByFunc() throws Exception {
-    assertU(adoc("id", "1", "const_s", "xx", "x_i", "100", "1_s", "a"));
-    assertU(adoc("id", "2", "const_s", "xx", "x_i", "300", "1_s", "c"));
-    assertU(adoc("id", "3", "const_s", "xx", "x_i", "200", "1_s", "b"));
+    assertU(adoc("id",    "1",   "const_s", "xx", 
+                 "x_i",   "100", "1_s", "a",
+                 "x:x_i", "100", "1-1_s", "a"));
+    assertU(adoc("id",    "2",   "const_s", "xx", 
+                 "x_i",   "300", "1_s", "c",
+                 "x:x_i", "300", "1-1_s", "c"));
+    assertU(adoc("id",    "3",   "const_s", "xx", 
+                 "x_i",   "200", "1_s", "b",
+                 "x:x_i", "200", "1-1_s", "b"));
    assertU(commit());

    String desc = "/response/docs==[{'x_i':300},{'x_i':200},{'x_i':100}]";
@ -494,6 +500,12 @@ public class TestFunctionQuery extends SolrTestCaseJ4 {
    assertJQ(req("q",q,  "fl","x_i", "sort", "1_s asc")
             ,asc
    );
+    assertJQ(req("q",q,  "fl","x_i", "sort", "x:x_i desc")
+             ,desc
+    );
+    assertJQ(req("q",q,  "fl","x_i", "sort", "1-1_s asc")
+             ,asc
+    );

    // really ugly field name that isn't a java Id, and can't be 
    // parsed as a func, but sorted fine in Solr 1.4