LUCENE-5414: Suggest module should not depend on expression module

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1561415 13f79535-47bb-0310-9956-ffa450edef68
2014-01-25 22:18:36 +00:00 · 2014-01-25 22:18:36 +00:00 · d1999b7791
parent b27fe755d1
commit d1999b7791
5 changed files with 70 additions and 93 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -211,6 +211,12 @@ API Changes
  etc. but it's easy to override the parseShape method if you wish. (David
  Smiley)

+* LUCENE-5414: DocumentExpressionDictionary was renamed to
+  DocumentValueSourceDictionary and all dependencies on the lucene-expressions
+  module were removed from lucene-suggest. DocumentValueSourceDictionary now
+  only accepts a ValueSource; the convenience constructors that took an
+  expression string were removed. (Simon Willnauer)
+
 Optimizations

 * LUCENE-5372: Replace StringBuffer by StringBuilder, where possible.
--- a/lucene/suggest/build.xml
+++ b/lucene/suggest/build.xml
@ -31,13 +31,10 @@
  <path id="classpath">
    <pathelement path="${analyzers-common.jar}"/>
    <pathelement path="${misc.jar}"/>
-    <pathelement path="${expressions.jar}"/>
    <pathelement path="${queries.jar}"/>
-    <fileset dir="${common.dir}/expressions/lib"/>
    <path refid="base.classpath"/>
  </path>

-
  <target name="javadocs" depends="javadocs-queries,compile-core">
    <invoke-module-javadoc>
      <links>
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java
@ -18,22 +18,15 @@ package org.apache.lucene.search.suggest;
 */

 import java.io.IOException;
-import java.text.ParseException;
 import java.util.HashMap;
 import java.util.List;
-import java.util.Set;

-import org.apache.lucene.document.NumericDocValuesField; // javadocs
-import org.apache.lucene.expressions.Expression;
-import org.apache.lucene.expressions.SimpleBindings;
-import org.apache.lucene.expressions.js.JavascriptCompiler;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.ReaderUtil;
 import org.apache.lucene.index.StoredDocument;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
-import org.apache.lucene.search.SortField;
 import org.apache.lucene.util.BytesRefIterator;


@ -41,9 +34,8 @@ import org.apache.lucene.util.BytesRefIterator;
 * <p>
 * Dictionary with terms and optionally payload information 
 * taken from stored fields in a Lucene index. Similar to 
- * {@link DocumentDictionary}, except it computes the weight
- * of the terms in a document based on a user-defined expression
- * having one or more {@link NumericDocValuesField} in the document.
+ * {@link DocumentDictionary}, except it obtains the weight
+ * of the terms in a document based on a {@link ValueSource}.
 * </p>
 * <b>NOTE:</b> 
 *  <ul>
@ -56,60 +48,34 @@ import org.apache.lucene.util.BytesRefIterator;
 *      do not have a value for a document, then the document is 
 *      rejected by the dictionary
 *    </li>
- *    <li>
- *      All the fields used in <code>weightExpression</code> should
- *      have values for all documents, if any of the fields do not 
- *      have a value for a document, it will default to 0
- *    </li>
 *  </ul>
+ *  <p>
+ *  In practice the {@link ValueSource} will likely be obtained
+ *  using the Lucene expressions module. The following example shows
+ *  how to create a {@link ValueSource} from a simple addition of two
+ *  fields:
+ *  </p>
+ *  <pre class="prettyprint">
+ *    Expression expression = JavascriptCompiler.compile("f1 + f2");
+ *    SimpleBindings bindings = new SimpleBindings();
+ *    bindings.add(new SortField("f1", SortField.Type.LONG));
+ *    bindings.add(new SortField("f2", SortField.Type.LONG));
+ *    ValueSource valueSource = expression.getValueSource(bindings);
+ *  </pre>
+ *
 */
-public class DocumentExpressionDictionary extends DocumentDictionary {
+public class DocumentValueSourceDictionary extends DocumentDictionary {
  
  private final ValueSource weightsValueSource;
  
-  /**
-   * Creates a new dictionary with the contents of the fields named <code>field</code>
-   * for the terms and computes the corresponding weights of the term by compiling the
-   * user-defined <code>weightExpression</code> using the <code>sortFields</code>
-   * bindings.
-   */
-  public DocumentExpressionDictionary(IndexReader reader, String field,
-      String weightExpression, Set<SortField> sortFields) {
-    this(reader, field, weightExpression, sortFields, null);
-  }
-  
-  /**
-   * Creates a new dictionary with the contents of the fields named <code>field</code>
-   * for the terms, <code>payloadField</code> for the corresponding payloads
-   * and computes the corresponding weights of the term by compiling the
-   * user-defined <code>weightExpression</code> using the <code>sortFields</code>
-   * bindings.
-   */
-  public DocumentExpressionDictionary(IndexReader reader, String field,
-      String weightExpression, Set<SortField> sortFields, String payload) {
-    super(reader, field, null, payload);
-    Expression expression = null;
-    try {
-      expression = JavascriptCompiler.compile(weightExpression);
-    } catch (ParseException e) {
-      throw new RuntimeException();
-    }
-    SimpleBindings bindings = new SimpleBindings();
-    for (SortField sortField: sortFields) {
-      bindings.add(sortField);
-    }
-    
-    weightsValueSource = expression.getValueSource(bindings);
-  }
-  
  /**
   * Creates a new dictionary with the contents of the fields named <code>field</code>
   * for the terms, <code>payloadField</code> for the corresponding payloads
   * and uses the <code>weightsValueSource</code> supplied to determine the 
   * score.
   */
-  public DocumentExpressionDictionary(IndexReader reader, String field,
-      ValueSource weightsValueSource, String payload) {
+  public DocumentValueSourceDictionary(IndexReader reader, String field,
+                                       ValueSource weightsValueSource, String payload) {
    super(reader, field, null, payload);
    this.weightsValueSource = weightsValueSource;  
  }
@ -119,18 +85,18 @@ public class DocumentExpressionDictionary extends DocumentDictionary {
   * for the terms and uses the <code>weightsValueSource</code> supplied to determine the 
   * score.
   */
-  public DocumentExpressionDictionary(IndexReader reader, String field,
-      ValueSource weightsValueSource) {
+  public DocumentValueSourceDictionary(IndexReader reader, String field,
+                                       ValueSource weightsValueSource) {
    super(reader, field, null, null);
    this.weightsValueSource = weightsValueSource;  
  }
  
  @Override
  public BytesRefIterator getWordsIterator() throws IOException {
-    return new DocumentExpressionInputIterator(payloadField!=null);
+    return new DocumentValueSourceInputIterator(payloadField!=null);
  }
  
-  final class DocumentExpressionInputIterator extends DocumentDictionary.DocumentInputIterator {
+  final class DocumentValueSourceInputIterator extends DocumentDictionary.DocumentInputIterator {
    
    private FunctionValues currentWeightValues;
    /** leaves of the reader */
@ -140,7 +106,7 @@ public class DocumentExpressionDictionary extends DocumentDictionary {
    /** current leave index */
    private int currentLeafIndex = 0;
    
-    public DocumentExpressionInputIterator(boolean hasPayloads)
+    public DocumentValueSourceInputIterator(boolean hasPayloads)
        throws IOException {
      super(hasPayloads);
      leaves = reader.leaves();
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java
@ -20,11 +20,9 @@ package org.apache.lucene.search.suggest;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Random;
-import java.util.Set;

 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
@ -37,15 +35,17 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.queries.function.valuesource.DoubleConstValueSource;
-import org.apache.lucene.search.SortField;
+import org.apache.lucene.queries.function.valuesource.LongFieldSource;
+import org.apache.lucene.queries.function.valuesource.SumFloatFunction;
 import org.apache.lucene.search.spell.Dictionary;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.junit.Test;

-public class DocumentExpressionDictionaryTest extends LuceneTestCase {
+public class DocumentValueSourceDictionaryTest extends LuceneTestCase {
  
  static final String FIELD_NAME = "f1";
  static final String WEIGHT_FIELD_NAME_1 = "w1";
@ -82,11 +82,7 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
    writer.commit();
    writer.close();
    IndexReader ir = DirectoryReader.open(dir);
-    Set<SortField> sortFields = new HashSet<SortField>(); 
-    sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG));
-    sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG));
-    sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG));
-    Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "((w1 + w2) - w3)", sortFields, PAYLOAD_FIELD_NAME);
+    Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME,  new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME);
    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();

    assertNull(inputIterator.next());
@ -111,11 +107,8 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
    writer.close();

    IndexReader ir = DirectoryReader.open(dir);
-    Set<SortField> sortFields = new HashSet<SortField>(); 
-    sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG));
-    sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG));
-    sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG));
-    Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "((w1 + w2) - w3)", sortFields, PAYLOAD_FIELD_NAME);
+    ValueSource[] toAdd = new ValueSource[] {new LongFieldSource(WEIGHT_FIELD_NAME_1), new LongFieldSource(WEIGHT_FIELD_NAME_2), new LongFieldSource(WEIGHT_FIELD_NAME_3)};
+    Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumFloatFunction(toAdd), PAYLOAD_FIELD_NAME);
    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
    BytesRef f;
    while((f = inputIterator.next())!=null) {
@ -124,7 +117,7 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
      long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
      long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
      assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
-      assertEquals(inputIterator.weight(), (w1 + w2) - w3);
+      assertEquals(inputIterator.weight(), (w1 + w2 + w3));
      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
    }
    assertTrue(docs.isEmpty());
@ -146,11 +139,8 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
    writer.close();

    IndexReader ir = DirectoryReader.open(dir);
-    Set<SortField> sortFields = new HashSet<SortField>(); 
-    sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG));
-    sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG));
-    sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG));
-    Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "w1 + (0.2 * w2) - (w3 - w1)/2", sortFields);
+    ValueSource[] toAdd = new ValueSource[] {new LongFieldSource(WEIGHT_FIELD_NAME_1), new LongFieldSource(WEIGHT_FIELD_NAME_2), new LongFieldSource(WEIGHT_FIELD_NAME_3)};
+    Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME,  new SumFloatFunction(toAdd));
    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
    BytesRef f;
    while((f = inputIterator.next())!=null) {
@ -159,7 +149,7 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
      long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
      long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
      assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
-      assertEquals(inputIterator.weight(), (long)(w1 + (0.2 * w2) - (w3 - w1)/2));
+      assertEquals(inputIterator.weight(), (w1 + w2 + w3));
      assertEquals(inputIterator.payload(), null);
    }
    assertTrue(docs.isEmpty());
@ -202,10 +192,9 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
    IndexReader ir = DirectoryReader.open(dir);
    assertTrue("NumDocs should be > 0 but was " + ir.numDocs(), ir.numDocs() > 0);
    assertEquals(ir.numDocs(), docs.size());
-    Set<SortField> sortFields = new HashSet<SortField>(); 
-    sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG));
-    sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG));
-    Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "w2-w1", sortFields, PAYLOAD_FIELD_NAME);
+    ValueSource[] toAdd = new ValueSource[] {new LongFieldSource(WEIGHT_FIELD_NAME_1), new LongFieldSource(WEIGHT_FIELD_NAME_2)};
+
+    Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME,  new SumFloatFunction(toAdd), PAYLOAD_FIELD_NAME);
    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
    BytesRef f;
    while((f = inputIterator.next())!=null) {
@ -213,7 +202,7 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
      long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
      long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
      assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
-      assertEquals(inputIterator.weight(), w2-w1);
+      assertEquals(inputIterator.weight(), w2+w1);
      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
    }
    assertTrue(docs.isEmpty());
@ -236,7 +225,7 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
    writer.close();

    IndexReader ir = DirectoryReader.open(dir);
-    Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME);
+    Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME);
    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
    BytesRef f;
    while((f = inputIterator.next())!=null) {
--- a/solr/core/src/java/org/apache/solr/spelling/suggest/DocumentExpressionDictionaryFactory.java
+++ b/solr/core/src/java/org/apache/solr/spelling/suggest/DocumentExpressionDictionaryFactory.java
@ -17,12 +17,17 @@ package org.apache.solr.spelling.suggest;
 * limitations under the License.
 */

+import java.text.ParseException;
 import java.util.HashSet;
 import java.util.Set;

+import org.apache.lucene.expressions.Expression;
+import org.apache.lucene.expressions.SimpleBindings;
+import org.apache.lucene.expressions.js.JavascriptCompiler;
+import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.spell.Dictionary;
-import org.apache.lucene.search.suggest.DocumentExpressionDictionary;
+import org.apache.lucene.search.suggest.DocumentValueSourceDictionary;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.schema.DoubleField;
 import org.apache.solr.schema.FieldType;
@ -36,7 +41,7 @@ import org.apache.solr.schema.TrieLongField;
 import org.apache.solr.search.SolrIndexSearcher;

 /**
- * Factory for {@link DocumentExpressionDictionary}
+ * Factory for {@link org.apache.lucene.search.suggest.DocumentValueSourceDictionary}
 */
 public class DocumentExpressionDictionaryFactory extends DictionaryFactory {

@ -89,8 +94,22 @@ public class DocumentExpressionDictionaryFactory extends DictionaryFactory {
      }
    }
   
-    return new DocumentExpressionDictionary(searcher.getIndexReader(), field, weightExpression, 
-        sortFields, payloadField);
+    return new DocumentValueSourceDictionary(searcher.getIndexReader(), field, fromExpression(weightExpression,
+        sortFields), payloadField);
+  }
+
+  /** Compiles {@code weightExpression}, registers every entry of {@code sortFields} as a binding, and returns the resulting source. */
+  public ValueSource fromExpression(String weightExpression, Set<SortField> sortFields) {
+    try {
+      Expression expression = JavascriptCompiler.compile(weightExpression);
+      SimpleBindings bindings = new SimpleBindings();
+      for (SortField sortField : sortFields) {
+        bindings.add(sortField);
+      }
+      return expression.getValueSource(bindings);
+    } catch (ParseException e) {
+      throw new RuntimeException("Could not parse weight expression: " + weightExpression, e);
+    }
   }
  
  private SortField.Type getSortFieldType(SolrCore core, String sortFieldName) {