LUCENE-5414: Suggest module should not depend on expression module

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1561415 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2014-01-25 22:18:36 +00:00
parent b27fe755d1
commit d1999b7791
5 changed files with 70 additions and 93 deletions

View File

@ -211,6 +211,12 @@ API Changes
etc. but it's easy to override the parseShape method if you wish. (David
Smiley)
* LUCENE-5414: DocumentExpressionDictionary was renamed to
DocumentValueSourceDictionary and all dependencies on the lucene-expressions
module were removed from lucene-suggest. DocumentValueSourceDictionary now
only accepts a ValueSource; the convenience constructors that took an
expression string were removed. (Simon Willnauer)
Optimizations
* LUCENE-5372: Replace StringBuffer by StringBuilder, where possible.

View File

@ -31,13 +31,10 @@
<path id="classpath">
<pathelement path="${analyzers-common.jar}"/>
<pathelement path="${misc.jar}"/>
<pathelement path="${expressions.jar}"/>
<pathelement path="${queries.jar}"/>
<fileset dir="${common.dir}/expressions/lib"/>
<path refid="base.classpath"/>
</path>
<target name="javadocs" depends="javadocs-queries,compile-core">
<invoke-module-javadoc>
<links>

View File

@ -18,22 +18,15 @@ package org.apache.lucene.search.suggest;
*/
import java.io.IOException;
import java.text.ParseException;
import java.util.HashMap;
import java.util.List;
import java.util.Set;
import org.apache.lucene.document.NumericDocValuesField; // javadocs
import org.apache.lucene.expressions.Expression;
import org.apache.lucene.expressions.SimpleBindings;
import org.apache.lucene.expressions.js.JavascriptCompiler;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRefIterator;
@ -41,9 +34,8 @@ import org.apache.lucene.util.BytesRefIterator;
* <p>
* Dictionary with terms and optionally payload information
* taken from stored fields in a Lucene index. Similar to
* {@link DocumentDictionary}, except it computes the weight
* of the terms in a document based on a user-defined expression
* having one or more {@link NumericDocValuesField} in the document.
* {@link DocumentDictionary}, except it obtains the weight
* of the terms in a document based on a {@link ValueSource}.
* </p>
* <b>NOTE:</b>
* <ul>
@ -56,60 +48,34 @@ import org.apache.lucene.util.BytesRefIterator;
* do not have a value for a document, then the document is
* rejected by the dictionary
* </li>
* <li>
* All the fields used in <code>weightExpression</code> should
* have values for all documents, if any of the fields do not
* have a value for a document, it will default to 0
* </li>
* </ul>
* <p>
* In practice the {@link ValueSource} will likely be obtained
* using the lucene expression module. The following example shows
* how to create a {@link ValueSource} from a simple addition of two
* fields:
* <code>
* Expression expression = JavascriptCompiler.compile("f1 + f2");
* SimpleBindings bindings = new SimpleBindings();
* bindings.add(new SortField("f1", SortField.Type.LONG));
* bindings.add(new SortField("f2", SortField.Type.LONG));
* ValueSource valueSource = expression.getValueSource(bindings);
* </code>
* </p>
*
*/
public class DocumentExpressionDictionary extends DocumentDictionary {
public class DocumentValueSourceDictionary extends DocumentDictionary {
private final ValueSource weightsValueSource;
/**
* Creates a new dictionary with the contents of the fields named <code>field</code>
* for the terms and computes the corresponding weights of the term by compiling the
* user-defined <code>weightExpression</code> using the <code>sortFields</code>
* bindings.
*/
public DocumentExpressionDictionary(IndexReader reader, String field,
String weightExpression, Set<SortField> sortFields) {
this(reader, field, weightExpression, sortFields, null);
}
/**
* Creates a new dictionary with the contents of the fields named <code>field</code>
* for the terms, <code>payloadField</code> for the corresponding payloads
* and computes the corresponding weights of the term by compiling the
* user-defined <code>weightExpression</code> using the <code>sortFields</code>
* bindings.
*/
public DocumentExpressionDictionary(IndexReader reader, String field,
String weightExpression, Set<SortField> sortFields, String payload) {
super(reader, field, null, payload);
Expression expression = null;
try {
expression = JavascriptCompiler.compile(weightExpression);
} catch (ParseException e) {
throw new RuntimeException();
}
SimpleBindings bindings = new SimpleBindings();
for (SortField sortField: sortFields) {
bindings.add(sortField);
}
weightsValueSource = expression.getValueSource(bindings);
}
/**
* Creates a new dictionary with the contents of the fields named <code>field</code>
* for the terms, <code>payloadField</code> for the corresponding payloads
* and uses the <code>weightsValueSource</code> supplied to determine the
* score.
*/
public DocumentExpressionDictionary(IndexReader reader, String field,
ValueSource weightsValueSource, String payload) {
public DocumentValueSourceDictionary(IndexReader reader, String field,
ValueSource weightsValueSource, String payload) {
super(reader, field, null, payload);
this.weightsValueSource = weightsValueSource;
}
@ -119,18 +85,18 @@ public class DocumentExpressionDictionary extends DocumentDictionary {
* for the terms and uses the <code>weightsValueSource</code> supplied to determine the
* score.
*/
public DocumentExpressionDictionary(IndexReader reader, String field,
ValueSource weightsValueSource) {
public DocumentValueSourceDictionary(IndexReader reader, String field,
ValueSource weightsValueSource) {
super(reader, field, null, null);
this.weightsValueSource = weightsValueSource;
}
@Override
public BytesRefIterator getWordsIterator() throws IOException {
return new DocumentExpressionInputIterator(payloadField!=null);
return new DocumentValueSourceInputIterator(payloadField!=null);
}
final class DocumentExpressionInputIterator extends DocumentDictionary.DocumentInputIterator {
final class DocumentValueSourceInputIterator extends DocumentDictionary.DocumentInputIterator {
private FunctionValues currentWeightValues;
/** leaves of the reader */
@ -140,7 +106,7 @@ public class DocumentExpressionDictionary extends DocumentDictionary {
/** current leaf index */
private int currentLeafIndex = 0;
public DocumentExpressionInputIterator(boolean hasPayloads)
public DocumentValueSourceInputIterator(boolean hasPayloads)
throws IOException {
super(hasPayloads);
leaves = reader.leaves();

View File

@ -20,11 +20,9 @@ package org.apache.lucene.search.suggest;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
@ -37,22 +35,24 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.DoubleConstValueSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
import org.apache.lucene.queries.function.valuesource.SumFloatFunction;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
public class DocumentExpressionDictionaryTest extends LuceneTestCase {
public class DocumentValueSourceDictionaryTest extends LuceneTestCase {
static final String FIELD_NAME = "f1";
static final String WEIGHT_FIELD_NAME_1 = "w1";
static final String WEIGHT_FIELD_NAME_2 = "w2";
static final String WEIGHT_FIELD_NAME_3 = "w3";
static final String PAYLOAD_FIELD_NAME = "p1";
private Map<String, Document> generateIndexDocuments(int ndocs) {
Map<String, Document> docs = new HashMap<>();
for(int i = 0; i < ndocs ; i++) {
@ -82,11 +82,7 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
writer.commit();
writer.close();
IndexReader ir = DirectoryReader.open(dir);
Set<SortField> sortFields = new HashSet<SortField>();
sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG));
sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG));
sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG));
Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "((w1 + w2) - w3)", sortFields, PAYLOAD_FIELD_NAME);
Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME);
InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
assertNull(inputIterator.next());
@ -111,11 +107,8 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
writer.close();
IndexReader ir = DirectoryReader.open(dir);
Set<SortField> sortFields = new HashSet<SortField>();
sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG));
sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG));
sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG));
Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "((w1 + w2) - w3)", sortFields, PAYLOAD_FIELD_NAME);
ValueSource[] toAdd = new ValueSource[] {new LongFieldSource(WEIGHT_FIELD_NAME_1), new LongFieldSource(WEIGHT_FIELD_NAME_2), new LongFieldSource(WEIGHT_FIELD_NAME_3)};
Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumFloatFunction(toAdd), PAYLOAD_FIELD_NAME);
InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
BytesRef f;
while((f = inputIterator.next())!=null) {
@ -124,7 +117,7 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
assertEquals(inputIterator.weight(), (w1 + w2) - w3);
assertEquals(inputIterator.weight(), (w1 + w2 + w3));
assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
}
assertTrue(docs.isEmpty());
@ -146,11 +139,8 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
writer.close();
IndexReader ir = DirectoryReader.open(dir);
Set<SortField> sortFields = new HashSet<SortField>();
sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG));
sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG));
sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG));
Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "w1 + (0.2 * w2) - (w3 - w1)/2", sortFields);
ValueSource[] toAdd = new ValueSource[] {new LongFieldSource(WEIGHT_FIELD_NAME_1), new LongFieldSource(WEIGHT_FIELD_NAME_2), new LongFieldSource(WEIGHT_FIELD_NAME_3)};
Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumFloatFunction(toAdd));
InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
BytesRef f;
while((f = inputIterator.next())!=null) {
@ -159,7 +149,7 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
assertEquals(inputIterator.weight(), (long)(w1 + (0.2 * w2) - (w3 - w1)/2));
assertEquals(inputIterator.weight(), (w1 + w2 + w3));
assertEquals(inputIterator.payload(), null);
}
assertTrue(docs.isEmpty());
@ -202,10 +192,9 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
IndexReader ir = DirectoryReader.open(dir);
assertTrue("NumDocs should be > 0 but was " + ir.numDocs(), ir.numDocs() > 0);
assertEquals(ir.numDocs(), docs.size());
Set<SortField> sortFields = new HashSet<SortField>();
sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG));
sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG));
Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "w2-w1", sortFields, PAYLOAD_FIELD_NAME);
ValueSource[] toAdd = new ValueSource[] {new LongFieldSource(WEIGHT_FIELD_NAME_1), new LongFieldSource(WEIGHT_FIELD_NAME_2)};
Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumFloatFunction(toAdd), PAYLOAD_FIELD_NAME);
InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
BytesRef f;
while((f = inputIterator.next())!=null) {
@ -213,7 +202,7 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
assertEquals(inputIterator.weight(), w2-w1);
assertEquals(inputIterator.weight(), w2+w1);
assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
}
assertTrue(docs.isEmpty());
@ -236,7 +225,7 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
writer.close();
IndexReader ir = DirectoryReader.open(dir);
Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME);
Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME);
InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
BytesRef f;
while((f = inputIterator.next())!=null) {

View File

@ -17,12 +17,17 @@ package org.apache.solr.spelling.suggest;
* limitations under the License.
*/
import java.text.ParseException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.expressions.Expression;
import org.apache.lucene.expressions.SimpleBindings;
import org.apache.lucene.expressions.js.JavascriptCompiler;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.suggest.DocumentExpressionDictionary;
import org.apache.lucene.search.suggest.DocumentValueSourceDictionary;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.DoubleField;
import org.apache.solr.schema.FieldType;
@ -36,7 +41,7 @@ import org.apache.solr.schema.TrieLongField;
import org.apache.solr.search.SolrIndexSearcher;
/**
* Factory for {@link DocumentExpressionDictionary}
* Factory for {@link org.apache.lucene.search.suggest.DocumentValueSourceDictionary}
*/
public class DocumentExpressionDictionaryFactory extends DictionaryFactory {
@ -89,8 +94,22 @@ public class DocumentExpressionDictionaryFactory extends DictionaryFactory {
}
}
return new DocumentExpressionDictionary(searcher.getIndexReader(), field, weightExpression,
sortFields, payloadField);
return new DocumentValueSourceDictionary(searcher.getIndexReader(), field, fromExpression(weightExpression,
sortFields), payloadField);
}
/**
 * Compiles a weight expression into a {@link ValueSource}.
 *
 * <p>The expression text is compiled with {@link JavascriptCompiler}, every entry of
 * {@code sortFields} is added to a fresh {@link SimpleBindings}, and the compiled
 * expression is asked for its value source over those bindings.
 *
 * @param weightExpression expression source text to compile
 * @param sortFields sort fields added as bindings for the expression
 * @return the value source the compiled expression produces for the bindings
 * @throws RuntimeException if {@code weightExpression} cannot be parsed; the
 *         originating {@link ParseException} is attached as the cause
 */
public ValueSource fromExpression(String weightExpression, Set<SortField> sortFields) {
  final Expression expression;
  try {
    expression = JavascriptCompiler.compile(weightExpression);
  } catch (ParseException e) {
    // Same unchecked type as before, but keep the cause and the offending
    // expression so a bad configuration can actually be diagnosed.
    throw new RuntimeException("Could not compile weight expression: " + weightExpression, e);
  }
  SimpleBindings bindings = new SimpleBindings();
  for (SortField sortField : sortFields) {
    bindings.add(sortField);
  }
  return expression.getValueSource(bindings);
}
private SortField.Type getSortFieldType(SolrCore core, String sortFieldName) {