mirror of https://github.com/apache/lucene.git
LUCENE-5414: Suggest module should not depend on expression module
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1561415 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b27fe755d1
commit
d1999b7791
|
@ -211,6 +211,12 @@ API Changes
|
|||
etc. but it's easy to override the parseShape method if you wish. (David
|
||||
Smiley)
|
||||
|
||||
* LUCENE-5414: DocumentExpressionDictionary was renamed to
|
||||
DocumentValueSourceDictionary and all dependencies on the lucene-expression
|
||||
module were removed from lucene-suggest. DocumentValueSourceDictionary now
|
||||
only accepts a ValueSource instead of a convenience ctor for an expression
|
||||
string. (Simon Willnauer)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-5372: Replace StringBuffer by StringBuilder, where possible.
|
||||
|
|
|
@ -31,13 +31,10 @@
|
|||
<path id="classpath">
|
||||
<pathelement path="${analyzers-common.jar}"/>
|
||||
<pathelement path="${misc.jar}"/>
|
||||
<pathelement path="${expressions.jar}"/>
|
||||
<pathelement path="${queries.jar}"/>
|
||||
<fileset dir="${common.dir}/expressions/lib"/>
|
||||
<path refid="base.classpath"/>
|
||||
</path>
|
||||
|
||||
|
||||
<target name="javadocs" depends="javadocs-queries,compile-core">
|
||||
<invoke-module-javadoc>
|
||||
<links>
|
||||
|
|
|
@ -18,22 +18,15 @@ package org.apache.lucene.search.suggest;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.ParseException;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.document.NumericDocValuesField; // javadocs
|
||||
import org.apache.lucene.expressions.Expression;
|
||||
import org.apache.lucene.expressions.SimpleBindings;
|
||||
import org.apache.lucene.expressions.js.JavascriptCompiler;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.ReaderUtil;
|
||||
import org.apache.lucene.index.StoredDocument;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
|
||||
|
||||
|
@ -41,9 +34,8 @@ import org.apache.lucene.util.BytesRefIterator;
|
|||
* <p>
|
||||
* Dictionary with terms and optionally payload information
|
||||
* taken from stored fields in a Lucene index. Similar to
|
||||
* {@link DocumentDictionary}, except it computes the weight
|
||||
* of the terms in a document based on a user-defined expression
|
||||
* having one or more {@link NumericDocValuesField} in the document.
|
||||
* {@link DocumentDictionary}, except it obtains the weight
|
||||
* of the terms in a document based on a {@link ValueSource}.
|
||||
* </p>
|
||||
* <b>NOTE:</b>
|
||||
* <ul>
|
||||
|
@ -56,60 +48,34 @@ import org.apache.lucene.util.BytesRefIterator;
|
|||
* do not have a value for a document, then the document is
|
||||
* rejected by the dictionary
|
||||
* </li>
|
||||
* <li>
|
||||
* All the fields used in <code>weightExpression</code> should
|
||||
* have values for all documents, if any of the fields do not
|
||||
* have a value for a document, it will default to 0
|
||||
* </li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* In practice the {@link ValueSource} will likely be obtained
|
||||
* using the lucene expression module. The following example shows
|
||||
* how to create a {@link ValueSource} from a simple addition of two
|
||||
* fields:
|
||||
* <code>
|
||||
* Expression expression = JavascriptCompiler.compile("f1 + f2");
|
||||
* SimpleBindings bindings = new SimpleBindings();
|
||||
* bindings.add(new SortField("f1", SortField.Type.LONG));
|
||||
* bindings.add(new SortField("f2", SortField.Type.LONG));
|
||||
* ValueSource valueSource = expression.getValueSource(bindings);
|
||||
* </code>
|
||||
* </p>
|
||||
*
|
||||
*/
|
||||
public class DocumentExpressionDictionary extends DocumentDictionary {
|
||||
public class DocumentValueSourceDictionary extends DocumentDictionary {
|
||||
|
||||
private final ValueSource weightsValueSource;
|
||||
|
||||
/**
|
||||
* Creates a new dictionary with the contents of the fields named <code>field</code>
|
||||
* for the terms and computes the corresponding weights of the term by compiling the
|
||||
* user-defined <code>weightExpression</code> using the <code>sortFields</code>
|
||||
* bindings.
|
||||
*/
|
||||
public DocumentExpressionDictionary(IndexReader reader, String field,
|
||||
String weightExpression, Set<SortField> sortFields) {
|
||||
this(reader, field, weightExpression, sortFields, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new dictionary with the contents of the fields named <code>field</code>
|
||||
* for the terms, <code>payloadField</code> for the corresponding payloads
|
||||
* and computes the corresponding weights of the term by compiling the
|
||||
* user-defined <code>weightExpression</code> using the <code>sortFields</code>
|
||||
* bindings.
|
||||
*/
|
||||
public DocumentExpressionDictionary(IndexReader reader, String field,
|
||||
String weightExpression, Set<SortField> sortFields, String payload) {
|
||||
super(reader, field, null, payload);
|
||||
Expression expression = null;
|
||||
try {
|
||||
expression = JavascriptCompiler.compile(weightExpression);
|
||||
} catch (ParseException e) {
|
||||
throw new RuntimeException();
|
||||
}
|
||||
SimpleBindings bindings = new SimpleBindings();
|
||||
for (SortField sortField: sortFields) {
|
||||
bindings.add(sortField);
|
||||
}
|
||||
|
||||
weightsValueSource = expression.getValueSource(bindings);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new dictionary with the contents of the fields named <code>field</code>
|
||||
* for the terms, <code>payloadField</code> for the corresponding payloads
|
||||
* and uses the <code>weightsValueSource</code> supplied to determine the
|
||||
* score.
|
||||
*/
|
||||
public DocumentExpressionDictionary(IndexReader reader, String field,
|
||||
ValueSource weightsValueSource, String payload) {
|
||||
public DocumentValueSourceDictionary(IndexReader reader, String field,
|
||||
ValueSource weightsValueSource, String payload) {
|
||||
super(reader, field, null, payload);
|
||||
this.weightsValueSource = weightsValueSource;
|
||||
}
|
||||
|
@ -119,18 +85,18 @@ public class DocumentExpressionDictionary extends DocumentDictionary {
|
|||
* for the terms and uses the <code>weightsValueSource</code> supplied to determine the
|
||||
* score.
|
||||
*/
|
||||
public DocumentExpressionDictionary(IndexReader reader, String field,
|
||||
ValueSource weightsValueSource) {
|
||||
public DocumentValueSourceDictionary(IndexReader reader, String field,
|
||||
ValueSource weightsValueSource) {
|
||||
super(reader, field, null, null);
|
||||
this.weightsValueSource = weightsValueSource;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRefIterator getWordsIterator() throws IOException {
|
||||
return new DocumentExpressionInputIterator(payloadField!=null);
|
||||
return new DocumentValueSourceInputIterator(payloadField!=null);
|
||||
}
|
||||
|
||||
final class DocumentExpressionInputIterator extends DocumentDictionary.DocumentInputIterator {
|
||||
final class DocumentValueSourceInputIterator extends DocumentDictionary.DocumentInputIterator {
|
||||
|
||||
private FunctionValues currentWeightValues;
|
||||
/** leaves of the reader */
|
||||
|
@ -140,7 +106,7 @@ public class DocumentExpressionDictionary extends DocumentDictionary {
|
|||
/** current leaf index */
|
||||
private int currentLeafIndex = 0;
|
||||
|
||||
public DocumentExpressionInputIterator(boolean hasPayloads)
|
||||
public DocumentValueSourceInputIterator(boolean hasPayloads)
|
||||
throws IOException {
|
||||
super(hasPayloads);
|
||||
leaves = reader.leaves();
|
|
@ -20,11 +20,9 @@ package org.apache.lucene.search.suggest;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
|
@ -37,15 +35,17 @@ import org.apache.lucene.index.IndexReader;
|
|||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.valuesource.DoubleConstValueSource;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
|
||||
import org.apache.lucene.queries.function.valuesource.SumFloatFunction;
|
||||
import org.apache.lucene.search.spell.Dictionary;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
public class DocumentExpressionDictionaryTest extends LuceneTestCase {
|
||||
public class DocumentValueSourceDictionaryTest extends LuceneTestCase {
|
||||
|
||||
static final String FIELD_NAME = "f1";
|
||||
static final String WEIGHT_FIELD_NAME_1 = "w1";
|
||||
|
@ -82,11 +82,7 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
|
|||
writer.commit();
|
||||
writer.close();
|
||||
IndexReader ir = DirectoryReader.open(dir);
|
||||
Set<SortField> sortFields = new HashSet<SortField>();
|
||||
sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG));
|
||||
sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG));
|
||||
sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG));
|
||||
Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "((w1 + w2) - w3)", sortFields, PAYLOAD_FIELD_NAME);
|
||||
Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME);
|
||||
InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
|
||||
|
||||
assertNull(inputIterator.next());
|
||||
|
@ -111,11 +107,8 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
|
|||
writer.close();
|
||||
|
||||
IndexReader ir = DirectoryReader.open(dir);
|
||||
Set<SortField> sortFields = new HashSet<SortField>();
|
||||
sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG));
|
||||
sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG));
|
||||
sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG));
|
||||
Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "((w1 + w2) - w3)", sortFields, PAYLOAD_FIELD_NAME);
|
||||
ValueSource[] toAdd = new ValueSource[] {new LongFieldSource(WEIGHT_FIELD_NAME_1), new LongFieldSource(WEIGHT_FIELD_NAME_2), new LongFieldSource(WEIGHT_FIELD_NAME_3)};
|
||||
Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumFloatFunction(toAdd), PAYLOAD_FIELD_NAME);
|
||||
InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
|
||||
BytesRef f;
|
||||
while((f = inputIterator.next())!=null) {
|
||||
|
@ -124,7 +117,7 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
|
|||
long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
|
||||
long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
|
||||
assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
|
||||
assertEquals(inputIterator.weight(), (w1 + w2) - w3);
|
||||
assertEquals(inputIterator.weight(), (w1 + w2 + w3));
|
||||
assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
|
||||
}
|
||||
assertTrue(docs.isEmpty());
|
||||
|
@ -146,11 +139,8 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
|
|||
writer.close();
|
||||
|
||||
IndexReader ir = DirectoryReader.open(dir);
|
||||
Set<SortField> sortFields = new HashSet<SortField>();
|
||||
sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG));
|
||||
sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG));
|
||||
sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG));
|
||||
Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "w1 + (0.2 * w2) - (w3 - w1)/2", sortFields);
|
||||
ValueSource[] toAdd = new ValueSource[] {new LongFieldSource(WEIGHT_FIELD_NAME_1), new LongFieldSource(WEIGHT_FIELD_NAME_2), new LongFieldSource(WEIGHT_FIELD_NAME_3)};
|
||||
Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumFloatFunction(toAdd));
|
||||
InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
|
||||
BytesRef f;
|
||||
while((f = inputIterator.next())!=null) {
|
||||
|
@ -159,7 +149,7 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
|
|||
long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
|
||||
long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
|
||||
assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
|
||||
assertEquals(inputIterator.weight(), (long)(w1 + (0.2 * w2) - (w3 - w1)/2));
|
||||
assertEquals(inputIterator.weight(), (w1 + w2 + w3));
|
||||
assertEquals(inputIterator.payload(), null);
|
||||
}
|
||||
assertTrue(docs.isEmpty());
|
||||
|
@ -202,10 +192,9 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
|
|||
IndexReader ir = DirectoryReader.open(dir);
|
||||
assertTrue("NumDocs should be > 0 but was " + ir.numDocs(), ir.numDocs() > 0);
|
||||
assertEquals(ir.numDocs(), docs.size());
|
||||
Set<SortField> sortFields = new HashSet<SortField>();
|
||||
sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG));
|
||||
sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG));
|
||||
Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "w2-w1", sortFields, PAYLOAD_FIELD_NAME);
|
||||
ValueSource[] toAdd = new ValueSource[] {new LongFieldSource(WEIGHT_FIELD_NAME_1), new LongFieldSource(WEIGHT_FIELD_NAME_2)};
|
||||
|
||||
Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumFloatFunction(toAdd), PAYLOAD_FIELD_NAME);
|
||||
InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
|
||||
BytesRef f;
|
||||
while((f = inputIterator.next())!=null) {
|
||||
|
@ -213,7 +202,7 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
|
|||
long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
|
||||
long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
|
||||
assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
|
||||
assertEquals(inputIterator.weight(), w2-w1);
|
||||
assertEquals(inputIterator.weight(), w2+w1);
|
||||
assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
|
||||
}
|
||||
assertTrue(docs.isEmpty());
|
||||
|
@ -236,7 +225,7 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
|
|||
writer.close();
|
||||
|
||||
IndexReader ir = DirectoryReader.open(dir);
|
||||
Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME);
|
||||
Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME);
|
||||
InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
|
||||
BytesRef f;
|
||||
while((f = inputIterator.next())!=null) {
|
|
@ -17,12 +17,17 @@ package org.apache.solr.spelling.suggest;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.text.ParseException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.expressions.Expression;
|
||||
import org.apache.lucene.expressions.SimpleBindings;
|
||||
import org.apache.lucene.expressions.js.JavascriptCompiler;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.spell.Dictionary;
|
||||
import org.apache.lucene.search.suggest.DocumentExpressionDictionary;
|
||||
import org.apache.lucene.search.suggest.DocumentValueSourceDictionary;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.schema.DoubleField;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
|
@ -36,7 +41,7 @@ import org.apache.solr.schema.TrieLongField;
|
|||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
|
||||
/**
|
||||
* Factory for {@link DocumentExpressionDictionary}
|
||||
* Factory for {@link org.apache.lucene.search.suggest.DocumentValueSourceDictionary}
|
||||
*/
|
||||
public class DocumentExpressionDictionaryFactory extends DictionaryFactory {
|
||||
|
||||
|
@ -89,8 +94,22 @@ public class DocumentExpressionDictionaryFactory extends DictionaryFactory {
|
|||
}
|
||||
}
|
||||
|
||||
return new DocumentExpressionDictionary(searcher.getIndexReader(), field, weightExpression,
|
||||
sortFields, payloadField);
|
||||
return new DocumentValueSourceDictionary(searcher.getIndexReader(), field, fromExpression(weightExpression,
|
||||
sortFields), payloadField);
|
||||
}
|
||||
|
||||
public ValueSource fromExpression(String weightExpression, Set<SortField> sortFields) {
|
||||
Expression expression = null;
|
||||
try {
|
||||
expression = JavascriptCompiler.compile(weightExpression);
|
||||
} catch (ParseException e) {
|
||||
throw new RuntimeException();
|
||||
}
|
||||
SimpleBindings bindings = new SimpleBindings();
|
||||
for (SortField sortField : sortFields) {
|
||||
bindings.add(sortField);
|
||||
}
|
||||
return expression.getValueSource(bindings);
|
||||
}
|
||||
|
||||
private SortField.Type getSortFieldType(SolrCore core, String sortFieldName) {
|
||||
|
|
Loading…
Reference in New Issue