mirror of https://github.com/apache/lucene.git
SOLR-1302: Why should numbers get to have all the fun, add String distance functions, too
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@883421 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6fcb72a3ad
commit
548f973766
|
@ -34,7 +34,7 @@ Detailed Change List
|
|||
New Features
|
||||
----------------------
|
||||
|
||||
* SOLR-1302: Added several new distance based functions, including Great Circle (haversine), Manhattan and Euclidean.
|
||||
* SOLR-1302: Added several new distance based functions, including Great Circle (haversine), Manhattan, Euclidean and String (using the StringDistance methods in the Lucene Spellchecker).
|
||||
Also added geohash(), deg() and rad() convenience functions. See http://wiki.apache.org/solr/FunctionQuery. (gsingers)
|
||||
|
||||
* SOLR-1553: New dismax parser implementation (accessible as "edismax")
|
||||
|
|
|
@ -20,6 +20,10 @@ import org.apache.lucene.index.IndexReader;
|
|||
import org.apache.lucene.queryParser.ParseException;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Searcher;
|
||||
import org.apache.lucene.search.spell.JaroWinklerDistance;
|
||||
import org.apache.lucene.search.spell.LevensteinDistance;
|
||||
import org.apache.lucene.search.spell.NGramDistance;
|
||||
import org.apache.lucene.search.spell.StringDistance;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.schema.DateField;
|
||||
|
@ -34,6 +38,7 @@ import org.apache.solr.search.function.distance.SquaredEuclideanFunction;
|
|||
import org.apache.solr.search.function.distance.VectorDistanceFunction;
|
||||
import org.apache.solr.search.function.distance.GeohashHaversineFunction;
|
||||
import org.apache.solr.search.function.distance.GeohashFunction;
|
||||
import org.apache.solr.search.function.distance.StringDistanceFunction;
|
||||
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -255,6 +260,30 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
|
|||
return new GeohashFunction(lat, lon);
|
||||
}
|
||||
});
|
||||
addParser("strdist", new ValueSourceParser() {
  /**
   * Parses <code>strdist(str1, str2, measure[, ngramSize])</code>.
   * <p>
   * The first two arguments are parsed as value sources. The third selects the
   * {@link StringDistance} implementation (matched case-insensitively):
   * <ul>
   * <li><code>jw</code> - {@link JaroWinklerDistance}</li>
   * <li><code>edit</code> - {@link LevensteinDistance}</li>
   * <li><code>ngram</code> - {@link NGramDistance}; an optional fourth integer argument
   * gives the n-gram size, defaulting to 2</li>
   * <li>anything else - treated as a class name and instantiated through the core's
   * resource loader, then cast to {@link StringDistance}</li>
   * </ul>
   *
   * @param fp the function parser positioned at the arguments of strdist
   * @return a {@link StringDistanceFunction} over the two sources and the chosen measure
   * @throws ParseException if the arguments cannot be parsed or the measure is missing
   */
  public ValueSource parse(FunctionQParser fp) throws ParseException {
    ValueSource str1 = fp.parseValueSource();
    ValueSource str2 = fp.parseValueSource();
    String distClass = fp.parseArg();

    // NOTE(review): parseArg() appears to yield null when no argument is left
    // (e.g. strdist(a,b)); fail with a readable message instead of an NPE below.
    if (distClass == null) {
      throw new ParseException("strdist requires a distance measure as its third argument:"
              + " jw, edit, ngram or the name of a StringDistance class");
    }

    StringDistance dist = null;
    if (distClass.equalsIgnoreCase("jw")) {
      dist = new JaroWinklerDistance();
    } else if (distClass.equalsIgnoreCase("edit")) {
      dist = new LevensteinDistance();
    } else if (distClass.equalsIgnoreCase("ngram")) {
      int ngram = 2;
      if (fp.hasMoreArguments()) {
        ngram = fp.parseInt();
      }
      dist = new NGramDistance(ngram);
    } else {
      // Not one of the built-in short names: load it as a plugin class.
      dist = (StringDistance) fp.req.getCore().getResourceLoader().newInstance(distClass);
    }
    return new StringDistanceFunction(str1, str2, dist);
  }
});
|
||||
|
||||
addParser(new DoubleParser("rad") {
|
||||
public double func(int doc, DocValues vals) {
|
||||
|
|
|
@ -0,0 +1,96 @@
|
|||
package org.apache.solr.search.function.distance;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.spell.StringDistance;
|
||||
import org.apache.solr.search.function.DocValues;
|
||||
import org.apache.solr.search.function.ValueSource;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
**/
|
||||
public class StringDistanceFunction extends ValueSource {
|
||||
protected ValueSource str1, str2;
|
||||
protected StringDistance dist;
|
||||
|
||||
/**
|
||||
* @param str1
|
||||
* @param str2
|
||||
* @param measure
|
||||
*/
|
||||
public StringDistanceFunction(ValueSource str1, ValueSource str2, StringDistance measure) {
|
||||
this.str1 = str1;
|
||||
this.str2 = str2;
|
||||
dist = measure;
|
||||
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValues getValues(Map context, IndexReader reader) throws IOException {
|
||||
final DocValues str1DV = str1.getValues(context, reader);
|
||||
final DocValues str2DV = str2.getValues(context, reader);
|
||||
return new DocValues() {
|
||||
|
||||
public float floatVal(int doc) {
|
||||
return (float) dist.getDistance(str1DV.strVal(doc), str2DV.strVal(doc));
|
||||
}
|
||||
|
||||
public int intVal(int doc) {
|
||||
return (int) doubleVal(doc);
|
||||
}
|
||||
|
||||
public long longVal(int doc) {
|
||||
return (long) doubleVal(doc);
|
||||
}
|
||||
|
||||
public double doubleVal(int doc) {
|
||||
return (double) floatVal(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(int doc) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("strdist").append('(');
|
||||
sb.append(str1DV.toString(doc)).append(',').append(str2DV.toString(doc))
|
||||
.append(", dist=").append(dist.getClass().getName());
|
||||
sb.append(')');
|
||||
return sb.toString();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public String description() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("strdist").append('(');
|
||||
sb.append(str1).append(',').append(str2).append(", dist=").append(dist.getClass().getName());
|
||||
sb.append(')');
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (!(o instanceof StringDistanceFunction)) return false;
|
||||
|
||||
StringDistanceFunction that = (StringDistanceFunction) o;
|
||||
|
||||
if (!dist.equals(that.dist)) return false;
|
||||
if (!str1.equals(that.str1)) return false;
|
||||
if (!str2.equals(that.str2)) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int result = str1.hashCode();
|
||||
result = 31 * result + str2.hashCode();
|
||||
result = 31 * result + dist.hashCode();
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -347,6 +347,14 @@ public class TestFunctionQuery extends AbstractSolrTestCase {
|
|||
assertQ(req("fl", "*,score", "q", "{!func}deg(y_td)", "fq", "id:3"), "//float[@name='score']='45.0'");
|
||||
}
|
||||
|
||||
public void testStrDistance() throws Exception {
|
||||
assertU(adoc("id", "1", "x_s", "foil"));
|
||||
assertU(commit());
|
||||
assertQ(req("fl", "*,score", "q", "{!func}strdist(x_s, 'foit', edit)", "fq", "id:1"), "//float[@name='score']='0.75'");
|
||||
assertQ(req("fl", "*,score", "q", "{!func}strdist(x_s, 'foit', jw)", "fq", "id:1"), "//float[@name='score']='0.8833333'");
|
||||
assertQ(req("fl", "*,score", "q", "{!func}strdist(x_s, 'foit', ngram, 2)", "fq", "id:1"), "//float[@name='score']='0.875'");
|
||||
}
|
||||
|
||||
public void dofunc(String func, double val) throws Exception {
|
||||
// String sval = Double.toString(val);
|
||||
String sval = Float.toString((float)val);
|
||||
|
|
Loading…
Reference in New Issue