SOLR-356: pluggable functions

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@632005 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2008-02-28 15:31:04 +00:00
parent a73850ef3b
commit b4d3eeb0d1
3 changed files with 639 additions and 0 deletions

View File

@ -0,0 +1,269 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.Query;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.search.function.BoostedQuery;
import org.apache.solr.search.function.DivFloatFunction;
import org.apache.solr.search.function.DocValues;
import org.apache.solr.search.function.LinearFloatFunction;
import org.apache.solr.search.function.MaxFloatFunction;
import org.apache.solr.search.function.OrdFieldSource;
import org.apache.solr.search.function.PowFloatFunction;
import org.apache.solr.search.function.ProductFloatFunction;
import org.apache.solr.search.function.QueryValueSource;
import org.apache.solr.search.function.RangeMapFloatFunction;
import org.apache.solr.search.function.ReciprocalFloatFunction;
import org.apache.solr.search.function.ReverseOrdFieldSource;
import org.apache.solr.search.function.ScaleFloatFunction;
import org.apache.solr.search.function.SimpleFloatFunction;
import org.apache.solr.search.function.SumFloatFunction;
import org.apache.solr.search.function.ValueSource;
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
/**
* A factory that parses user queries to generate ValueSource instances.
* Intented usage is to create pluggable, named functions for use in function queries.
*/
public abstract class ValueSourceParser implements NamedListInitializedPlugin
{
/**
* Initialize the plugin.
*/
public abstract void init( NamedList args );
/**
* Parse the user input into a ValueSource.
*
* @param fp
* @return
* @throws ParseException
*/
public abstract ValueSource parse(FunctionQParser fp) throws ParseException;
/* standard functions */
public static Map<String, ValueSourceParser> standardValueSourceParsers = new HashMap<String, ValueSourceParser>();
static {
standardValueSourceParsers.put("ord", new ValueSourceParser() {
public ValueSource parse(FunctionQParser fp) throws ParseException {
String field = fp.parseId();
return new OrdFieldSource(field);
}
public void init(NamedList args) {
}
});
standardValueSourceParsers.put("rord", new ValueSourceParser() {
public ValueSource parse(FunctionQParser fp) throws ParseException {
String field = fp.parseId();
return new ReverseOrdFieldSource(field);
}
public void init(NamedList args) {
}
});
standardValueSourceParsers.put("linear", new ValueSourceParser() {
public ValueSource parse(FunctionQParser fp) throws ParseException {
ValueSource source = fp.parseValueSource();
float slope = fp.parseFloat();
float intercept = fp.parseFloat();
return new LinearFloatFunction(source,slope,intercept);
}
public void init(NamedList args) {
}
});
standardValueSourceParsers.put("max", new ValueSourceParser() {
public ValueSource parse(FunctionQParser fp) throws ParseException {
ValueSource source = fp.parseValueSource();
float val = fp.parseFloat();
return new MaxFloatFunction(source,val);
}
public void init(NamedList args) {
}
});
standardValueSourceParsers.put("recip", new ValueSourceParser() {
public ValueSource parse(FunctionQParser fp) throws ParseException {
ValueSource source = fp.parseValueSource();
float m = fp.parseFloat();
float a = fp.parseFloat();
float b = fp.parseFloat();
return new ReciprocalFloatFunction(source,m,a,b);
}
public void init(NamedList args) {
}
});
standardValueSourceParsers.put("scale", new ValueSourceParser() {
public ValueSource parse(FunctionQParser fp) throws ParseException {
ValueSource source = fp.parseValueSource();
float min = fp.parseFloat();
float max = fp.parseFloat();
return new ScaleFloatFunction(source,min,max);
}
public void init(NamedList args) {
}
});
standardValueSourceParsers.put("pow", new ValueSourceParser() {
public ValueSource parse(FunctionQParser fp) throws ParseException {
ValueSource a = fp.parseValueSource();
ValueSource b = fp.parseValueSource();
return new PowFloatFunction(a,b);
}
public void init(NamedList args) {
}
});
standardValueSourceParsers.put("div", new ValueSourceParser() {
public ValueSource parse(FunctionQParser fp) throws ParseException {
ValueSource a = fp.parseValueSource();
ValueSource b = fp.parseValueSource();
return new DivFloatFunction(a,b);
}
public void init(NamedList args) {
}
});
standardValueSourceParsers.put("map", new ValueSourceParser() {
public ValueSource parse(FunctionQParser fp) throws ParseException {
ValueSource source = fp.parseValueSource();
float min = fp.parseFloat();
float max = fp.parseFloat();
float target = fp.parseFloat();
return new RangeMapFloatFunction(source,min,max,target);
}
public void init(NamedList args) {
}
});
standardValueSourceParsers.put("sqrt", new ValueSourceParser() {
public ValueSource parse(FunctionQParser fp) throws ParseException {
ValueSource source = fp.parseValueSource();
return new SimpleFloatFunction(source) {
protected String name() {
return "sqrt";
}
protected float func(int doc, DocValues vals) {
return (float)Math.sqrt(vals.floatVal(doc));
}
};
}
public void init(NamedList args) {
}
});
standardValueSourceParsers.put("log", new ValueSourceParser() {
public ValueSource parse(FunctionQParser fp) throws ParseException {
ValueSource source = fp.parseValueSource();
return new SimpleFloatFunction(source) {
protected String name() {
return "log";
}
protected float func(int doc, DocValues vals) {
return (float)Math.log10(vals.floatVal(doc));
}
};
}
public void init(NamedList args) {
}
});
standardValueSourceParsers.put("abs", new ValueSourceParser() {
public ValueSource parse(FunctionQParser fp) throws ParseException {
ValueSource source = fp.parseValueSource();
return new SimpleFloatFunction(source) {
protected String name() {
return "abs";
}
protected float func(int doc, DocValues vals) {
return (float)Math.abs(vals.floatVal(doc));
}
};
}
public void init(NamedList args) {
}
});
standardValueSourceParsers.put("sum", new ValueSourceParser() {
public ValueSource parse(FunctionQParser fp) throws ParseException {
List<ValueSource> sources = fp.parseValueSourceList();
return new SumFloatFunction(sources.toArray(new ValueSource[sources.size()]));
}
public void init(NamedList args) {
}
});
standardValueSourceParsers.put("product", new ValueSourceParser() {
public ValueSource parse(FunctionQParser fp) throws ParseException {
List<ValueSource> sources = fp.parseValueSourceList();
return new ProductFloatFunction(sources.toArray(new ValueSource[sources.size()]));
}
public void init(NamedList args) {
}
});
standardValueSourceParsers.put("query", new ValueSourceParser() {
// boost(query($q),rating)
public ValueSource parse(FunctionQParser fp) throws ParseException {
Query q = fp.parseNestedQuery();
float defVal = 0.0f;
if (fp.hasMoreArguments()) {
defVal = fp.parseFloat();
}
return new QueryValueSource(q, defVal);
}
public void init(NamedList args) {
}
});
standardValueSourceParsers.put("boost", new ValueSourceParser() {
public ValueSource parse(FunctionQParser fp) throws ParseException {
Query q = fp.parseNestedQuery();
ValueSource vs = fp.parseValueSource();
BoostedQuery bq = new BoostedQuery(q, vs);
return new QueryValueSource(bq, 0.0f);
}
public void init(NamedList args) {
}
});
}
}

View File

@ -0,0 +1,58 @@
package org.apache.solr.search.function;
import org.apache.lucene.queryParser.ParseException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.search.FunctionQParser;
import org.apache.solr.search.ValueSourceParser;
import org.apache.solr.search.function.DocValues;
import org.apache.solr.search.function.SimpleFloatFunction;
import org.apache.solr.search.function.ValueSource;
/**
* A sample ValueSourceParser for testing. Approximates the oracle NVL function,
* letting you substitude a value when a "null" is encountered. In this case,
* null is approximated by a float value, since ValueSource always returns a
* float, even if the field is undefined for a document.
*
* Initialization parameters:
* - nvlFloatValue: float value to consider as "NULL" when seen in a field. defaults to 0.0f.
*
* Example:
* nvl(vs,2) will return 2 if the vs is NULL (as defined by nvlFloatValue above) or the doc value otherwise
*
*/
public class NvlValueSourceParser extends ValueSourceParser {
/**
* Value to consider "null" when found in a ValueSource Defaults to 0.0
*/
private float nvlFloatValue = 0.0f;
public ValueSource parse(FunctionQParser fp) throws ParseException {
ValueSource source = fp.parseValueSource();
final float nvl = fp.parseFloat();
return new SimpleFloatFunction(source) {
protected String name() {
return "nvl";
}
protected float func(int doc, DocValues vals) {
float v = vals.floatVal(doc);
if (v == nvlFloatValue) {
return nvl;
} else {
return v;
}
}
};
}
public void init(NamedList args) {
/* initialize the value to consider as null */
Float nvlFloatValueArg = (Float) args.get("nvlFloatValue");
if (nvlFloatValueArg != null) {
this.nvlFloatValue = nvlFloatValueArg;
}
}
}

View File

@ -0,0 +1,312 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- $Id$
$Source$
$Name$
-->
<config>
<!-- Used to specify an alternate directory to hold all index data.
It defaults to "index" if not present, and should probably
not be changed if replication is in use. -->
<!--
<indexDir>index</indexDir>
-->
<indexDefaults>
<!-- Values here affect all index writers and act as a default
unless overridden. -->
<useCompoundFile>false</useCompoundFile>
<mergeFactor>10</mergeFactor>
<maxBufferedDocs>1000</maxBufferedDocs>
<maxMergeDocs>2147483647</maxMergeDocs>
<maxFieldLength>10000</maxFieldLength>
<!-- these are global... can't currently override per index -->
<writeLockTimeout>1000</writeLockTimeout>
<commitLockTimeout>10000</commitLockTimeout>
<lockType>single</lockType>
</indexDefaults>
<mainIndex>
<!-- lucene options specific to the main on-disk lucene index -->
<useCompoundFile>false</useCompoundFile>
<mergeFactor>10</mergeFactor>
<maxBufferedDocs>1000</maxBufferedDocs>
<maxMergeDocs>2147483647</maxMergeDocs>
<maxFieldLength>10000</maxFieldLength>
<unlockOnStartup>true</unlockOnStartup>
</mainIndex>
<updateHandler class="solr.DirectUpdateHandler2">
<!-- autocommit pending docs if certain criteria are met
<autoCommit>
<maxDocs>10000</maxDocs>
<maxTime>3600000</maxTime>
</autoCommit>
-->
<!-- represents a lower bound on the frequency that commits may
occur (in seconds). NOTE: not yet implemented
<commitIntervalLowerBound>0</commitIntervalLowerBound>
-->
<!-- The RunExecutableListener executes an external command.
exe - the name of the executable to run
dir - dir to use as the current working directory. default="."
wait - the calling thread waits until the executable returns. default="true"
args - the arguments to pass to the program. default=nothing
env - environment variables to set. default=nothing
-->
<!-- A postCommit event is fired after every commit
<listener event="postCommit" class="solr.RunExecutableListener">
<str name="exe">/var/opt/resin3/__PORT__/scripts/solr/snapshooter</str>
<str name="dir">/var/opt/resin3/__PORT__</str>
<bool name="wait">true</bool>
<arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
<arr name="env"> <str>MYVAR=val1</str> </arr>
</listener>
-->
</updateHandler>
<query>
<!-- Maximum number of clauses in a boolean query... can affect
range or wildcard queries that expand to big boolean
queries. An exception is thrown if exceeded.
-->
<maxBooleanClauses>1024</maxBooleanClauses>
<!-- Cache specification for Filters or DocSets - unordered set of *all* documents
that match a particular query.
-->
<filterCache
class="solr.search.LRUCache"
size="512"
initialSize="512"
autowarmCount="256"/>
<queryResultCache
class="solr.search.LRUCache"
size="512"
initialSize="512"
autowarmCount="1024"/>
<documentCache
class="solr.search.LRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>
<!-- If true, stored fields that are not requested will be loaded lazily.
-->
<enableLazyFieldLoading>true</enableLazyFieldLoading>
<!--
<cache name="myUserCache"
class="solr.search.LRUCache"
size="4096"
initialSize="1024"
autowarmCount="1024"
regenerator="MyRegenerator"
/>
-->
<useFilterForSortedQuery>true</useFilterForSortedQuery>
<queryResultWindowSize>10</queryResultWindowSize>
<!-- set maxSize artificially low to exercise both types of sets -->
<HashDocSet maxSize="3" loadFactor="0.75"/>
<!-- boolToFilterOptimizer converts boolean clauses with zero boost
into cached filters if the number of docs selected by the clause exceeds
the threshold (represented as a fraction of the total index)
-->
<boolTofilterOptimizer enabled="false" cacheSize="32" threshold=".05"/>
<!-- a newSearcher event is fired whenever a new searcher is being prepared
and there is a current searcher handling requests (aka registered). -->
<!-- QuerySenderListener takes an array of NamedList and executes a
local query request for each NamedList in sequence. -->
<!--
<listener event="newSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>
<lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>
</arr>
</listener>
-->
<!-- a firstSearcher event is fired whenever a new searcher is being
prepared but there is no current registered searcher to handle
requests or to gain prewarming data from. -->
<!--
<listener event="firstSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<lst> <str name="q">fast_warm</str> <str name="start">0</str> <str name="rows">10</str> </lst>
</arr>
</listener>
-->
</query>
<!-- An alternate set representation that uses an integer hash to store filters (sets of docids).
If the set cardinality <= maxSize elements, then HashDocSet will be used instead of the bitset
based HashBitset. -->
<!-- requestHandler plugins... incoming queries will be dispatched to the
correct handler based on the qt (query type) param matching the
name of registered handlers.
The "standard" request handler is the default and will be used if qt
is not specified in the request.
-->
<requestHandler name="standard" class="solr.StandardRequestHandler"/>
<requestHandler name="dismaxOldStyleDefaults"
class="solr.DisMaxRequestHandler" >
<!-- for historic reasons, DisMaxRequestHandler will use all of
it's init params as "defaults" if there is no "defaults" list
specified
-->
<float name="tie">0.01</float>
<str name="qf">
text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0
</str>
<str name="pf">
text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5
</str>
<str name="bf">
ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3
</str>
<str name="mm">
3&lt;-1 5&lt;-2 6&lt;90%
</str>
<int name="ps">100</int>
</requestHandler>
<requestHandler name="dismax" class="solr.DisMaxRequestHandler" >
<lst name="defaults">
<str name="q.alt">*:*</str>
<float name="tie">0.01</float>
<str name="qf">
text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0
</str>
<str name="pf">
text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5
</str>
<str name="bf">
ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3
</str>
<str name="mm">
3&lt;-1 5&lt;-2 6&lt;90%
</str>
<int name="ps">100</int>
</lst>
</requestHandler>
<requestHandler name="old" class="solr.tst.OldRequestHandler" >
<int name="myparam">1000</int>
<float name="ratio">1.4142135</float>
<arr name="myarr"><int>1</int><int>2</int></arr>
<str>foo</str>
</requestHandler>
<requestHandler name="oldagain" class="solr.tst.OldRequestHandler" >
<lst name="lst1"> <str name="op">sqrt</str> <int name="val">2</int> </lst>
<lst name="lst2"> <str name="op">log</str> <float name="val">10</float> </lst>
</requestHandler>
<requestHandler name="test" class="solr.tst.TestRequestHandler" />
<!-- test query parameter defaults -->
<requestHandler name="defaults" class="solr.StandardRequestHandler">
<lst name="defaults">
<int name="rows">4</int>
<bool name="hl">true</bool>
<str name="hl.fl">text,name,subject,title,whitetok</str>
</lst>
</requestHandler>
<!-- test query parameter defaults -->
<requestHandler name="lazy" class="solr.StandardRequestHandler" startup="lazy">
<lst name="defaults">
<int name="rows">4</int>
<bool name="hl">true</bool>
<str name="hl.fl">text,name,subject,title,whitetok</str>
</lst>
</requestHandler>
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
<requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" />
<highlighting>
<!-- Configure the standard fragmenter -->
<fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true">
<lst name="defaults">
<int name="hl.fragsize">100</int>
</lst>
</fragmenter>
<fragmenter name="regex" class="org.apache.solr.highlight.RegexFragmenter">
<lst name="defaults">
<int name="hl.fragsize">70</int>
</lst>
</fragmenter>
<!-- Configure the standard formatter -->
<formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true">
<lst name="defaults">
<str name="hl.simple.pre"><![CDATA[<em>]]></str>
<str name="hl.simple.post"><![CDATA[</em>]]></str>
</lst>
</formatter>
</highlighting>
<!-- enable streaming for testing... -->
<requestDispatcher handleSelect="true" >
<requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048" />
</requestDispatcher>
<admin>
<defaultQuery>solr</defaultQuery>
<gettableFiles>solrconfig.xml scheam.xml admin-extra.html</gettableFiles>
</admin>
<!-- test getting system property -->
<propTest attr1="${solr.test.sys.prop1}-$${literal}"
attr2="${non.existent.sys.prop:default-from-config}">prefix-${solr.test.sys.prop2}-suffix</propTest>
<!-- test ValueSourceParser plugins -->
<valueSourceParser name="nvl" class="org.apache.solr.search.function.NvlValueSourceParser">
<float name="nvlFloatValue">0.0</float>
</valueSourceParser>
</config>